From 98f21eada0a8dbbffae6cf133dc038b0e55e1b74 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Mon, 17 May 2021 15:20:34 +0200 Subject: [PATCH 001/173] feat(metadata): add metadata block for CodeMeta #7844 --- scripts/api/data/metadatablocks/codemeta.tsv | 40 ++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 scripts/api/data/metadatablocks/codemeta.tsv diff --git a/scripts/api/data/metadatablocks/codemeta.tsv b/scripts/api/data/metadatablocks/codemeta.tsv new file mode 100644 index 00000000000..9f32cf20299 --- /dev/null +++ b/scripts/api/data/metadatablocks/codemeta.tsv @@ -0,0 +1,40 @@ +#metadataBlock name dataverseAlias displayName blockURI + codeMeta20 Software Metadata (v2.0) https://codemeta.github.io/terms/ +#datasetField name title description watermark fieldType displayOrder displayFormat advancedSearchField allowControlledVocabulary allowmultiples facetable displayoncreate required parent metadatablock_id termURI + softwareVersion Software version Version of the software instance. major.minor, e.g. 1.3 text 0 v#VALUE TRUE FALSE FALSE TRUE TRUE FALSE codeMeta20 https://schema.org/softwareVersion + developmentStatus Development Status Description of development status, e.g. work in progress (wip), active, inactive, suspended. See repostatus.org for more information. text 1 #VALUE TRUE TRUE FALSE TRUE FALSE FALSE codeMeta20 + codeRepository Code Repository Link to the repository where the un-compiled, human readable code and related code is located (SVN, GitHub, CodePlex, institutional GitLab instance, etc.). https://... url 2 #VALUE TRUE FALSE TRUE FALSE TRUE FALSE codeMeta20 https://schema.org/codeRepository + programmingLanguage Programming Language The programming language(s) used to implement the software (e.g. Python, C++, Matlab, Fortran, Java, Julia,...) text 3 #VALUE TRUE FALSE TRUE TRUE TRUE FALSE codeMeta20 https://schema.org/programmingLanguage + operatingSystem Operating Systems Operating systems supported (e.g. Windows 10, OSX 11.3, Android 11). 4 FALSE FALSE TRUE FALSE TRUE FALSE codeMeta20 https://schema.org/operatingSystem + operatingSystemName Name The supported operating systems name Windows, Mac OS X, Linux, Android, ... text 5 #VALUE TRUE FALSE FALSE TRUE TRUE FALSE operatingSystem codeMeta20 + operatingSystemVersion Version The supported operating systems version text 6 #VALUE TRUE FALSE FALSE TRUE TRUE FALSE operatingSystem codeMeta20 + applicationCategory Application Category Type of software application, e.g. Simulation, Analysis, Visualisation. text #VALUE TRUE FALSE TRUE TRUE TRUE FALSE codeMeta20 https://schema.org/applicationCategory + applicationSubCategory Application Subcategory Subcategory of the application, e.g. Arcade Game. text #VALUE TRUE FALSE TRUE TRUE FALSE FALSE codeMeta20 https://schema.org/applicationSubCategory + softwareHelp Software Help/Documentation Link to help texts or documentation https://... url #VALUE FALSE FALSE TRUE FALSE TRUE FALSE codeMeta20 https://schema.org/softwareHelp + buildInstructions Build instructions Link to installation instructions/documentation https://... url #VALUE FALSE FALSE FALSE TRUE FALSE FALSE codeMeta20 + runtimePlatform Runtime Platform Runtime platform or script interpreter dependencies (Example - Java v1, Python2.3, .Net Framework 3.0). Supersedes runtime. text #VALUE TRUE FALSE TRUE TRUE FALSE FALSE codeMeta20 https://schema.org/runtimePlatform + targetProduct Target Product Target Operating System / Product to which the code applies. 
If applies to several versions, just the product name can be used. text #VALUE FALSE FALSE TRUE TRUE FALSE FALSE codeMeta20 https://schema.org/targetProduct + softwareRequirements Software Requirements Required software dependencies none FALSE FALSE TRUE FALSE FALSE FALSE codeMeta20 https://schema.org/softwareRequirements + softwareRequirementsName Name Name or title of the required software/library text #VALUE FALSE FALSE FALSE FALSE FALSE softwareRequirements codeMeta20 + softwareRequirementsVersion Version Version of the required software/library text #VALUE FALSE FALSE FALSE FALSE FALSE softwareRequirements codeMeta20 + softwareRequirementsUrl URL Link to required software/library https://... url #VALUE FALSE FALSE FALSE FALSE FALSE softwareRequirements codeMeta20 + softwareSuggestions Software Suggestions Optional dependencies, e.g. for optional features, code development, etc. none FALSE FALSE TRUE FALSE FALSE FALSE codeMeta20 + softwareSuggestionsName Name Name or title of the optional software/library text #VALUE FALSE FALSE FALSE FALSE FALSE softwareSuggestions codeMeta20 + softwareSuggestionsVersion Version Version of the optional software/library text #VALUE FALSE FALSE FALSE FALSE FALSE softwareSuggestions codeMeta20 + softwareSuggestionsUrl URL Link to optional software/library https://... url #VALUE FALSE FALSE FALSE FALSE FALSE softwareSuggestions codeMeta20 + permissions Permissions Permission(s) required to run the code (for example, a mobile app may require full internet access or may run only on wifi). text #VALUE FALSE FALSE TRUE FALSE FALSE FALSE codeMeta20 https://schema.org/permissions + memoryRequirements Memory Requirements Minimum memory requirements. text #VALUE FALSE FALSE TRUE FALSE FALSE FALSE codeMeta20 https://schema.org/memoryRequirements + processorRequirements Processor Requirements Processor architecture required to run the application (e.g. IA64). text #VALUE FALSE FALSE TRUE FALSE FALSE FALSE codeMeta20 https://schema.org/processorRequirements + storageRequirements Storage Requirements Storage requirements (e.g. free space required). text #VALUE FALSE FALSE FALSE FALSE FALSE FALSE codeMeta20 https://schema.org/storageRequirements + releaseNotes Release Notes Link to release notes https://... url #VALUE FALSE FALSE FALSE FALSE FALSE FALSE codeMeta20 https://schema.org/releaseNotes + contIntegration Continous integration Link to continuous integration service https://... url #VALUE FALSE FALSE TRUE FALSE FALSE FALSE codeMeta20 + issueTracker Issue Tracker Link to software bug reporting or issue tracking system https://... 
url #VALUE FALSE FALSE FALSE TRUE FALSE FALSE codeMeta20 +#controlledVocabulary DatasetField Value identifier displayOrder + developmentStatus Concept concept 0 + developmentStatus WIP wip 1 + developmentStatus Active active 2 + developmentStatus Inactive inactive 3 + developmentStatus Unsupported unsupported 4 + developmentStatus Moved moved 5 + developmentStatus Suspended suspended 6 + developmentStatus Abandoned abandoned 7 \ No newline at end of file From f9f9cbda095f0c72ce54ad020933d005c1d9d1ee Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Mon, 17 May 2021 15:34:46 +0200 Subject: [PATCH 002/173] docs(metadata): add CodeMeta reference to user guide --- doc/sphinx-guides/source/user/appendix.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/sphinx-guides/source/user/appendix.rst b/doc/sphinx-guides/source/user/appendix.rst index 003f02cdd61..e0fa83ad2ca 100755 --- a/doc/sphinx-guides/source/user/appendix.rst +++ b/doc/sphinx-guides/source/user/appendix.rst @@ -26,5 +26,6 @@ Detailed below are what metadata schemas we support for Citation and Domain Spec `Virtual Observatory (VO) Discovery and Provenance Metadata `__ (`see .tsv version `__). - `Life Sciences Metadata `__: based on `ISA-Tab Specification `__, along with controlled vocabulary from subsets of the `OBI Ontology `__ and the `NCBI Taxonomy for Organisms `__ (`see .tsv version `__). - `Journal Metadata `__: based on the `Journal Archiving and Interchange Tag Set, version 1.2 `__ (`see .tsv version `__). +- `CodeMeta Software Metadata `__: based on the `CodeMeta Software Metadata Schema, version 2.0 `__ (`see .tsv version `__) See also the `Dataverse Software 4.0 Metadata Crosswalk: DDI, DataCite, DC, DCTerms, VO, ISA-Tab `__ document and the :doc:`/admin/metadatacustomization` section of the Admin Guide. From ed485df14d3761fb3fca4e0ea0bfa2d20ed2f332 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Mon, 17 May 2021 15:35:25 +0200 Subject: [PATCH 003/173] feat(metadata): load CodeMeta by default in new installations. 
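
New installations pick the block up automatically through the setup script change below. On an existing installation the same admin API call can be issued by hand; the example below assumes the default localhost:8080 endpoint and is run from scripts/api so that the relative TSV path resolves (adjust both as needed):

    curl http://localhost:8080/api/admin/datasetfield/load -X POST \
         --data-binary @data/metadatablocks/codemeta.tsv \
         -H "Content-type: text/tab-separated-values"
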
--- scripts/api/setup-datasetfields.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/api/setup-datasetfields.sh b/scripts/api/setup-datasetfields.sh index 0d2d60b9538..741a439e542 100755 --- a/scripts/api/setup-datasetfields.sh +++ b/scripts/api/setup-datasetfields.sh @@ -7,3 +7,4 @@ curl http://localhost:8080/api/admin/datasetfield/load -X POST --data-binary @da curl http://localhost:8080/api/admin/datasetfield/load -X POST --data-binary @data/metadatablocks/astrophysics.tsv -H "Content-type: text/tab-separated-values" curl http://localhost:8080/api/admin/datasetfield/load -X POST --data-binary @data/metadatablocks/biomedical.tsv -H "Content-type: text/tab-separated-values" curl http://localhost:8080/api/admin/datasetfield/load -X POST --data-binary @data/metadatablocks/journals.tsv -H "Content-type: text/tab-separated-values" +curl http://localhost:8080/api/admin/datasetfield/load -X POST --data-binary @data/metadatablocks/codemeta.tsv -H "Content-type: text/tab-separated-values" From 3c497a1475e864103c0583bea861e5e9376e23d1 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 18 May 2021 13:08:58 +0200 Subject: [PATCH 004/173] fix(metadata): fix wrong tab in CodeMeta and rephrase softwareVersion watermark helptext #7844 --- scripts/api/data/metadatablocks/codemeta.tsv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/api/data/metadatablocks/codemeta.tsv b/scripts/api/data/metadatablocks/codemeta.tsv index 9f32cf20299..c2711bca3ed 100644 --- a/scripts/api/data/metadatablocks/codemeta.tsv +++ b/scripts/api/data/metadatablocks/codemeta.tsv @@ -1,14 +1,14 @@ #metadataBlock name dataverseAlias displayName blockURI codeMeta20 Software Metadata (v2.0) https://codemeta.github.io/terms/ #datasetField name title description watermark fieldType displayOrder displayFormat advancedSearchField allowControlledVocabulary allowmultiples facetable displayoncreate required parent metadatablock_id termURI - softwareVersion Software version Version of the software instance. major.minor, e.g. 1.3 text 0 v#VALUE TRUE FALSE FALSE TRUE TRUE FALSE codeMeta20 https://schema.org/softwareVersion + softwareVersion Software version Version of the software instance, usually following some convention like SemVer etc. e.g. 0.2.1 or 1.3 or 2021.1 etc text 0 v#VALUE TRUE FALSE FALSE TRUE TRUE FALSE codeMeta20 https://schema.org/softwareVersion developmentStatus Development Status Description of development status, e.g. work in progress (wip), active, inactive, suspended. See repostatus.org for more information. text 1 #VALUE TRUE TRUE FALSE TRUE FALSE FALSE codeMeta20 codeRepository Code Repository Link to the repository where the un-compiled, human readable code and related code is located (SVN, GitHub, CodePlex, institutional GitLab instance, etc.). https://... url 2 #VALUE TRUE FALSE TRUE FALSE TRUE FALSE codeMeta20 https://schema.org/codeRepository programmingLanguage Programming Language The programming language(s) used to implement the software (e.g. Python, C++, Matlab, Fortran, Java, Julia,...) text 3 #VALUE TRUE FALSE TRUE TRUE TRUE FALSE codeMeta20 https://schema.org/programmingLanguage operatingSystem Operating Systems Operating systems supported (e.g. Windows 10, OSX 11.3, Android 11). 4 FALSE FALSE TRUE FALSE TRUE FALSE codeMeta20 https://schema.org/operatingSystem operatingSystemName Name The supported operating systems name Windows, Mac OS X, Linux, Android, ... 
text 5 #VALUE TRUE FALSE FALSE TRUE TRUE FALSE operatingSystem codeMeta20 operatingSystemVersion Version The supported operating systems version text 6 #VALUE TRUE FALSE FALSE TRUE TRUE FALSE operatingSystem codeMeta20 - applicationCategory Application Category Type of software application, e.g. Simulation, Analysis, Visualisation. text #VALUE TRUE FALSE TRUE TRUE TRUE FALSE codeMeta20 https://schema.org/applicationCategory + applicationCategory Application Category Type of software application, e.g. Simulation, Analysis, Visualisation. "" text #VALUE TRUE FALSE TRUE TRUE TRUE FALSE codeMeta20 https://schema.org/applicationCategory applicationSubCategory Application Subcategory Subcategory of the application, e.g. Arcade Game. text #VALUE TRUE FALSE TRUE TRUE FALSE FALSE codeMeta20 https://schema.org/applicationSubCategory softwareHelp Software Help/Documentation Link to help texts or documentation https://... url #VALUE FALSE FALSE TRUE FALSE TRUE FALSE codeMeta20 https://schema.org/softwareHelp buildInstructions Build instructions Link to installation instructions/documentation https://... url #VALUE FALSE FALSE FALSE TRUE FALSE FALSE codeMeta20 From 492491e89ef13f6254511b172641e1669b485e17 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 18 May 2021 13:11:52 +0200 Subject: [PATCH 005/173] fix(metadata): add standard name to Codemeta MDB displayName. #7844 --- scripts/api/data/metadatablocks/codemeta.tsv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/api/data/metadatablocks/codemeta.tsv b/scripts/api/data/metadatablocks/codemeta.tsv index c2711bca3ed..020cdcc11fc 100644 --- a/scripts/api/data/metadatablocks/codemeta.tsv +++ b/scripts/api/data/metadatablocks/codemeta.tsv @@ -1,5 +1,5 @@ #metadataBlock name dataverseAlias displayName blockURI - codeMeta20 Software Metadata (v2.0) https://codemeta.github.io/terms/ + codeMeta20 Software Metadata (CodeMeta v2.0) https://codemeta.github.io/terms/ #datasetField name title description watermark fieldType displayOrder displayFormat advancedSearchField allowControlledVocabulary allowmultiples facetable displayoncreate required parent metadatablock_id termURI softwareVersion Software version Version of the software instance, usually following some convention like SemVer etc. e.g. 0.2.1 or 1.3 or 2021.1 etc text 0 v#VALUE TRUE FALSE FALSE TRUE TRUE FALSE codeMeta20 https://schema.org/softwareVersion developmentStatus Development Status Description of development status, e.g. work in progress (wip), active, inactive, suspended. See repostatus.org for more information. 
text 1 #VALUE TRUE TRUE FALSE TRUE FALSE FALSE codeMeta20 From 382c1e4035752b3917cd9c967e55a8d41601a20d Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 21 Jul 2022 13:49:58 +0200 Subject: [PATCH 006/173] fix(metadata): make CodeMeta TSV usable #7844 - Add missing displayOrder values - Fix missing type for software requirements - Avoid splitting up compound fields too much, otherwise data is not exportable to schema.org or CodeMeta JSON-LD without special handling (#7856) - Tweak order - Tweak descriptions and examples - Fix whitespaces and line endings --- scripts/api/data/metadatablocks/codemeta.tsv | 77 ++++++++++---------- 1 file changed, 37 insertions(+), 40 deletions(-) diff --git a/scripts/api/data/metadatablocks/codemeta.tsv b/scripts/api/data/metadatablocks/codemeta.tsv index 020cdcc11fc..029ca2355ec 100644 --- a/scripts/api/data/metadatablocks/codemeta.tsv +++ b/scripts/api/data/metadatablocks/codemeta.tsv @@ -1,40 +1,37 @@ -#metadataBlock name dataverseAlias displayName blockURI - codeMeta20 Software Metadata (CodeMeta v2.0) https://codemeta.github.io/terms/ -#datasetField name title description watermark fieldType displayOrder displayFormat advancedSearchField allowControlledVocabulary allowmultiples facetable displayoncreate required parent metadatablock_id termURI - softwareVersion Software version Version of the software instance, usually following some convention like SemVer etc. e.g. 0.2.1 or 1.3 or 2021.1 etc text 0 v#VALUE TRUE FALSE FALSE TRUE TRUE FALSE codeMeta20 https://schema.org/softwareVersion - developmentStatus Development Status Description of development status, e.g. work in progress (wip), active, inactive, suspended. See repostatus.org for more information. text 1 #VALUE TRUE TRUE FALSE TRUE FALSE FALSE codeMeta20 - codeRepository Code Repository Link to the repository where the un-compiled, human readable code and related code is located (SVN, GitHub, CodePlex, institutional GitLab instance, etc.). https://... url 2 #VALUE TRUE FALSE TRUE FALSE TRUE FALSE codeMeta20 https://schema.org/codeRepository - programmingLanguage Programming Language The programming language(s) used to implement the software (e.g. Python, C++, Matlab, Fortran, Java, Julia,...) text 3 #VALUE TRUE FALSE TRUE TRUE TRUE FALSE codeMeta20 https://schema.org/programmingLanguage - operatingSystem Operating Systems Operating systems supported (e.g. Windows 10, OSX 11.3, Android 11). 4 FALSE FALSE TRUE FALSE TRUE FALSE codeMeta20 https://schema.org/operatingSystem - operatingSystemName Name The supported operating systems name Windows, Mac OS X, Linux, Android, ... text 5 #VALUE TRUE FALSE FALSE TRUE TRUE FALSE operatingSystem codeMeta20 - operatingSystemVersion Version The supported operating systems version text 6 #VALUE TRUE FALSE FALSE TRUE TRUE FALSE operatingSystem codeMeta20 - applicationCategory Application Category Type of software application, e.g. Simulation, Analysis, Visualisation. "" text #VALUE TRUE FALSE TRUE TRUE TRUE FALSE codeMeta20 https://schema.org/applicationCategory - applicationSubCategory Application Subcategory Subcategory of the application, e.g. Arcade Game. text #VALUE TRUE FALSE TRUE TRUE FALSE FALSE codeMeta20 https://schema.org/applicationSubCategory - softwareHelp Software Help/Documentation Link to help texts or documentation https://... url #VALUE FALSE FALSE TRUE FALSE TRUE FALSE codeMeta20 https://schema.org/softwareHelp - buildInstructions Build instructions Link to installation instructions/documentation https://... 
url #VALUE FALSE FALSE FALSE TRUE FALSE FALSE codeMeta20 - runtimePlatform Runtime Platform Runtime platform or script interpreter dependencies (Example - Java v1, Python2.3, .Net Framework 3.0). Supersedes runtime. text #VALUE TRUE FALSE TRUE TRUE FALSE FALSE codeMeta20 https://schema.org/runtimePlatform - targetProduct Target Product Target Operating System / Product to which the code applies. If applies to several versions, just the product name can be used. text #VALUE FALSE FALSE TRUE TRUE FALSE FALSE codeMeta20 https://schema.org/targetProduct - softwareRequirements Software Requirements Required software dependencies none FALSE FALSE TRUE FALSE FALSE FALSE codeMeta20 https://schema.org/softwareRequirements - softwareRequirementsName Name Name or title of the required software/library text #VALUE FALSE FALSE FALSE FALSE FALSE softwareRequirements codeMeta20 - softwareRequirementsVersion Version Version of the required software/library text #VALUE FALSE FALSE FALSE FALSE FALSE softwareRequirements codeMeta20 - softwareRequirementsUrl URL Link to required software/library https://... url #VALUE FALSE FALSE FALSE FALSE FALSE softwareRequirements codeMeta20 - softwareSuggestions Software Suggestions Optional dependencies, e.g. for optional features, code development, etc. none FALSE FALSE TRUE FALSE FALSE FALSE codeMeta20 - softwareSuggestionsName Name Name or title of the optional software/library text #VALUE FALSE FALSE FALSE FALSE FALSE softwareSuggestions codeMeta20 - softwareSuggestionsVersion Version Version of the optional software/library text #VALUE FALSE FALSE FALSE FALSE FALSE softwareSuggestions codeMeta20 - softwareSuggestionsUrl URL Link to optional software/library https://... url #VALUE FALSE FALSE FALSE FALSE FALSE softwareSuggestions codeMeta20 - permissions Permissions Permission(s) required to run the code (for example, a mobile app may require full internet access or may run only on wifi). text #VALUE FALSE FALSE TRUE FALSE FALSE FALSE codeMeta20 https://schema.org/permissions - memoryRequirements Memory Requirements Minimum memory requirements. text #VALUE FALSE FALSE TRUE FALSE FALSE FALSE codeMeta20 https://schema.org/memoryRequirements - processorRequirements Processor Requirements Processor architecture required to run the application (e.g. IA64). text #VALUE FALSE FALSE TRUE FALSE FALSE FALSE codeMeta20 https://schema.org/processorRequirements - storageRequirements Storage Requirements Storage requirements (e.g. free space required). text #VALUE FALSE FALSE FALSE FALSE FALSE FALSE codeMeta20 https://schema.org/storageRequirements - releaseNotes Release Notes Link to release notes https://... url #VALUE FALSE FALSE FALSE FALSE FALSE FALSE codeMeta20 https://schema.org/releaseNotes - contIntegration Continous integration Link to continuous integration service https://... url #VALUE FALSE FALSE TRUE FALSE FALSE FALSE codeMeta20 - issueTracker Issue Tracker Link to software bug reporting or issue tracking system https://... 
url #VALUE FALSE FALSE FALSE TRUE FALSE FALSE codeMeta20 -#controlledVocabulary DatasetField Value identifier displayOrder - developmentStatus Concept concept 0 - developmentStatus WIP wip 1 - developmentStatus Active active 2 - developmentStatus Inactive inactive 3 - developmentStatus Unsupported unsupported 4 - developmentStatus Moved moved 5 - developmentStatus Suspended suspended 6 - developmentStatus Abandoned abandoned 7 \ No newline at end of file +#metadataBlock name dataverseAlias displayName blockURI + codeMeta20 Software Metadata (CodeMeta v2.0) https://codemeta.github.io/terms/ +#datasetField name title description watermark fieldType displayOrder displayFormat advancedSearchField allowControlledVocabulary allowmultiples facetable displayoncreate required parent metadatablock_id termURI + softwareVersion Software Version Version of the software instance, usually following some convention like SemVer etc. e.g. 0.2.1 or 1.3 or 2021.1 etc text 0 #VALUE TRUE FALSE FALSE TRUE TRUE FALSE codeMeta20 https://schema.org/softwareVersion + developmentStatus Development Status Description of development status, e.g. work in progress (wip), active, etc. See repostatus.org for more information. text 1 #VALUE TRUE TRUE FALSE TRUE FALSE FALSE codeMeta20 https://www.repostatus.org + codeRepository Code Repository Link to the repository where the un-compiled, human readable code and related code is located (SVN, GitHub, CodePlex, institutional GitLab instance, Gitea, etc.). e.g. https://github.com/user/project url 2 #VALUE TRUE FALSE TRUE FALSE TRUE FALSE codeMeta20 https://schema.org/codeRepository + applicationCategory Application Category Type of software application, e.g. Simulation, Analysis, Visualisation. text 3 #VALUE TRUE FALSE TRUE TRUE TRUE FALSE codeMeta20 https://schema.org/applicationCategory + applicationSubCategory Application Subcategory Subcategory of the application, e.g. Arcade Game. text 4 #VALUE TRUE FALSE TRUE TRUE FALSE FALSE codeMeta20 https://schema.org/applicationSubCategory + programmingLanguage Programming Language The programming language(s) used to implement the software (e.g. Python, C++, Matlab, Fortran, Java, Julia,...) text 5 #VALUE TRUE FALSE TRUE TRUE TRUE FALSE codeMeta20 https://schema.org/programmingLanguage + runtimePlatform Runtime Platform Runtime platform or script interpreter dependencies (e.g. Java 11, Python 3.10 or .Net Framework 4.8). e.g. Python 3.10 text 6 #VALUE TRUE FALSE TRUE TRUE FALSE FALSE codeMeta20 https://schema.org/runtimePlatform + operatingSystem Operating Systems Operating systems supported (e.g. Windows 10, OSX 11.3, Android 11). text 7 #VALUE TRUE FALSE TRUE TRUE TRUE FALSE codeMeta20 https://schema.org/operatingSystem + targetProduct Target Product Target Operating System / Product to which the code applies. If applies to several versions, just the product name can be used. text 8 #VALUE TRUE FALSE TRUE TRUE FALSE FALSE codeMeta20 https://schema.org/targetProduct + buildInstructions Build Instructions Link to installation instructions/documentation e.g. https://github.com/user/project/blob/main/BUILD.md url 9 #VALUE FALSE FALSE TRUE FALSE FALSE FALSE codeMeta20 https://codemeta.github.io/terms/buildInstructions + softwareRequirementsItem Software Requirements Required software dependencies none 10 FALSE FALSE TRUE FALSE TRUE FALSE codeMeta20 + softwareRequirements Name & Version Name and version of the required software/library dependency e.g. 
Pandas 1.4.3 text 0 #VALUE TRUE FALSE FALSE FALSE TRUE FALSE softwareRequirementsItem codeMeta20 https://schema.org/softwareRequirements + softwareRequirementsInfoUrl Info URL Link to required software/library homepage or documentation (ideally also versioned) e.g. https://pandas.pydata.org/pandas-docs/version/1.4.3 url 1 #VALUE FALSE FALSE FALSE FALSE TRUE FALSE softwareRequirementsItem codeMeta20 https://dataverse.org/schema/codeMeta20/softwareRequirementsInfoUrl + softwareSuggestionsItem Software Suggestions Optional dependencies, e.g. for optional features, code development, etc. none 11 FALSE FALSE TRUE FALSE FALSE FALSE codeMeta20 + softwareSuggestions Name & Version Name and version of the optional software/library dependency e.g. Sphinx 5.0.2 text 0 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE softwareSuggestionsItem codeMeta20 https://codemeta.github.io/terms/softwareSuggestions + softwareSuggestionsInfoUrl Info URL Link to optional software/library homepage or documentation (ideally also versioned) e.g. https://www.sphinx-doc.org url 1 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE softwareSuggestionsItem codeMeta20 https://dataverse.org/schema/codeMeta20/softwareSuggestionsInfoUrl + memoryRequirements Memory Requirements Minimum memory requirements. text 12 #VALUE TRUE FALSE FALSE FALSE FALSE FALSE codeMeta20 https://schema.org/memoryRequirements + processorRequirements Processor Requirements Processor architecture required to run the application (e.g. IA64). text 13 #VALUE TRUE FALSE TRUE FALSE FALSE FALSE codeMeta20 https://schema.org/processorRequirements + storageRequirements Storage Requirements Storage requirements (e.g. free space required). text 14 #VALUE TRUE FALSE FALSE FALSE FALSE FALSE codeMeta20 https://schema.org/storageRequirements + permissions Permissions Permission(s) required to run the code (for example, a mobile app may require full internet access or may run only on wifi). text 15 #VALUE TRUE FALSE TRUE FALSE FALSE FALSE codeMeta20 https://schema.org/permissions + softwareHelp Software Help/Documentation Link to help texts or documentation e.g. https://user.github.io/project/docs url 16 #VALUE FALSE FALSE TRUE FALSE TRUE FALSE codeMeta20 https://schema.org/softwareHelp + readme Readme Link to the README of the project e.g. https://github.com/user/project/blob/main/README.md url 17 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE codeMeta20 https://codemeta.github.io/terms/readme + releaseNotes Release Notes Link to release notes e.g. https://github.com/user/project/blob/main/docs/release-0.1.md url 18 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE codeMeta20 https://schema.org/releaseNotes + contIntegration Continuous Integration Link to continuous integration service e.g. https://github.com/user/project/actions url 19 #VALUE FALSE FALSE TRUE FALSE FALSE FALSE codeMeta20 https://codemeta.github.io/terms/contIntegration + issueTracker Issue Tracker Link to software bug reporting or issue tracking system e.g. 
https://github.com/user/project/issues url 20 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE codeMeta20 https://codemeta.github.io/terms/issueTracker +#controlledVocabulary DatasetField Value identifier displayOrder + developmentStatus Concept concept 0 + developmentStatus WIP wip 1 + developmentStatus Active active 2 + developmentStatus Inactive inactive 3 + developmentStatus Unsupported unsupported 4 + developmentStatus Moved moved 5 + developmentStatus Suspended suspended 6 + developmentStatus Abandoned abandoned 7 From 1e8567d2ad343547d39c3df3d32e4a1d81229d6e Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 22 Jul 2022 10:01:45 +0200 Subject: [PATCH 007/173] feat(metadata): add i18n properties for CodeMeta #7844 --- .../java/propertyFiles/codeMeta20.properties | 85 +++++++++++++++++++ 1 file changed, 85 insertions(+) create mode 100644 src/main/java/propertyFiles/codeMeta20.properties diff --git a/src/main/java/propertyFiles/codeMeta20.properties b/src/main/java/propertyFiles/codeMeta20.properties new file mode 100644 index 00000000000..e203c1e46e9 --- /dev/null +++ b/src/main/java/propertyFiles/codeMeta20.properties @@ -0,0 +1,85 @@ +metadatablock.name=codeMeta20 +metadatablock.displayName=Software Metadata (CodeMeta 2.0) +datasetfieldtype.softwareVersion.title=Software Version +datasetfieldtype.softwareVersion.description=Version of the software instance, usually following some convention like SemVer etc. +datasetfieldtype.softwareVersion.watermark=e.g. 0.2.1 or 1.3 or 2021.1 etc +datasetfieldtype.developmentStatus.title=Development Status +datasetfieldtype.developmentStatus.description=Description of development status, e.g. work in progress (wip), active, etc. See repostatus.org for more information. +datasetfieldtype.developmentStatus.watermark= Development Status +datasetfieldtype.codeRepository.title=Code Repository +datasetfieldtype.codeRepository.description=Link to the repository where the un-compiled, human readable code and related code is located (SVN, GitHub, CodePlex, institutional GitLab instance, Gitea, etc.). +datasetfieldtype.codeRepository.watermark=e.g. https://github.com/user/project +datasetfieldtype.applicationCategory.title= Application Category +datasetfieldtype.applicationCategory.description=Type of software application, e.g. Simulation, Analysis, Visualisation. +datasetfieldtype.applicationCategory.watermark= +datasetfieldtype.applicationSubCategory.title=Application Subcategory +datasetfieldtype.applicationSubCategory.description=Subcategory of the application, e.g. Arcade Game. +datasetfieldtype.applicationSubCategory.watermark= +datasetfieldtype.programmingLanguage.title=Programming Language +datasetfieldtype.programmingLanguage.description=The programming language(s) used to implement the software (e.g. Python, C++, Matlab, Fortran, Java, Julia,...) +datasetfieldtype.programmingLanguage.watermark= +datasetfieldtype.runtimePlatform.title=Runtime Platform +datasetfieldtype.runtimePlatform.description=Runtime platform or script interpreter dependencies (e.g. Java 11, Python 3.10 or .Net Framework 4.8). +datasetfieldtype.runtimePlatform.watermark=e.g. Python 3.10 +datasetfieldtype.operatingSystem.title=Operating Systems +datasetfieldtype.operatingSystem.description=Operating systems supported (e.g. Windows 10, OSX 11.3, Android 11). +datasetfieldtype.operatingSystem.watermark= +datasetfieldtype.targetProduct.title=Target Product +datasetfieldtype.targetProduct.description=Target Operating System / Product to which the code applies. 
If applies to several versions, just the product name can be used. +datasetfieldtype.targetProduct.watermark= +datasetfieldtype.buildInstructions.title=Build Instructions +datasetfieldtype.buildInstructions.description=Link to installation instructions/documentation +datasetfieldtype.buildInstructions.watermark=e.g. https://github.com/user/project/blob/main/BUILD.md +datasetfieldtype.softwareRequirementsItem.title=Software Requirements +datasetfieldtype.softwareRequirementsItem.description=Required software dependencies +datasetfieldtype.softwareRequirementsItem.watermark= +datasetfieldtype.softwareRequirements.title=Name & Version +datasetfieldtype.softwareRequirements.description=Name and version of the required software/library dependency +datasetfieldtype.softwareRequirements.watermark=e.g. Pandas 1.4.3 +datasetfieldtype.softwareRequirementsInfoUrl.title=Info URL +datasetfieldtype.softwareRequirementsInfoUrl.description=Link to required software/library homepage or documentation (ideally also versioned) +datasetfieldtype.softwareRequirementsInfoUrl.watermark=e.g. https://pandas.pydata.org/pandas-docs/version/1.4.3 +datasetfieldtype.softwareSuggestionsItem.title=Software Suggestions +datasetfieldtype.softwareSuggestionsItem.description=Optional dependencies, e.g. for optional features, code development, etc. +datasetfieldtype.softwareSuggestionsItem.watermark= +datasetfieldtype.softwareSuggestions.title=Name & Version +datasetfieldtype.softwareSuggestions.description=Name and version of the optional software/library dependency +datasetfieldtype.softwareSuggestions.watermark=e.g. Sphinx 5.0.2 +datasetfieldtype.softwareSuggestionsInfoUrl.title=Info URL +datasetfieldtype.softwareSuggestionsInfoUrl.description=Link to optional software/library homepage or documentation (ideally also versioned) +datasetfieldtype.softwareSuggestionsInfoUrl.watermark=e.g. https://www.sphinx-doc.org +datasetfieldtype.memoryRequirements.title=Memory Requirements +datasetfieldtype.memoryRequirements.description=Minimum memory requirements. +datasetfieldtype.memoryRequirements.watermark= +datasetfieldtype.processorRequirements.title=Processor Requirements +datasetfieldtype.processorRequirements.description=Processor architecture required to run the application (e.g. IA64). +datasetfieldtype.processorRequirements.watermark= +datasetfieldtype.storageRequirements.title=Storage Requirements +datasetfieldtype.storageRequirements.description=Storage requirements (e.g. free space required). +datasetfieldtype.storageRequirements.watermark= +datasetfieldtype.permissions.title=Permissions +datasetfieldtype.permissions.description=Permission(s) required to run the code (for example, a mobile app may require full internet access or may run only on wifi). +datasetfieldtype.permissions.watermark= +datasetfieldtype.softwareHelp.title=Software Help/Documentation +datasetfieldtype.softwareHelp.description=Link to help texts or documentation +datasetfieldtype.softwareHelp.watermark=e.g. https://user.github.io/project/docs +datasetfieldtype.readme.title=Readme +datasetfieldtype.readme.description=Link to the README of the project +datasetfieldtype.readme.watermark=e.g. https://github.com/user/project/blob/main/README.md +datasetfieldtype.releaseNotes.title=Release Notes +datasetfieldtype.releaseNotes.description=Link to release notes +datasetfieldtype.releaseNotes.watermark=e.g. 
https://github.com/user/project/blob/main/docs/release-0.1.md +datasetfieldtype.contIntegration.title=Continuous Integration +datasetfieldtype.contIntegration.description=Link to continuous integration service +datasetfieldtype.contIntegration.watermark=e.g. https://github.com/user/project/actions +datasetfieldtype.issueTracker.title=Issue Tracker +datasetfieldtype.issueTracker.description=Link to software bug reporting or issue tracking system +datasetfieldtype.issueTracker.watermark=e.g. https://github.com/user/project/issues +controlledvocabulary.developmentStatus.concept=Concept +controlledvocabulary.developmentStatus.wip=WIP +controlledvocabulary.developmentStatus.active=Active +controlledvocabulary.developmentStatus.inactive=Inactive +controlledvocabulary.developmentStatus.unsupported=Unsupported +controlledvocabulary.developmentStatus.moved=Moved +controlledvocabulary.developmentStatus.suspended=Suspended +controlledvocabulary.developmentStatus.abandoned=Abandoned From c6c669c0ebbb8a3ef161ef48b39391a0ee7064a9 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Mon, 1 Aug 2022 12:38:14 +0200 Subject: [PATCH 008/173] refactor(metadata): move CodeMeta to experimental #7844 With the merge of computational workflow metadata considered experimental, move CodeMeta there, too. --- doc/sphinx-guides/source/user/appendix.rst | 2 +- scripts/api/setup-datasetfields.sh | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/sphinx-guides/source/user/appendix.rst b/doc/sphinx-guides/source/user/appendix.rst index fe8dc580f1e..d6009edc9c9 100755 --- a/doc/sphinx-guides/source/user/appendix.rst +++ b/doc/sphinx-guides/source/user/appendix.rst @@ -30,13 +30,13 @@ Detailed below are what metadata schemas we support for Citation and Domain Spec `Virtual Observatory (VO) Discovery and Provenance Metadata `__ (`see .tsv version `__). - `Life Sciences Metadata `__ (`see .tsv version `__): based on `ISA-Tab Specification `__, along with controlled vocabulary from subsets of the `OBI Ontology `__ and the `NCBI Taxonomy for Organisms `__. - `Journal Metadata `__ (`see .tsv version `__): based on the `Journal Archiving and Interchange Tag Set, version 1.2 `__. -- `CodeMeta Software Metadata `__: based on the `CodeMeta Software Metadata Schema, version 2.0 `__ (`see .tsv version `__) Experimental Metadata ~~~~~~~~~~~~~~~~~~~~~ Unlike supported metadata, experimental metadata is not enabled by default in a new Dataverse installation. Feedback via any `channel `_ is welcome! +- `CodeMeta Software Metadata `__: based on the `CodeMeta Software Metadata Schema, version 2.0 `__ (`see .tsv version `__) - `Computational Workflow Metadata `__ (`see .tsv version `__): adapted from `Bioschemas Computational Workflow Profile, version 1.0 `__ and `Codemeta `__. 
See Also diff --git a/scripts/api/setup-datasetfields.sh b/scripts/api/setup-datasetfields.sh index 741a439e542..0d2d60b9538 100755 --- a/scripts/api/setup-datasetfields.sh +++ b/scripts/api/setup-datasetfields.sh @@ -7,4 +7,3 @@ curl http://localhost:8080/api/admin/datasetfield/load -X POST --data-binary @da curl http://localhost:8080/api/admin/datasetfield/load -X POST --data-binary @data/metadatablocks/astrophysics.tsv -H "Content-type: text/tab-separated-values" curl http://localhost:8080/api/admin/datasetfield/load -X POST --data-binary @data/metadatablocks/biomedical.tsv -H "Content-type: text/tab-separated-values" curl http://localhost:8080/api/admin/datasetfield/load -X POST --data-binary @data/metadatablocks/journals.tsv -H "Content-type: text/tab-separated-values" -curl http://localhost:8080/api/admin/datasetfield/load -X POST --data-binary @data/metadatablocks/codemeta.tsv -H "Content-type: text/tab-separated-values" From 727ef9be9bd948f7c9b39855fa2648655439371b Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 7 Sep 2022 17:36:04 -0400 Subject: [PATCH 009/173] fix progress during hash calc --- src/main/webapp/resources/js/fileupload.js | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/webapp/resources/js/fileupload.js b/src/main/webapp/resources/js/fileupload.js index 564239ee7ef..a478235c09f 100644 --- a/src/main/webapp/resources/js/fileupload.js +++ b/src/main/webapp/resources/js/fileupload.js @@ -144,6 +144,7 @@ var fileUpload = class fileUploadClass { async doUpload() { this.state = UploadState.UPLOADING; var thisFile = curFile-1; + this.id=thisFile; //This appears to be the earliest point when the file table has been populated, and, since we don't know how many table entries have had ids added already, we check var filerows = $('.ui-fileupload-files .ui-fileupload-row'); //Add an id attribute to each entry so we can later match progress and errors with the right entry @@ -318,7 +319,7 @@ var fileUpload = class fileUploadClass { if (directUploadReport) { getMD5(this.file, prog => { var current = 1 + prog; - $('progress').attr({ + $('[upid="' + this.id + '"] progress').attr({ value: current, max: 2 }); From d35e1c3ea8e149854598053f9dd0dd35bc9b132b Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Mon, 12 Sep 2022 09:04:11 -0400 Subject: [PATCH 010/173] Trigger auto-analyze more frequently for guestbook estimates --- .../db/migration/V5.12.0.1__8840-improve-guestbook-estimates.sql | 1 + 1 file changed, 1 insertion(+) create mode 100644 src/main/resources/db/migration/V5.12.0.1__8840-improve-guestbook-estimates.sql diff --git a/src/main/resources/db/migration/V5.12.0.1__8840-improve-guestbook-estimates.sql b/src/main/resources/db/migration/V5.12.0.1__8840-improve-guestbook-estimates.sql new file mode 100644 index 00000000000..91ab5253f9c --- /dev/null +++ b/src/main/resources/db/migration/V5.12.0.1__8840-improve-guestbook-estimates.sql @@ -0,0 +1 @@ +ALTER TABLE guestbookresponse SET (autovacuum_analyze_scale_factor = 0.01); \ No newline at end of file From 11ff8d7a7a75c9566935e6b2581bb88cdad0529e Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Mon, 12 Sep 2022 09:29:19 -0400 Subject: [PATCH 011/173] rel note --- doc/release-notes/8840-improved-download-estimate.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 doc/release-notes/8840-improved-download-estimate.md diff --git a/doc/release-notes/8840-improved-download-estimate.md b/doc/release-notes/8840-improved-download-estimate.md new file mode 100644 index 00000000000..cb264b7e683 --- 
/dev/null +++ b/doc/release-notes/8840-improved-download-estimate.md @@ -0,0 +1 @@ +To improve performance, Dataverse estimates download counts. This release includes an update that makes the estimate more accurate. \ No newline at end of file From a72e88ced9f09e0c8897e5707598252f2d54184c Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Mon, 20 Jun 2022 17:46:47 +0200 Subject: [PATCH 012/173] refactor(settings): simplify SystemConfig.getVersion #7000 Instead of trying to read a built time file from Maven, use MicroProfile Config to retrieve the version and build number. The version is by default set via microprofile-config.properties (or overridden by an env var in a container). The build number is still read from either BuildNumber.properties or, if not present, from MicroProfile Config, defaulting to empty. This also avoids copying extra files into containers to retrieve the version string. --- .../iq/dataverse/util/SystemConfig.java | 175 ++++-------------- .../iq/dataverse/util/SystemConfigTest.java | 36 ++++ 2 files changed, 77 insertions(+), 134 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java index bd27405fae5..25dd3dd6138 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java @@ -1,18 +1,28 @@ package edu.harvard.iq.dataverse.util; import com.ocpsoft.pretty.PrettyContext; - import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.DataverseServiceBean; import edu.harvard.iq.dataverse.DvObjectContainer; import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; import edu.harvard.iq.dataverse.authorization.providers.builtin.BuiltinAuthenticationProvider; import edu.harvard.iq.dataverse.authorization.providers.oauth2.AbstractOAuth2AuthenticationProvider; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.validation.PasswordValidatorUtil; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStream; +import org.eclipse.microprofile.config.Config; +import org.eclipse.microprofile.config.ConfigProvider; +import org.passay.CharacterRule; + +import javax.ejb.EJB; +import javax.ejb.Stateless; +import javax.inject.Named; +import javax.json.Json; +import javax.json.JsonArray; +import javax.json.JsonObject; +import javax.json.JsonReader; +import javax.json.JsonString; +import javax.json.JsonValue; import java.io.StringReader; import java.net.InetAddress; import java.net.UnknownHostException; @@ -23,25 +33,11 @@ import java.util.List; import java.util.Map; import java.util.MissingResourceException; -import java.util.Properties; import java.util.ResourceBundle; import java.util.logging.Logger; import java.util.regex.Matcher; import java.util.regex.Pattern; -import javax.ejb.EJB; -import javax.ejb.Stateless; -import javax.inject.Named; -import javax.json.Json; -import javax.json.JsonArray; -import javax.json.JsonObject; -import javax.json.JsonReader; -import javax.json.JsonString; -import javax.json.JsonValue; - -import org.passay.CharacterRule; -import org.apache.commons.io.IOUtils; - /** * System-wide configuration */ @@ -50,6 +46,7 @@ public class SystemConfig { private static final Logger logger = Logger.getLogger(SystemConfig.class.getCanonicalName()); + private static final Config config = ConfigProvider.getConfig(); @EJB SettingsServiceBean 
settingsService; @@ -109,9 +106,8 @@ public class SystemConfig { public static final long defaultZipDownloadLimit = 104857600L; // 100MB private static final int defaultMultipleUploadFilesLimit = 1000; private static final int defaultLoginSessionTimeout = 480; // = 8 hours - - private static String appVersionString = null; - private static String buildNumberString = null; + + private String buildNumber = null; private static final String JVM_TIMER_SERVER_OPTION = "dataverse.timerServer"; @@ -132,127 +128,38 @@ public String getVersion() { // candidate for being moved into some kind of an application-scoped caching // service... some CachingService @Singleton - ? (L.A. 5.8) public String getVersion(boolean withBuildNumber) { - - if (appVersionString == null) { - - // The Version Number is no longer supplied in a .properties file - so - // we can't just do - // return BundleUtil.getStringFromBundle("version.number", null, ResourceBundle.getBundle("VersionNumber", Locale.US)); - // - // Instead, we'll rely on Maven placing the version number into the - // Manifest, and getting it from there: - // (this is considered a better practice, and will also allow us - // to maintain this number in only one place - the pom.xml file) - // -- L.A. 4.0.2 - - // One would assume, that once the version is in the MANIFEST.MF, - // as Implementation-Version:, it would be possible to obtain - // said version simply as - // appVersionString = getClass().getPackage().getImplementationVersion(); - // alas - that's not working, for whatever reason. (perhaps that's - // only how it works with jar-ed packages; not with .war files). - // People on the interwebs suggest that one should instead - // open the Manifest as a resource, then extract its attributes. - // There were some complications with that too. Plus, relying solely - // on the MANIFEST.MF would NOT work for those of the developers who - // are using "in place deployment" (i.e., where - // Netbeans runs their builds directly from the local target - // directory, bypassing the war file deployment; and the Manifest - // is only available in the .war file). For that reason, I am - // going to rely on the pom.properties file, and use java.util.Properties - // to read it. We have to look for this file in 2 different places - // depending on whether this is a .war file deployment, or a - // developers build. (the app-level META-INF is only populated when - // a .war file is built; the "maven-archiver" directory, on the other - // hand, is only available when it's a local build deployment). - // So, long story short, I'm resorting to the convoluted steps below. - // It may look hacky, but it should actually be pretty solid and - // reliable. - - - // First, find the absolute path url of the application persistence file - // always supplied with the Dataverse app: - java.net.URL fileUrl = Thread.currentThread().getContextClassLoader().getResource("META-INF/persistence.xml"); - String filePath = null; - - - if (fileUrl != null) { - filePath = fileUrl.getFile(); - if (filePath != null) { - InputStream mavenPropertiesInputStream = null; - String mavenPropertiesFilePath; - Properties mavenProperties = new Properties(); - - - filePath = filePath.replaceFirst("/[^/]*$", "/"); - // Using a relative path, find the location of the maven pom.properties file. - // First, try to look for it in the app-level META-INF. 
This will only be - // available if it's a war file deployment: - mavenPropertiesFilePath = filePath.concat("../../../META-INF/maven/edu.harvard.iq/dataverse/pom.properties"); - - try { - mavenPropertiesInputStream = new FileInputStream(mavenPropertiesFilePath); - } catch (IOException ioex) { - // OK, let's hope this is a local dev. build. - // In that case the properties file should be available in - // the maven-archiver directory: - - mavenPropertiesFilePath = filePath.concat("../../../../maven-archiver/pom.properties"); - - // try again: - - try { - mavenPropertiesInputStream = new FileInputStream(mavenPropertiesFilePath); - } catch (IOException ioex2) { - logger.warning("Failed to find and/or open for reading the pom.properties file."); - mavenPropertiesInputStream = null; - } - } - - if (mavenPropertiesInputStream != null) { - try { - mavenProperties.load(mavenPropertiesInputStream); - appVersionString = mavenProperties.getProperty("version"); - } catch (IOException ioex) { - logger.warning("caught IOException trying to read and parse the pom properties file."); - } finally { - IOUtils.closeQuietly(mavenPropertiesInputStream); - } - } - - } else { - logger.warning("Null file path representation of the location of persistence.xml in the webapp root directory!"); - } - } else { - logger.warning("Could not find the location of persistence.xml in the webapp root directory!"); - } - - - if (appVersionString == null) { - // still null? - defaulting to 4.0: - appVersionString = "4.0"; - } - } + // Retrieve the version via MPCONFIG + // NOTE: You may override the version via all methods of MPCONFIG. + // It will default to read from microprofile-config.properties source, + // which contains in the source a Maven property reference to ${project.version}. + // When packaging the app to deploy it, Maven will replace this, rendering it a static entry. + // NOTE: MicroProfile Config will cache the entry for us in internal maps. 
+ String appVersion = JvmSettings.VERSION.lookup(); if (withBuildNumber) { - if (buildNumberString == null) { - // (build number is still in a .properties file in the source tree; it only - // contains a real build number if this war file was built by - // Jenkins) - + if (buildNumber == null) { + // (build number is still in a .properties file in the source tree; it only + // contains a real build number if this war file was built by Jenkins) + // TODO: might be replaced with same trick as for version via Maven property w/ empty default try { - buildNumberString = ResourceBundle.getBundle("BuildNumber").getString("build.number"); + buildNumber = ResourceBundle.getBundle("BuildNumber").getString("build.number"); } catch (MissingResourceException ex) { - buildNumberString = null; + buildNumber = null; + } + + // Also try to read the build number via MicroProfile Config if not already present from the + // properties file (so can be overridden by env var or other source) + if (buildNumber == null || buildNumber.isEmpty()) { + buildNumber = JvmSettings.BUILD.lookupOptional().orElse(""); } } - if (buildNumberString != null && !buildNumberString.equals("")) { - return appVersionString + " build " + buildNumberString; - } - } + if (!buildNumber.equals("")) { + return appVersion + " build " + buildNumber; + } + } - return appVersionString; + return appVersion; } public String getSolrHostColonPort() { diff --git a/src/test/java/edu/harvard/iq/dataverse/util/SystemConfigTest.java b/src/test/java/edu/harvard/iq/dataverse/util/SystemConfigTest.java index 891b029f521..b8ad0a57748 100644 --- a/src/test/java/edu/harvard/iq/dataverse/util/SystemConfigTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/util/SystemConfigTest.java @@ -1,13 +1,49 @@ package edu.harvard.iq.dataverse.util; +import edu.harvard.iq.dataverse.settings.JvmSettings; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.CsvSource; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; class SystemConfigTest { + SystemConfig systemConfig = new SystemConfig(); + + @Test + void testGetVersion() { + // given + String version = "100.100"; + System.setProperty(JvmSettings.VERSION.getScopedKey(), version); + + // when + String result = systemConfig.getVersion(false); + + // then + assertEquals(version, result); + } + + @Test + void testGetVersionWithBuild() { + // given + String version = "100.100"; + String build = "FOOBAR"; + System.setProperty(JvmSettings.VERSION.getScopedKey(), version); + System.setProperty(JvmSettings.BUILD.getScopedKey(), build); + + // when + String result = systemConfig.getVersion(true); + + // then + assertTrue(result.startsWith(version), "'" + result + "' not starting with " + version); + assertTrue(result.contains("build")); + + // Cannot test this here - there might be the bundle file present which is not under test control + //assertTrue(result.endsWith(build), "'" + result + "' not ending with " + build); + } + @Test void testGetLongLimitFromStringOrDefault_withNullInput() { long defaultValue = 5L; From 5f925edf6668893c96df5117157086ef641a5b44 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 30 Jun 2022 21:57:49 +0200 Subject: [PATCH 013/173] docs(dev): add some tips about new options dataverse.build and dataverse.version #7000 --- doc/sphinx-guides/source/developers/tips.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git 
a/doc/sphinx-guides/source/developers/tips.rst b/doc/sphinx-guides/source/developers/tips.rst index 3fff3e76ea8..2b15948bd34 100755 --- a/doc/sphinx-guides/source/developers/tips.rst +++ b/doc/sphinx-guides/source/developers/tips.rst @@ -173,6 +173,13 @@ commit id in your test deployment webpages on the bottom right corner next to th When you prefer manual updates, there is another script, see above: :ref:`custom_build_num_script`. +An alternative to that is using *MicroProfile Config* and set the option ``dataverse.build`` via a system property, +environment variable (``DATAVERSE_BUILD``) or `one of the other config sources +`__. + +You could even override the version itself with the option ``dataverse.version`` in the same way, which is usually +picked up from a build time source. + Sample Data ----------- From 32f7a6f59743dda59fa3ff3c779fc35e6239f1e1 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 21 Jun 2022 20:57:01 +0200 Subject: [PATCH 014/173] refactor(settings): make Solr endpoint configurable via MPCONFIG #7000 By refactoring SystemConfig.getSolrHostColonPort, the Solr endpoint is not just configurable via a database setting, but also by all mechanisms of MicroProfile Config. - The database setting still has priority over the other mechanisms. - It's completely backward compatible, no config change necessary. - Tests have been added to ensure the behaviour - Default ("localhost:8983") for no setting given is now also done via MPCONFIG - Default for container usage ("solr:8983") possible via MPCONFIG profile "ct" --- .../iq/dataverse/settings/JvmSettings.java | 5 ++ .../iq/dataverse/util/SystemConfig.java | 37 +++++++----- .../META-INF/microprofile-config.properties | 6 ++ .../iq/dataverse/util/SystemConfigTest.java | 59 ++++++++++++++++++- 4 files changed, 92 insertions(+), 15 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java index 223e4b86da9..e73453abc16 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java @@ -42,6 +42,11 @@ public enum JvmSettings { VERSION(PREFIX, "version"), BUILD(PREFIX, "build"), + // SOLR INDEX SETTINGS + SCOPE_SOLR(PREFIX, "solr"), + SOLR_HOST(SCOPE_SOLR, "host"), + SOLR_PORT(SCOPE_SOLR, "port"), + ; private static final String SCOPE_SEPARATOR = "."; diff --git a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java index bd27405fae5..acdd112196f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java @@ -8,6 +8,7 @@ import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; import edu.harvard.iq.dataverse.authorization.providers.builtin.BuiltinAuthenticationProvider; import edu.harvard.iq.dataverse.authorization.providers.oauth2.AbstractOAuth2AuthenticationProvider; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.validation.PasswordValidatorUtil; import java.io.FileInputStream; @@ -24,6 +25,7 @@ import java.util.Map; import java.util.MissingResourceException; import java.util.Properties; +import java.util.Optional; import java.util.ResourceBundle; import java.util.logging.Logger; import java.util.regex.Matcher; @@ -95,12 +97,6 @@ public class SystemConfig { */ private static 
final String PASSWORD_RESET_TIMEOUT_IN_MINUTES = "dataverse.auth.password-reset-timeout-in-minutes"; - /** - * A common place to find the String for a sane Solr hostname:port - * combination. - */ - private String saneDefaultForSolrHostColonPort = "localhost:8983"; - /** * The default number of datafiles that we allow to be created through * zip file upload. @@ -254,15 +250,28 @@ public String getVersion(boolean withBuildNumber) { return appVersionString; } - + + /** + * Retrieve the Solr endpoint in "host:port" form, to be used with a Solr client. + * + * This will retrieve the setting from either the database ({@link SettingsServiceBean.Key#SolrHostColonPort}) or + * via Microprofile Config API (properties {@link JvmSettings#SOLR_HOST} and {@link JvmSettings#SOLR_PORT}). + * + * A database setting always takes precedence. If not given via other config sources, a default from + * resources/META-INF/microprofile-config.properties is used. (It's possible to use profiles.) + * + * @return Solr endpoint as string "hostname:port" + */ public String getSolrHostColonPort() { - String SolrHost; - if ( System.getenv("SOLR_SERVICE_HOST") != null && System.getenv("SOLR_SERVICE_HOST") != ""){ - SolrHost = System.getenv("SOLR_SERVICE_HOST"); - } - else SolrHost = saneDefaultForSolrHostColonPort; - String solrHostColonPort = settingsService.getValueForKey(SettingsServiceBean.Key.SolrHostColonPort, SolrHost); - return solrHostColonPort; + // Get from MPCONFIG. Might be configured by a sysadmin or simply return the default shipped with + // resources/META-INF/microprofile-config.properties. + // NOTE: containers should use system property mp.config.profile=ct to use sane container usage default + String host = JvmSettings.SOLR_HOST.lookup(); + String port = JvmSettings.SOLR_PORT.lookup(); + + // DB setting takes precedence over all. If not present, will return default from above. 
+ return Optional.ofNullable(settingsService.getValueForKey(SettingsServiceBean.Key.SolrHostColonPort)) + .orElse(host + ":" + port); } public boolean isProvCollectionEnabled() { diff --git a/src/main/resources/META-INF/microprofile-config.properties b/src/main/resources/META-INF/microprofile-config.properties index 16298d83118..b6aa686de01 100644 --- a/src/main/resources/META-INF/microprofile-config.properties +++ b/src/main/resources/META-INF/microprofile-config.properties @@ -3,6 +3,12 @@ dataverse.version=${project.version} dataverse.build= +# SEARCH INDEX +dataverse.solr.host=localhost +# Activating mp config profile -Dmp.config.profile=ct changes default to "solr" as DNS name +%ct.dataverse.solr.host=solr +dataverse.solr.port=8983 + # DATABASE dataverse.db.host=localhost dataverse.db.port=5432 diff --git a/src/test/java/edu/harvard/iq/dataverse/util/SystemConfigTest.java b/src/test/java/edu/harvard/iq/dataverse/util/SystemConfigTest.java index 891b029f521..75f919b90ab 100644 --- a/src/test/java/edu/harvard/iq/dataverse/util/SystemConfigTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/util/SystemConfigTest.java @@ -1,13 +1,70 @@ package edu.harvard.iq.dataverse.util; +import edu.harvard.iq.dataverse.settings.JvmSettings; +import edu.harvard.iq.dataverse.settings.SettingsServiceBean; +import edu.harvard.iq.dataverse.util.testing.JvmSetting; import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.CsvSource; +import org.mockito.InjectMocks; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.mockito.Mockito.doReturn; +@ExtendWith(MockitoExtension.class) class SystemConfigTest { - + + @InjectMocks + SystemConfig systemConfig = new SystemConfig(); + @Mock + SettingsServiceBean settingsService; + + @Test + @JvmSetting(key = JvmSettings.SOLR_HOST, value = "foobar") + @JvmSetting(key = JvmSettings.SOLR_PORT, value = "1234") + void testGetSolrHostColonPortNoDBEntry() { + // given + String hostPort = "foobar:1234"; + + // when + doReturn(null).when(settingsService).getValueForKey(SettingsServiceBean.Key.SolrHostColonPort); + String result = systemConfig.getSolrHostColonPort(); + + // then + assertEquals(hostPort, result); + } + + @Test + @JvmSetting(key = JvmSettings.SOLR_HOST, value = "foobar") + @JvmSetting(key = JvmSettings.SOLR_PORT, value = "1234") + void testGetSolrHostColonPortWithDBEntry() { + // given + String dbEntry = "hello:4321"; + + // when + doReturn(dbEntry).when(settingsService).getValueForKey(SettingsServiceBean.Key.SolrHostColonPort); + String result = systemConfig.getSolrHostColonPort(); + + // then + assertEquals(dbEntry, result); + } + + @Test + void testGetSolrHostColonPortDefault() { + // given + String hostPort = "localhost:8983"; + + // when + doReturn(null).when(settingsService).getValueForKey(SettingsServiceBean.Key.SolrHostColonPort); + String result = systemConfig.getSolrHostColonPort(); + + // then + assertEquals(hostPort, result); + } + @Test void testGetLongLimitFromStringOrDefault_withNullInput() { long defaultValue = 5L; From af36a0d4b6fb03502bb6dec65d0acfd60116d2c4 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 21 Jun 2022 20:59:40 +0200 Subject: [PATCH 015/173] feat(settings,solr): make Solr URL details configurable When using Dataverse with a non-default Solr, HTTPS, custom core name or similar, it's necessary to 
have a configurable URL for the Solr endpoint. This becomes now possible via MicroProfile Config, defaulting to the old variant. --- .../iq/dataverse/search/IndexServiceBean.java | 13 +++- .../dataverse/search/SolrClientService.java | 12 ++- .../iq/dataverse/settings/JvmSettings.java | 3 + .../META-INF/microprofile-config.properties | 3 + .../search/IndexServiceBeanTest.java | 73 +++++++++++++++---- .../search/SolrClientServiceTest.java | 59 +++++++++++++++ 6 files changed, 144 insertions(+), 19 deletions(-) create mode 100644 src/test/java/edu/harvard/iq/dataverse/search/SolrClientServiceTest.java diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index 484e5768eb1..06a6e5928df 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -30,6 +30,7 @@ import edu.harvard.iq.dataverse.datavariable.VariableMetadataUtil; import edu.harvard.iq.dataverse.datavariable.VariableServiceBean; import edu.harvard.iq.dataverse.harvest.client.HarvestingClient; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.StringUtil; @@ -86,6 +87,8 @@ import org.apache.tika.metadata.Metadata; import org.apache.tika.parser.ParseContext; import org.apache.tika.sax.BodyContentHandler; +import org.eclipse.microprofile.config.Config; +import org.eclipse.microprofile.config.ConfigProvider; import org.xml.sax.ContentHandler; @Stateless @@ -93,6 +96,7 @@ public class IndexServiceBean { private static final Logger logger = Logger.getLogger(IndexServiceBean.class.getCanonicalName()); + private static final Config config = ConfigProvider.getConfig(); @PersistenceContext(unitName = "VDCNet-ejbPU") private EntityManager em; @@ -153,13 +157,18 @@ public class IndexServiceBean { public static final String HARVESTED = "Harvested"; private String rootDataverseName; private Dataverse rootDataverseCached; - private SolrClient solrServer; + SolrClient solrServer; private VariableMetadataUtil variableMetadataUtil; @PostConstruct public void init() { - String urlString = "http://" + systemConfig.getSolrHostColonPort() + "/solr/collection1"; + // Get from MPCONFIG. Might be configured by a sysadmin or simply return the default shipped with + // resources/META-INF/microprofile-config.properties. 
+ String protocol = JvmSettings.SOLR_PROT.lookup(); + String path = JvmSettings.SOLR_PATH.lookup(); + + String urlString = protocol + "://" + systemConfig.getSolrHostColonPort() + path; solrServer = new HttpSolrClient.Builder(urlString).build(); rootDataverseName = findRootDataverseCached().getName(); diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SolrClientService.java b/src/main/java/edu/harvard/iq/dataverse/search/SolrClientService.java index f00ece9aacc..70483853979 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SolrClientService.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SolrClientService.java @@ -5,6 +5,7 @@ */ package edu.harvard.iq.dataverse.search; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.util.SystemConfig; import java.io.IOException; import java.util.logging.Logger; @@ -15,6 +16,8 @@ import javax.inject.Named; import org.apache.solr.client.solrj.SolrClient; import org.apache.solr.client.solrj.impl.HttpSolrClient; +import org.eclipse.microprofile.config.Config; +import org.eclipse.microprofile.config.ConfigProvider; /** * @@ -30,6 +33,7 @@ @Singleton public class SolrClientService { private static final Logger logger = Logger.getLogger(SolrClientService.class.getCanonicalName()); + private static final Config config = ConfigProvider.getConfig(); @EJB SystemConfig systemConfig; @@ -38,9 +42,13 @@ public class SolrClientService { @PostConstruct public void init() { - String urlString = "http://" + systemConfig.getSolrHostColonPort() + "/solr/collection1"; - solrClient = new HttpSolrClient.Builder(urlString).build(); + // Get from MPCONFIG. Might be configured by a sysadmin or simply return the default shipped with + // resources/META-INF/microprofile-config.properties. 
+ String protocol = JvmSettings.SOLR_PROT.lookup(); + String path = JvmSettings.SOLR_PATH.lookup(); + String urlString = protocol + "://" + systemConfig.getSolrHostColonPort() + path; + solrClient = new HttpSolrClient.Builder(urlString).build(); } @PreDestroy diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java index e73453abc16..222346e3b35 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java @@ -46,6 +46,9 @@ public enum JvmSettings { SCOPE_SOLR(PREFIX, "solr"), SOLR_HOST(SCOPE_SOLR, "host"), SOLR_PORT(SCOPE_SOLR, "port"), + SOLR_PROT(SCOPE_SOLR, "protocol"), + SOLR_CORE(SCOPE_SOLR, "core"), + SOLR_PATH(SCOPE_SOLR, "path"), ; diff --git a/src/main/resources/META-INF/microprofile-config.properties b/src/main/resources/META-INF/microprofile-config.properties index b6aa686de01..c846d80220c 100644 --- a/src/main/resources/META-INF/microprofile-config.properties +++ b/src/main/resources/META-INF/microprofile-config.properties @@ -8,6 +8,9 @@ dataverse.solr.host=localhost # Activating mp config profile -Dmp.config.profile=ct changes default to "solr" as DNS name %ct.dataverse.solr.host=solr dataverse.solr.port=8983 +dataverse.solr.protocol=http +dataverse.solr.core=collection1 +dataverse.solr.path=/solr/${dataverse.solr.core} # DATABASE dataverse.db.host=localhost diff --git a/src/test/java/edu/harvard/iq/dataverse/search/IndexServiceBeanTest.java b/src/test/java/edu/harvard/iq/dataverse/search/IndexServiceBeanTest.java index ad4647e4898..aab6af660cb 100644 --- a/src/test/java/edu/harvard/iq/dataverse/search/IndexServiceBeanTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/search/IndexServiceBeanTest.java @@ -1,18 +1,5 @@ package edu.harvard.iq.dataverse.search; -import static org.junit.Assert.assertTrue; - -import java.io.IOException; -import java.util.Arrays; -import java.util.Set; -import java.util.logging.Logger; -import java.util.stream.Collectors; - -import org.apache.solr.client.solrj.SolrServerException; -import org.junit.Before; -import org.junit.Test; -import org.mockito.Mockito; - import edu.harvard.iq.dataverse.ControlledVocabularyValue; import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DatasetField; @@ -26,21 +13,47 @@ import edu.harvard.iq.dataverse.MetadataBlock; import edu.harvard.iq.dataverse.branding.BrandingUtil; import edu.harvard.iq.dataverse.mocks.MocksFactory; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.SystemConfig; +import edu.harvard.iq.dataverse.util.testing.JvmSetting; +import org.apache.solr.client.solrj.SolrServerException; +import org.apache.solr.client.solrj.impl.HttpSolrClient; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.InjectMocks; +import org.mockito.Mock; +import org.mockito.Mockito; +import org.mockito.junit.jupiter.MockitoExtension; +import java.io.IOException; +import java.util.Arrays; +import java.util.Set; +import java.util.logging.Logger; +import java.util.stream.Collectors; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +@ExtendWith(MockitoExtension.class) public class IndexServiceBeanTest { private static final Logger logger = 
Logger.getLogger(IndexServiceBeanTest.class.getCanonicalName()); private IndexServiceBean indexService; private Dataverse dataverse; - @Before + @Mock + private SettingsServiceBean settingsService; + @InjectMocks + private SystemConfig systemConfig = new SystemConfig(); + + @BeforeEach public void setUp() { dataverse = MocksFactory.makeDataverse(); dataverse.setDataverseType(DataverseType.UNCATEGORIZED); indexService = new IndexServiceBean(); - indexService.systemConfig = new SystemConfig(); + indexService.systemConfig = systemConfig; indexService.settingsService = Mockito.mock(SettingsServiceBean.class); indexService.dataverseService = Mockito.mock(DataverseServiceBean.class); indexService.datasetFieldService = Mockito.mock(DatasetFieldServiceBean.class); @@ -48,6 +61,36 @@ public void setUp() { Mockito.when(indexService.dataverseService.findRootDataverse()).thenReturn(dataverse); } + + @Test + public void testInitWithDefaults() { + // given + String url = "http://localhost:8983/solr/collection1"; + + // when + indexService.init(); + + // then + HttpSolrClient client = (HttpSolrClient) indexService.solrServer; + assertEquals(url, client.getBaseURL()); + } + + + @Test + @JvmSetting(key = JvmSettings.SOLR_HOST, value = "foobar") + @JvmSetting(key = JvmSettings.SOLR_PORT, value = "1234") + @JvmSetting(key = JvmSettings.SOLR_CORE, value = "test") + void testInitWithConfig() { + // given + String url = "http://foobar:1234/solr/test"; + + // when + indexService.init(); + + // then + HttpSolrClient client = (HttpSolrClient) indexService.solrServer; + assertEquals(url, client.getBaseURL()); + } @Test public void TestIndexing() throws SolrServerException, IOException { diff --git a/src/test/java/edu/harvard/iq/dataverse/search/SolrClientServiceTest.java b/src/test/java/edu/harvard/iq/dataverse/search/SolrClientServiceTest.java new file mode 100644 index 00000000000..a3b3c8a2080 --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/search/SolrClientServiceTest.java @@ -0,0 +1,59 @@ +package edu.harvard.iq.dataverse.search; + +import edu.harvard.iq.dataverse.settings.JvmSettings; +import edu.harvard.iq.dataverse.settings.SettingsServiceBean; +import edu.harvard.iq.dataverse.util.SystemConfig; +import edu.harvard.iq.dataverse.util.testing.JvmSetting; +import org.apache.solr.client.solrj.impl.HttpSolrClient; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.InjectMocks; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +@ExtendWith(MockitoExtension.class) +class SolrClientServiceTest { + + @Mock + SettingsServiceBean settingsServiceBean; + @InjectMocks + SystemConfig systemConfig; + SolrClientService clientService = new SolrClientService(); + + @BeforeEach + void setUp() { + clientService.systemConfig = systemConfig; + } + + @Test + void testInitWithDefaults() { + // given + String url = "http://localhost:8983/solr/collection1"; + + // when + clientService.init(); + + // then + HttpSolrClient client = (HttpSolrClient) clientService.getSolrClient(); + assertEquals(url, client.getBaseURL()); + } + + @Test + @JvmSetting(key = JvmSettings.SOLR_HOST, value = "foobar") + @JvmSetting(key = JvmSettings.SOLR_PORT, value = "1234") + @JvmSetting(key = JvmSettings.SOLR_CORE, value = "test") + void testInitWithConfig() { + // given + String url = "http://foobar:1234/solr/test"; + + // when + clientService.init(); + + 
// then + HttpSolrClient client = (HttpSolrClient) clientService.getSolrClient(); + assertEquals(url, client.getBaseURL()); + } +} \ No newline at end of file From 0727d85b6a2755e2eb754988ff20a55aae8c92fb Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 23 Jun 2022 18:13:27 +0200 Subject: [PATCH 016/173] docs(settings): mark :SolrHostColonPort with @Deprecated #7000 --- .../harvard/iq/dataverse/settings/SettingsServiceBean.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java index c12b8f6e452..98dd6e2fa3b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java @@ -174,7 +174,12 @@ public enum Key { * */ SearchRespectPermissionRoot, - /** Solr hostname and port, such as "localhost:8983". */ + /** + * Solr hostname and port, such as "localhost:8983". + * @deprecated New installations should not use this database setting, but use {@link JvmSettings#SOLR_HOST} + * and {@link JvmSettings#SOLR_PORT}. + */ + @Deprecated(forRemoval = true, since = "2022-07-01") SolrHostColonPort, /** Enable full-text indexing in solr up to max file size */ SolrFullTextIndexing, //true or false (default) From 6965bebe78b83987af069f4f6a13e6a50539884a Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 21 Jun 2022 21:34:07 +0200 Subject: [PATCH 017/173] docs(settings): add Solr MPCONFIG options to guides #7000 Describe the new options to set the Solr endpoint, crosslinking the old way and adding hints about MPCONFIG profiles. --- .../source/installation/config.rst | 74 +++++++++++++++++++ 1 file changed, 74 insertions(+) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index ab0bad70206..c1cf39e4182 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -1462,6 +1462,61 @@ Defaults to ``5432``, the default PostgreSQL port. Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_DB_PORT``. +.. _dataverse.solr.host: + +dataverse.solr.host ++++++++++++++++++++ + +The hostname of a Solr server to connect to. Remember to restart / redeploy Dataverse after changing the setting +(as with :ref:`:SolrHostColonPort`). + +Defaults to ``localhost``. + +Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_SOLR_HOST``. +Defaults to ``solr``, when used with ``mp.config.profile=ct`` (:ref:`see below <:ApplicationServerSettings>`). + +dataverse.solr.port ++++++++++++++++++++ + +The Solr server port to connect to. Remember to restart / redeploy Dataverse after changing the setting +(as with :ref:`:SolrHostColonPort`). + +Defaults to ``8983``, the default Solr port. + +Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_SOLR_PORT``. + +dataverse.solr.core ++++++++++++++++++++ + +The name of the Solr core to use for this Dataverse installation. Might be used to switch to a different core quickly. +Remember to restart / redeploy Dataverse after changing the setting (as with :ref:`:SolrHostColonPort`). + +Defaults to ``collection1``. + +Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_SOLR_CORE``. 
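+
+For example, a minimal sketch of pointing an installation at a differently named core via JVM options
+(the core name ``mycollection`` is purely illustrative, not a shipped default):
+
+``./asadmin create-jvm-options "\-Ddataverse.solr.core=mycollection"``
+
+Setting the environment variable ``DATAVERSE_SOLR_CORE=mycollection`` before starting the application server
+would have the same effect.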
+
+dataverse.solr.protocol
++++++++++++++++++++++++
+
+The Solr server URL protocol for the connection. Remember to restart / redeploy Dataverse after changing the setting
+(as with :ref:`:SolrHostColonPort`).
+
+Defaults to ``http``, but might be set to ``https`` for Solr installations served over TLS.
+
+Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_SOLR_PROTOCOL``.
+
+dataverse.solr.path
++++++++++++++++++++
+
+The path part of the Solr endpoint URL (e.g. ``/solr/collection1`` of ``http://localhost:8983/solr/collection1``).
+Might be used to target a Solr API at non-default places. Remember to restart / redeploy Dataverse after changing the
+setting (as with :ref:`:SolrHostColonPort`).
+
+Defaults to ``/solr/${dataverse.solr.core}``, interpolating the core name when used. Make sure to keep the variable
+in the path if you override it and still want your configured core name to be used!
+
+Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_SOLR_PATH``.
+
 dataverse.rserve.host
 +++++++++++++++++++++
 
@@ -1673,6 +1728,21 @@ To facilitate large file upload and download, the Dataverse Software installer b
 
 and restart Payara to apply your change.
 
+mp.config.profile
++++++++++++++++++
+
+MicroProfile Config 2.0 defines the `concept of "profiles" `_.
+They can be used to change configuration values by context. This is used in Dataverse to change some configuration
+defaults when running inside a container context rather than in a classic installation.
+
+As per the spec, you will need to set the configuration value ``mp.config.profile`` to ``ct`` as early as possible.
+This is best done with a system property:
+
+``./asadmin create-system-properties 'mp.config.profile=ct'``
+
+You might also create your own profiles and use these; please refer to the upstream documentation linked above.
+
+
 .. _database-settings:
 
 Database Settings
@@ -2160,6 +2230,8 @@ Limit the number of files in a zip that your Dataverse installation will accept.
 
 ``curl -X PUT -d 2048 http://localhost:8080/api/admin/settings/:ZipUploadFilesLimit``
 
+.. _:SolrHostColonPort:
+
 :SolrHostColonPort
 ++++++++++++++++++
 
@@ -2167,6 +2239,8 @@ By default your Dataverse installation will attempt to connect to Solr on port 8
 
 ``curl -X PUT -d localhost:8983 http://localhost:8080/api/admin/settings/:SolrHostColonPort``
 
+**Note:** Instead of using the database setting, you can alternatively use JVM settings like :ref:`dataverse.solr.host`.
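+
+For example, an illustrative sketch of switching from the database setting to the JVM settings
+(the hostname ``solr.example.org`` is a placeholder):
+
+``curl -X DELETE http://localhost:8080/api/admin/settings/:SolrHostColonPort``
+
+``export DATAVERSE_SOLR_HOST=solr.example.org`` and ``export DATAVERSE_SOLR_PORT=8983``, followed by a restart.
+
+Removing the database setting first matters because it always takes precedence over the MicroProfile Config sources.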
+ :SolrFullTextIndexing +++++++++++++++++++++ From a7fe29c8e2e088fff71a13327e28c7cbb9595c15 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 16 Sep 2022 10:32:10 +0200 Subject: [PATCH 018/173] test(settings): make SystemConfigTest version testing use JvmSetting extension --- .../harvard/iq/dataverse/util/SystemConfigTest.java | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/util/SystemConfigTest.java b/src/test/java/edu/harvard/iq/dataverse/util/SystemConfigTest.java index b8ad0a57748..3bbe331a361 100644 --- a/src/test/java/edu/harvard/iq/dataverse/util/SystemConfigTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/util/SystemConfigTest.java @@ -1,6 +1,7 @@ package edu.harvard.iq.dataverse.util; import edu.harvard.iq.dataverse.settings.JvmSettings; +import edu.harvard.iq.dataverse.util.testing.JvmSetting; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.CsvSource; @@ -26,22 +27,18 @@ void testGetVersion() { } @Test + @JvmSetting(key = JvmSettings.VERSION, value = "100.100") + @JvmSetting(key = JvmSettings.BUILD, value = "FOOBAR") void testGetVersionWithBuild() { - // given - String version = "100.100"; - String build = "FOOBAR"; - System.setProperty(JvmSettings.VERSION.getScopedKey(), version); - System.setProperty(JvmSettings.BUILD.getScopedKey(), build); - // when String result = systemConfig.getVersion(true); // then - assertTrue(result.startsWith(version), "'" + result + "' not starting with " + version); + assertTrue(result.startsWith("100.100"), "'" + result + "' not starting with 100.100"); assertTrue(result.contains("build")); // Cannot test this here - there might be the bundle file present which is not under test control - //assertTrue(result.endsWith(build), "'" + result + "' not ending with " + build); + //assertTrue(result.endsWith("FOOBAR"), "'" + result + "' not ending with FOOBAR"); } @Test From 3fb596e1f97fc074e8dc2056728cb5ace97989e1 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Mon, 27 Jun 2022 14:19:35 +0200 Subject: [PATCH 019/173] refactor(settings): replace dataverse.fqdn and siteUrl lookups via MPCONFIG #7000 - Add both settings to JvmSettings to enable lookup - Refactor SystemConfig.getDataverseSiteUrlStatic to use MPCONFIG, but keep current behaviour of constructing the URL from FQDN or DNS reverse lookup. (Out of scope here, see #6636) - Replace clones of the method in Xrecord, DdiExportUtil, HandlenetServiceBean with direct usages of the static method to avoid unnecessary duplicated code. - Refactor SchemaDotOrgExporterTest with @JvmSetting for site url. - Remove unused constants from SystemConfig - Added default for container usage within "ct" profile, so we avoid extra lookups/settings for development usage. 
See also https://github.com/IQSS/dataverse/issues/6636 --- .../iq/dataverse/HandlenetServiceBean.java | 19 +--- .../edu/harvard/iq/dataverse/api/Info.java | 3 +- .../dataverse/export/ddi/DdiExportUtil.java | 38 +------ .../harvest/server/xoai/Xrecord.java | 25 +---- .../iq/dataverse/settings/JvmSettings.java | 2 + .../iq/dataverse/util/SystemConfig.java | 98 +++++++++---------- .../META-INF/microprofile-config.properties | 5 + .../export/SchemaDotOrgExporterTest.java | 6 +- 8 files changed, 69 insertions(+), 127 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/HandlenetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/HandlenetServiceBean.java index 1a8ee8a85e8..df16991b51e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/HandlenetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/HandlenetServiceBean.java @@ -24,8 +24,6 @@ import java.io.File; import java.io.FileInputStream; -import java.net.InetAddress; -import java.net.UnknownHostException; import java.util.*; import java.util.logging.Level; import java.util.logging.Logger; @@ -34,6 +32,7 @@ import java.security.PrivateKey; /* Handlenet imports: */ +import edu.harvard.iq.dataverse.util.SystemConfig; import net.handle.hdllib.AbstractMessage; import net.handle.hdllib.AbstractResponse; import net.handle.hdllib.AdminRecord; @@ -247,21 +246,7 @@ private String getRegistrationUrl(DvObject dvObject) { } public String getSiteUrl() { - logger.log(Level.FINE,"getSiteUrl"); - String hostUrl = System.getProperty("dataverse.siteUrl"); - if (hostUrl != null && !"".equals(hostUrl)) { - return hostUrl; - } - String hostName = System.getProperty("dataverse.fqdn"); - if (hostName == null) { - try { - hostName = InetAddress.getLocalHost().getCanonicalHostName(); - } catch (UnknownHostException e) { - return null; - } - } - hostUrl = "https://" + hostName; - return hostUrl; + return SystemConfig.getDataverseSiteUrlStatic(); } private byte[] readKey(final String file) { diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Info.java b/src/main/java/edu/harvard/iq/dataverse/api/Info.java index 4fe5cba5b9f..fd7824c15cf 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Info.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Info.java @@ -1,5 +1,6 @@ package edu.harvard.iq.dataverse.api; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.SystemConfig; import javax.ejb.EJB; @@ -44,7 +45,7 @@ public Response getInfo() { @GET @Path("server") public Response getServer() { - return response( req -> ok(systemConfig.getDataverseServer())); + return response( req -> ok(JvmSettings.FQDN.lookup())); } @GET diff --git a/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java b/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java index 4bbcd653ac3..eb7632dd03c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java @@ -32,18 +32,15 @@ import edu.harvard.iq.dataverse.export.DDIExporter; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; -import static edu.harvard.iq.dataverse.util.SystemConfig.FQDN; -import static edu.harvard.iq.dataverse.util.SystemConfig.SITE_URL; import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.FileUtil; +import edu.harvard.iq.dataverse.util.SystemConfig; import edu.harvard.iq.dataverse.util.json.JsonUtil; import 
edu.harvard.iq.dataverse.util.xml.XmlPrinter; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.OutputStream; -import java.net.InetAddress; -import java.net.UnknownHostException; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; @@ -1292,7 +1289,7 @@ private static void writeNotesElement(XMLStreamWriter xmlw, DatasetVersionDTO da // harvesting *all* files are encoded as otherMats; even tabular ones. private static void createOtherMats(XMLStreamWriter xmlw, List fileDtos) throws XMLStreamException { // The preferred URL for this dataverse, for cooking up the file access API links: - String dataverseUrl = getDataverseSiteUrl(); + String dataverseUrl = SystemConfig.getDataverseSiteUrlStatic(); for (FileDTO fileDTo : fileDtos) { // We'll continue using the scheme we've used before, in DVN2-3: non-tabular files are put into otherMat, @@ -1339,7 +1336,7 @@ private static void createOtherMats(XMLStreamWriter xmlw, List fileDtos private static void createOtherMatsFromFileMetadatas(XMLStreamWriter xmlw, List fileMetadatas) throws XMLStreamException { // The preferred URL for this dataverse, for cooking up the file access API links: - String dataverseUrl = getDataverseSiteUrl(); + String dataverseUrl = SystemConfig.getDataverseSiteUrlStatic(); for (FileMetadata fileMetadata : fileMetadatas) { // We'll continue using the scheme we've used before, in DVN2-3: non-tabular files are put into otherMat, @@ -1555,33 +1552,6 @@ private static void saveJsonToDisk(String datasetVersionAsJson) throws IOExcepti Files.write(Paths.get("/tmp/out.json"), datasetVersionAsJson.getBytes()); } - /** - * The "official", designated URL of the site; - * can be defined as a complete URL; or derived from the - * "official" hostname. 
If none of these options is set, - * defaults to the InetAddress.getLocalHOst() and https; - */ - private static String getDataverseSiteUrl() { - String hostUrl = System.getProperty(SITE_URL); - if (hostUrl != null && !"".equals(hostUrl)) { - return hostUrl; - } - String hostName = System.getProperty(FQDN); - if (hostName == null) { - try { - hostName = InetAddress.getLocalHost().getCanonicalHostName(); - } catch (UnknownHostException e) { - hostName = null; - } - } - - if (hostName != null) { - return "https://" + hostName; - } - - return "http://localhost:8080"; - } - @@ -1893,7 +1863,7 @@ private static void createVarDDI(XMLStreamWriter xmlw, DataVariable dv, FileMeta } private static void createFileDscr(XMLStreamWriter xmlw, DatasetVersion datasetVersion) throws XMLStreamException { - String dataverseUrl = getDataverseSiteUrl(); + String dataverseUrl = SystemConfig.getDataverseSiteUrlStatic(); for (FileMetadata fileMetadata : datasetVersion.getFileMetadatas()) { DataFile dataFile = fileMetadata.getDataFile(); diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/server/xoai/Xrecord.java b/src/main/java/edu/harvard/iq/dataverse/harvest/server/xoai/Xrecord.java index 7e115c78f06..4485b798658 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/server/xoai/Xrecord.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/server/xoai/Xrecord.java @@ -8,14 +8,12 @@ import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.export.ExportException; import edu.harvard.iq.dataverse.export.ExportService; -import static edu.harvard.iq.dataverse.util.SystemConfig.FQDN; -import static edu.harvard.iq.dataverse.util.SystemConfig.SITE_URL; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; -import java.net.InetAddress; -import java.net.UnknownHostException; + +import edu.harvard.iq.dataverse.util.SystemConfig; import org.apache.poi.util.ReplacingInputStream; /** @@ -149,7 +147,7 @@ private void writeMetadataStream(InputStream inputStream, OutputStream outputStr private String customMetadataExtensionRef(String identifier) { String ret = "<" + METADATA_FIELD + " directApiCall=\"" - + getDataverseSiteUrl() + + SystemConfig.getDataverseSiteUrlStatic() + DATAVERSE_EXTENDED_METADATA_API + "?exporter=" + DATAVERSE_EXTENDED_METADATA_FORMAT @@ -164,21 +162,4 @@ private String customMetadataExtensionRef(String identifier) { private boolean isExtendedDataverseMetadataMode(String formatName) { return DATAVERSE_EXTENDED_METADATA_FORMAT.equals(formatName); } - - private String getDataverseSiteUrl() { - String hostUrl = System.getProperty(SITE_URL); - if (hostUrl != null && !"".equals(hostUrl)) { - return hostUrl; - } - String hostName = System.getProperty(FQDN); - if (hostName == null) { - try { - hostName = InetAddress.getLocalHost().getCanonicalHostName(); - } catch (UnknownHostException e) { - return null; - } - } - hostUrl = "https://" + hostName; - return hostUrl; - } } diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java index 223e4b86da9..8d2832980cc 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java @@ -41,6 +41,8 @@ public enum JvmSettings { // GENERAL SETTINGS VERSION(PREFIX, "version"), BUILD(PREFIX, "build"), + FQDN(PREFIX, "fqdn"), + SITE_URL(PREFIX, "siteUrl"), ; diff --git 
a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java index bd27405fae5..635f47c5800 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java @@ -8,6 +8,7 @@ import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; import edu.harvard.iq.dataverse.authorization.providers.builtin.BuiltinAuthenticationProvider; import edu.harvard.iq.dataverse.authorization.providers.oauth2.AbstractOAuth2AuthenticationProvider; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.validation.PasswordValidatorUtil; import java.io.FileInputStream; @@ -23,6 +24,7 @@ import java.util.List; import java.util.Map; import java.util.MissingResourceException; +import java.util.Optional; import java.util.Properties; import java.util.ResourceBundle; import java.util.logging.Logger; @@ -61,23 +63,7 @@ public class SystemConfig { AuthenticationServiceBean authenticationService; public static final String DATAVERSE_PATH = "/dataverse/"; - - /** - * A JVM option for the advertised fully qualified domain name (hostname) of - * the Dataverse installation, such as "dataverse.example.com", which may - * differ from the hostname that the server knows itself as. - * - * The equivalent in DVN 3.x was "dvn.inetAddress". - */ - public static final String FQDN = "dataverse.fqdn"; - - /** - * A JVM option for specifying the "official" URL of the site. - * Unlike the FQDN option above, this would be a complete URL, - * with the protocol, port number etc. - */ - public static final String SITE_URL = "dataverse.siteUrl"; - + /** * A JVM option for where files are stored on the file system. */ @@ -340,32 +326,58 @@ public static int getMinutesUntilPasswordResetTokenExpires() { } /** - * The "official", designated URL of the site; - * can be defined as a complete URL; or derived from the - * "official" hostname. If none of these options is set, - * defaults to the InetAddress.getLocalHOst() and https; - * These are legacy JVM options. Will be eventualy replaced - * by the Settings Service configuration. + * Lookup (or construct) the designated URL of this instance from configuration. + * + * Can be defined as a complete URL via dataverse.siteUrl; or derived from the hostname + * dataverse.fqdn and HTTPS. If none of these options is set, defaults to the + * {@link InetAddress#getLocalHost} and HTTPS. + * + * NOTE: This method does not provide any validation. + * TODO: The behaviour of this method is subject to a later change, see + * https://github.com/IQSS/dataverse/issues/6636 + * + * @return The designated URL of this instance as per configuration. */ public String getDataverseSiteUrl() { return getDataverseSiteUrlStatic(); } + /** + * Lookup (or construct) the designated URL of this instance from configuration. + * + * Can be defined as a complete URL via dataverse.siteUrl; or derived from the hostname + * dataverse.fqdn and HTTPS. If none of these options is set, defaults to the + * {@link InetAddress#getLocalHost} and HTTPS. + * + * NOTE: This method does not provide any validation. + * TODO: The behaviour of this method is subject to a later change, see + * https://github.com/IQSS/dataverse/issues/6636 + * + * @return The designated URL of this instance as per configuration. 
+ */ public static String getDataverseSiteUrlStatic() { - String hostUrl = System.getProperty(SITE_URL); - if (hostUrl != null && !"".equals(hostUrl)) { - return hostUrl; + // If dataverse.siteUrl has been configured, simply return it + Optional siteUrl = JvmSettings.SITE_URL.lookupOptional(); + if (siteUrl.isPresent()) { + return siteUrl.get(); } - String hostName = System.getProperty(FQDN); - if (hostName == null) { - try { - hostName = InetAddress.getLocalHost().getCanonicalHostName(); - } catch (UnknownHostException e) { - return null; - } + + // Other wise try to lookup dataverse.fqdn setting and default to HTTPS + Optional fqdn = JvmSettings.FQDN.lookupOptional(); + if (fqdn.isPresent()) { + return "https://" + fqdn.get(); + } + + // Last resort - get the servers local name and use it. + // BEWARE - this is dangerous. + // 1) A server might have a different name than your repository URL. + // 2) The underlying reverse DNS lookup might point to a different name than your repository URL. + // 3) If this server has multiple IPs assigned, which one will it be for the lookup? + try { + return "https://" + InetAddress.getLocalHost().getCanonicalHostName(); + } catch (UnknownHostException e) { + return null; } - hostUrl = "https://" + hostName; - return hostUrl; } /** @@ -375,22 +387,6 @@ public String getPageURLWithQueryString() { return PrettyContext.getCurrentInstance().getRequestURL().toURL() + PrettyContext.getCurrentInstance().getRequestQueryString().toQueryString(); } - /** - * The "official" server's fully-qualified domain name: - */ - public String getDataverseServer() { - // still reliese on a JVM option: - String fqdn = System.getProperty(FQDN); - if (fqdn == null) { - try { - fqdn = InetAddress.getLocalHost().getCanonicalHostName(); - } catch (UnknownHostException e) { - return null; - } - } - return fqdn; - } - public String getGuidesBaseUrl() { String saneDefault = "https://guides.dataverse.org"; String guidesBaseUrl = settingsService.getValueForKey(SettingsServiceBean.Key.GuidesBaseUrl, saneDefault); diff --git a/src/main/resources/META-INF/microprofile-config.properties b/src/main/resources/META-INF/microprofile-config.properties index 16298d83118..a9ee8236c7e 100644 --- a/src/main/resources/META-INF/microprofile-config.properties +++ b/src/main/resources/META-INF/microprofile-config.properties @@ -3,6 +3,11 @@ dataverse.version=${project.version} dataverse.build= +# Default only for containers! 
(keep mimicking the current behaviour - +# changing that is part of https://github.com/IQSS/dataverse/issues/6636) +%ct.dataverse.fqdn=localhost +%ct.dataverse.siteUrl=http://${dataverse.fqdn}:8080 + # DATABASE dataverse.db.host=localhost dataverse.db.port=5432 diff --git a/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java b/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java index b5453e75fe5..7119dfaf834 100644 --- a/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java @@ -6,9 +6,9 @@ import edu.harvard.iq.dataverse.license.LicenseServiceBean; import edu.harvard.iq.dataverse.mocks.MockDatasetFieldSvc; -import static edu.harvard.iq.dataverse.util.SystemConfig.SITE_URL; import static edu.harvard.iq.dataverse.util.SystemConfig.FILES_HIDE_SCHEMA_DOT_ORG_DOWNLOAD_URLS; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.json.JsonParser; import edu.harvard.iq.dataverse.util.json.JsonUtil; @@ -31,6 +31,8 @@ import javax.json.Json; import javax.json.JsonObject; import javax.json.JsonReader; + +import edu.harvard.iq.dataverse.util.testing.JvmSetting; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.Test; @@ -64,6 +66,7 @@ public static void tearDownClass() { * Test of exportDataset method, of class SchemaDotOrgExporter. */ @Test + @JvmSetting(key = JvmSettings.SITE_URL, value = "https://librascholar.org") public void testExportDataset() throws Exception { File datasetVersionJson = new File("src/test/resources/json/dataset-finch2.json"); String datasetVersionAsJson = new String(Files.readAllBytes(Paths.get(datasetVersionJson.getAbsolutePath()))); @@ -92,7 +95,6 @@ public void testExportDataset() throws Exception { Dataverse dataverse = new Dataverse(); dataverse.setName("LibraScholar"); dataset.setOwner(dataverse); - System.setProperty(SITE_URL, "https://librascholar.org"); boolean hideFileUrls = false; if (hideFileUrls) { System.setProperty(FILES_HIDE_SCHEMA_DOT_ORG_DOWNLOAD_URLS, "true"); From 3f19c121dee708831d48c83a4fc819986379e819 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Mon, 27 Jun 2022 15:05:57 +0200 Subject: [PATCH 020/173] docs(settings): update fqdn and siteUrl desc - Notes about MPCONFIG usage. - Rewording to make it more clear how this shall be used. --- .../source/installation/config.rst | 44 ++++++++++++++----- 1 file changed, 33 insertions(+), 11 deletions(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index ab0bad70206..65912e77245 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -1376,30 +1376,52 @@ When changing values these values with ``asadmin``, you'll need to delete the ol It's also possible to change these values by stopping Payara, editing ``payara5/glassfish/domains/domain1/config/domain.xml``, and restarting Payara. +.. _dataverse.fqdn: + dataverse.fqdn ++++++++++++++ -If the Dataverse installation has multiple DNS names, this option specifies the one to be used as the "official" host name. For example, you may want to have dataverse.example.edu, and not the less appealing server-123.socsci.example.edu to appear exclusively in all the registered global identifiers, Data Deposit API records, etc. 
+The URL to access your Dataverse installation gets used in multiple places:
+
+- Email confirmation links
+- Password reset links
+- Generating a Private URL
+- PID minting
+- Exporting to Schema.org format (and showing JSON-LD in HTML's tag)
+- Exporting to DDI format
+- Which Dataverse installation an "external tool" should return to
+- URLs embedded in SWORD API responses
+- ...
 
-The password reset feature requires ``dataverse.fqdn`` to be configured.
+Usually it will follow the pattern ``https:///``.
+The FQDN part of your Dataverse installation URL can be determined by setting ``dataverse.fqdn``.
 
-.. note::
+**Notes:**
 
-    Do note that whenever the system needs to form a service URL, by default, it will be formed with ``https://`` and port 443. I.e.,
-    ``https://{dataverse.fqdn}/``
-    If that does not suit your setup, you can define an additional option, ``dataverse.siteUrl``, explained below.
+- The URL will default to using ``https://`` and no additional port information. If that does not suit your setup, you
+  can define an additional option, ``dataverse.siteUrl``, :ref:`explained below <dataverse.siteUrl>`, which always
+  takes precedence.
+- Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_FQDN``.
+  Defaults to ``localhost`` when used with ``mp.config.profile=ct``.
 
 .. _dataverse.siteUrl:
 
 dataverse.siteUrl
 +++++++++++++++++
 
-.. note::
+Some environments may require using a different URL pattern to access your installation. You might need to use
+HTTP without "S", a non-standard port and so on. This is especially common in development or testing environments.
+
+You can provide a custom-tailored site URL via ``dataverse.siteUrl``, which always takes precedence.
+Example: ``dataverse.siteUrl=http://localhost:8080``
+
+**Notes:**
+
+- This setting may be used in combination with variable replacement, referencing :ref:`dataverse.fqdn` with
+  ``./asadmin create-jvm-options "\-Ddataverse.siteUrl=http\://\${dataverse.fqdn}\:8080"``
+- Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_SITEURL``.
+  Defaults to ``http://${dataverse.fqdn}:8080`` when used with ``mp.config.profile=ct``.
 
-    and specify the protocol and port number you would prefer to be used to advertise the URL for your Dataverse installation. 
- For example, configured in domain.xml: - ``-Ddataverse.fqdn=dataverse.example.edu`` - ``-Ddataverse.siteUrl=http://${dataverse.fqdn}:8080`` dataverse.files.directory +++++++++++++++++++++++++ From f7e7e4aed8e2e089ac7ce55bb583795230d6849e Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Wed, 22 Jun 2022 18:22:08 +0200 Subject: [PATCH 021/173] refactor(settings): replace lookups of dataverse.files.directory with MPCONFIG #7000 - Adding dataverse.files.directory equivalent to JvmSettings - Remove all System.getPropert("dataverse.files.directory") or similar - Add default with /tmp/dataverse via microprofile-config.properties as formerly seen at FileUtil and Dataset only - Refactor SwordConfigurationImpl to reuse the NoSuchElementException thrown by MPCONFIG - Refactor GoogleCloudSubmitToArchiveCommand to use the JvmSettings.lookup and create file stream in try-with-resources --- .../edu/harvard/iq/dataverse/Dataset.java | 9 ++-- .../iq/dataverse/EditDatafilesPage.java | 7 ++- .../datadeposit/SwordConfigurationImpl.java | 52 +++++++++---------- .../filesystem/FileRecordJobListener.java | 7 ++- .../importer/filesystem/FileRecordReader.java | 9 ++-- .../GoogleCloudSubmitToArchiveCommand.java | 31 +++++------ .../impl/ImportFromFileSystemCommand.java | 48 +++++++++-------- .../iq/dataverse/settings/JvmSettings.java | 4 ++ .../harvard/iq/dataverse/util/FileUtil.java | 8 ++- .../iq/dataverse/util/SystemConfig.java | 5 -- .../META-INF/microprofile-config.properties | 3 ++ 11 files changed, 94 insertions(+), 89 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/Dataset.java b/src/main/java/edu/harvard/iq/dataverse/Dataset.java index a4f82d41bac..e2f00d0b54b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/Dataset.java +++ b/src/main/java/edu/harvard/iq/dataverse/Dataset.java @@ -33,6 +33,8 @@ import javax.persistence.Table; import javax.persistence.Temporal; import javax.persistence.TemporalType; + +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.util.StringUtil; import edu.harvard.iq.dataverse.util.SystemConfig; @@ -528,11 +530,8 @@ private Collection getCategoryNames() { @Deprecated public Path getFileSystemDirectory() { Path studyDir = null; - - String filesRootDirectory = System.getProperty("dataverse.files.directory"); - if (filesRootDirectory == null || filesRootDirectory.equals("")) { - filesRootDirectory = "/tmp/files"; - } + + String filesRootDirectory = JvmSettings.FILES_DIRECTORY.lookup(); if (this.getAlternativePersistentIndentifiers() != null && !this.getAlternativePersistentIndentifiers().isEmpty()) { for (AlternativePersistentIdentifier api : this.getAlternativePersistentIndentifiers()) { diff --git a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java index f53e2377a69..a895c90dabe 100644 --- a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java @@ -31,6 +31,7 @@ import edu.harvard.iq.dataverse.ingest.IngestUtil; import edu.harvard.iq.dataverse.license.LicenseServiceBean; import edu.harvard.iq.dataverse.search.IndexServiceBean; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.Setting; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.FileUtil; @@ -2425,10 +2426,8 @@ public boolean isTemporaryPreviewAvailable(String fileSystemId, String mimeType) return false; } - String 
filesRootDirectory = System.getProperty("dataverse.files.directory"); - if (filesRootDirectory == null || filesRootDirectory.isEmpty()) { - filesRootDirectory = "/tmp/files"; - } + // Retrieve via MPCONFIG. Has sane default /tmp/dataverse from META-INF/microprofile-config.properties + String filesRootDirectory = JvmSettings.FILES_DIRECTORY.lookup(); String fileSystemName = filesRootDirectory + "/temp/" + fileSystemId; diff --git a/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/SwordConfigurationImpl.java b/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/SwordConfigurationImpl.java index ce5f9415fcc..1e506c6a0b1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/SwordConfigurationImpl.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/SwordConfigurationImpl.java @@ -1,5 +1,6 @@ package edu.harvard.iq.dataverse.api.datadeposit; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.util.SystemConfig; import java.io.File; import java.util.Arrays; @@ -86,37 +87,32 @@ public boolean storeAndCheckBinary() { @Override public String getTempDirectory() { - String tmpFileDir = System.getProperty(SystemConfig.FILES_DIRECTORY); - if (tmpFileDir != null) { - String swordDirString = tmpFileDir + File.separator + "sword"; - File swordDirFile = new File(swordDirString); - /** - * @todo Do we really need this check? It seems like we do because - * if you create a dataset via the native API and then later try to - * upload a file via SWORD, the directory defined by - * dataverse.files.directory may not exist and we get errors deep in - * the SWORD library code. Could maybe use a try catch in the doPost - * method of our SWORDv2MediaResourceServlet. - */ - if (swordDirFile.exists()) { + // will throw a runtime exception when not found + String tmpFileDir = JvmSettings.FILES_DIRECTORY.lookup(); + + String swordDirString = tmpFileDir + File.separator + "sword"; + File swordDirFile = new File(swordDirString); + /** + * @todo Do we really need this check? It seems like we do because + * if you create a dataset via the native API and then later try to + * upload a file via SWORD, the directory defined by + * dataverse.files.directory may not exist and we get errors deep in + * the SWORD library code. Could maybe use a try catch in the doPost + * method of our SWORDv2MediaResourceServlet. + */ + if (swordDirFile.exists()) { + return swordDirString; + } else { + boolean mkdirSuccess = swordDirFile.mkdirs(); + if (mkdirSuccess) { + logger.info("Created directory " + swordDirString); return swordDirString; } else { - boolean mkdirSuccess = swordDirFile.mkdirs(); - if (mkdirSuccess) { - logger.info("Created directory " + swordDirString); - return swordDirString; - } else { - String msgForSwordUsers = ("Could not determine or create SWORD temp directory. Check logs for details."); - logger.severe(msgForSwordUsers + " Failed to create " + swordDirString); - // sadly, must throw RunTimeException to communicate with SWORD user - throw new RuntimeException(msgForSwordUsers); - } + String msgForSwordUsers = ("Could not determine or create SWORD temp directory. Check logs for details."); + logger.severe(msgForSwordUsers + " Failed to create " + swordDirString); + // sadly, must throw RunTimeException to communicate with SWORD user + throw new RuntimeException(msgForSwordUsers); } - } else { - String msgForSwordUsers = ("JVM option \"" + SystemConfig.FILES_DIRECTORY + "\" not defined. 
Check logs for details."); - logger.severe(msgForSwordUsers); - // sadly, must throw RunTimeException to communicate with SWORD user - throw new RuntimeException(msgForSwordUsers); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordJobListener.java b/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordJobListener.java index 6b82a665c17..ecb998c66af 100644 --- a/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordJobListener.java +++ b/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordJobListener.java @@ -57,6 +57,7 @@ import javax.inject.Named; import javax.servlet.http.HttpServletRequest; +import edu.harvard.iq.dataverse.settings.JvmSettings; import org.apache.commons.io.IOUtils; import java.io.FileReader; @@ -433,8 +434,10 @@ private void loadChecksumManifest() { manifest = checksumManifest; getJobLogger().log(Level.INFO, "Checksum manifest = " + manifest + " (FileSystemImportJob.xml property)"); } - // construct full path - String manifestAbsolutePath = System.getProperty("dataverse.files.directory") + + // Construct full path - retrieve base dir via MPCONFIG. + // (Has sane default /tmp/dataverse from META-INF/microprofile-config.properties) + String manifestAbsolutePath = JvmSettings.FILES_DIRECTORY.lookup() + SEP + dataset.getAuthority() + SEP + dataset.getIdentifier() + SEP + uploadFolder diff --git a/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordReader.java b/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordReader.java index b3d3a7107a6..e3b67e9b0d2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordReader.java +++ b/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordReader.java @@ -24,6 +24,7 @@ import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.batch.jobs.importer.ImportMode; +import edu.harvard.iq.dataverse.settings.JvmSettings; import org.apache.commons.io.filefilter.NotFileFilter; import org.apache.commons.io.filefilter.WildcardFileFilter; @@ -96,9 +97,11 @@ public void init() { @Override public void open(Serializable checkpoint) throws Exception { - - directory = new File(System.getProperty("dataverse.files.directory") - + SEP + dataset.getAuthority() + SEP + dataset.getIdentifier() + SEP + uploadFolder); + + // Retrieve via MPCONFIG. 
Has sane default /tmp/dataverse from META-INF/microprofile-config.properties + String baseDir = JvmSettings.FILES_DIRECTORY.lookup(); + + directory = new File(baseDir + SEP + dataset.getAuthority() + SEP + dataset.getIdentifier() + SEP + uploadFolder); // TODO: // The above goes directly to the filesystem directory configured by the // old "dataverse.files.directory" JVM option (otherwise used for temp diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java index 5d017173685..da2701a41e7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java @@ -1,16 +1,27 @@ package edu.harvard.iq.dataverse.engine.command.impl; +import com.google.auth.oauth2.ServiceAccountCredentials; +import com.google.cloud.storage.Blob; +import com.google.cloud.storage.Bucket; +import com.google.cloud.storage.Storage; +import com.google.cloud.storage.StorageException; +import com.google.cloud.storage.StorageOptions; import edu.harvard.iq.dataverse.Dataset; -import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.DatasetLock.Reason; +import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.authorization.users.ApiToken; import edu.harvard.iq.dataverse.engine.command.Command; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.workflow.step.Failure; import edu.harvard.iq.dataverse.workflow.step.WorkflowStepResult; +import org.apache.commons.codec.binary.Hex; +import javax.json.Json; +import javax.json.JsonObjectBuilder; +import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.PipedInputStream; @@ -21,17 +32,6 @@ import java.util.Map; import java.util.logging.Logger; -import javax.json.Json; -import javax.json.JsonObjectBuilder; - -import org.apache.commons.codec.binary.Hex; -import com.google.auth.oauth2.ServiceAccountCredentials; -import com.google.cloud.storage.Blob; -import com.google.cloud.storage.Bucket; -import com.google.cloud.storage.Storage; -import com.google.cloud.storage.StorageException; -import com.google.cloud.storage.StorageOptions; - @RequiredPermissions(Permission.PublishDataset) public class GoogleCloudSubmitToArchiveCommand extends AbstractSubmitToArchiveCommand implements Command { @@ -56,10 +56,11 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t statusObject.add(DatasetVersion.ARCHIVAL_STATUS, DatasetVersion.ARCHIVAL_STATUS_FAILURE); statusObject.add(DatasetVersion.ARCHIVAL_STATUS_MESSAGE, "Bag not transferred"); - try { - FileInputStream fis = new FileInputStream(System.getProperty("dataverse.files.directory") + System.getProperty("file.separator") + "googlecloudkey.json"); + String cloudKeyFile = JvmSettings.FILES_DIRECTORY.lookup() + File.separator + "googlecloudkey.json"; + + try (FileInputStream cloudKeyStream = new FileInputStream(cloudKeyFile)) { storage = StorageOptions.newBuilder() - .setCredentials(ServiceAccountCredentials.fromStream(fis)) + .setCredentials(ServiceAccountCredentials.fromStream(cloudKeyStream)) .setProjectId(projectName) 
.build() .getService(); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ImportFromFileSystemCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ImportFromFileSystemCommand.java index 64beba82450..5f31ea756eb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ImportFromFileSystemCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ImportFromFileSystemCommand.java @@ -12,17 +12,20 @@ import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException; -import static edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder.jsonObjectBuilder; -import java.io.File; -import java.util.Properties; -import java.util.logging.Level; -import java.util.logging.Logger; +import edu.harvard.iq.dataverse.settings.JvmSettings; + import javax.batch.operations.JobOperator; import javax.batch.operations.JobSecurityException; import javax.batch.operations.JobStartException; import javax.batch.runtime.BatchRuntime; import javax.json.JsonObject; import javax.json.JsonObjectBuilder; +import java.io.File; +import java.util.Properties; +import java.util.logging.Level; +import java.util.logging.Logger; + +import static edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder.jsonObjectBuilder; @RequiredPermissions(Permission.EditDataset) public class ImportFromFileSystemCommand extends AbstractCommand { @@ -69,18 +72,20 @@ public JsonObject execute(CommandContext ctxt) throws CommandException { logger.info(error); throw new IllegalCommandException(error, this); } - File directory = new File(System.getProperty("dataverse.files.directory") - + File.separator + dataset.getAuthority() + File.separator + dataset.getIdentifier()); - // TODO: - // The above goes directly to the filesystem directory configured by the - // old "dataverse.files.directory" JVM option (otherwise used for temp - // files only, after the Multistore implementation (#6488). - // We probably want package files to be able to use specific stores instead. - // More importantly perhaps, the approach above does not take into account - // if the dataset may have an AlternativePersistentIdentifier, that may be - // designated isStorageLocationDesignator() - i.e., if a different identifer - // needs to be used to name the storage directory, instead of the main/current - // persistent identifier above. + + File directory = new File( + String.join(File.separator, JvmSettings.FILES_DIRECTORY.lookup(), + dataset.getAuthority(), dataset.getIdentifier())); + + // TODO: The above goes directly to the filesystem directory configured by the + // old "dataverse.files.directory" JVM option (otherwise used for temp + // files only, after the Multistore implementation (#6488). + // We probably want package files to be able to use specific stores instead. + // More importantly perhaps, the approach above does not take into account + // if the dataset may have an AlternativePersistentIdentifier, that may be + // designated isStorageLocationDesignator() - i.e., if a different identifer + // needs to be used to name the storage directory, instead of the main/current + // persistent identifier above. if (!isValidDirectory(directory)) { String error = "Dataset directory is invalid. 
" + directory; logger.info(error); @@ -93,11 +98,10 @@ public JsonObject execute(CommandContext ctxt) throws CommandException { throw new IllegalCommandException(error, this); } - File uploadDirectory = new File(System.getProperty("dataverse.files.directory") - + File.separator + dataset.getAuthority() + File.separator + dataset.getIdentifier() - + File.separator + uploadFolder); - // TODO: - // see the comment above. + File uploadDirectory = new File(String.join(File.separator, JvmSettings.FILES_DIRECTORY.lookup(), + dataset.getAuthority(), dataset.getIdentifier(), uploadFolder)); + + // TODO: see the comment above. if (!isValidDirectory(uploadDirectory)) { String error = "Upload folder is not a valid directory."; logger.info(error); diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java index 223e4b86da9..12e5e311278 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java @@ -42,6 +42,10 @@ public enum JvmSettings { VERSION(PREFIX, "version"), BUILD(PREFIX, "build"), + // FILES SETTINGS + SCOPE_FILES(PREFIX, "files"), + FILES_DIRECTORY(SCOPE_FILES, "directory"), + ; private static final String SCOPE_SEPARATOR = "."; diff --git a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java index 893c62b3cb0..a2c55d41613 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java @@ -40,6 +40,7 @@ import edu.harvard.iq.dataverse.ingest.IngestServiceShapefileHelper; import edu.harvard.iq.dataverse.ingest.IngestableDataChecker; import edu.harvard.iq.dataverse.license.License; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.util.file.BagItFileHandler; import edu.harvard.iq.dataverse.util.file.CreateDataFileResult; import edu.harvard.iq.dataverse.util.file.BagItFileHandlerFactory; @@ -1389,11 +1390,8 @@ public static boolean canIngestAsTabular(String mimeType) { } public static String getFilesTempDirectory() { - String filesRootDirectory = System.getProperty("dataverse.files.directory"); - if (filesRootDirectory == null || filesRootDirectory.equals("")) { - filesRootDirectory = "/tmp/files"; - } - + + String filesRootDirectory = JvmSettings.FILES_DIRECTORY.lookup(); String filesTempDirectory = filesRootDirectory + "/temp"; if (!Files.exists(Paths.get(filesTempDirectory))) { diff --git a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java index bd27405fae5..e9313e70218 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java @@ -78,11 +78,6 @@ public class SystemConfig { */ public static final String SITE_URL = "dataverse.siteUrl"; - /** - * A JVM option for where files are stored on the file system. - */ - public static final String FILES_DIRECTORY = "dataverse.files.directory"; - /** * Some installations may not want download URLs to their files to be * available in Schema.org JSON-LD output. 
diff --git a/src/main/resources/META-INF/microprofile-config.properties b/src/main/resources/META-INF/microprofile-config.properties index 16298d83118..ab219071767 100644 --- a/src/main/resources/META-INF/microprofile-config.properties +++ b/src/main/resources/META-INF/microprofile-config.properties @@ -3,6 +3,9 @@ dataverse.version=${project.version} dataverse.build= +# FILES +dataverse.files.directory=/tmp/dataverse + # DATABASE dataverse.db.host=localhost dataverse.db.port=5432 From 5c2c7022ad9f11234b0e33ddaf3a0aa2696ab154 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Wed, 22 Jun 2022 22:27:30 +0200 Subject: [PATCH 022/173] docs(settings): provide more detail for dataverse.files.directory --- doc/sphinx-guides/source/api/native-api.rst | 2 ++ doc/sphinx-guides/source/installation/config.rst | 16 +++++++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 339a291bf4d..6dd1bbab728 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -552,6 +552,8 @@ You should expect an HTTP 200 ("OK") response and JSON indicating the database I .. note:: Only a Dataverse installation account with superuser permissions is allowed to include files when creating a dataset via this API. Adding files this way only adds their file metadata to the database, you will need to manually add the physical files to the file system. +.. _api-import-dataset: + Import a Dataset into a Dataverse Collection ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index ab0bad70206..89329ea3821 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -274,6 +274,8 @@ If you wish to change which store is used by default, you'll need to delete the It is also possible to set maximum file upload size limits per store. See the :ref:`:MaxFileUploadSizeInBytes` setting below. +.. _storage-files-dir: + File Storage ++++++++++++ @@ -1404,7 +1406,19 @@ dataverse.siteUrl dataverse.files.directory +++++++++++++++++++++++++ -This is how you configure the path Dataverse uses for temporary files. (File store specific dataverse.files.\.directory options set the permanent data storage locations.) +Please provide an absolute path to a directory backed by some mounted file system. This directory is used for a number +of purposes: + +1. ``/temp`` after uploading, data is temporarily stored here for ingest and/or before + shipping to the final storage destination. +2. ``/sword`` a place to store uploads via the :doc:`../api/sword` before transfer + to final storage location and/or ingest. +3. ``//`` data location for file system imports, see + :ref:`api-import-dataset`. +4. ``/googlecloudkey.json`` used with :ref:`Google Cloud Configuration` for BagIt exports. + +This directory might also be used for permanent storage of data, but this setting is independent from +:ref:`storage-files-dir` configuration. 
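+For example (illustrative values only), with the default ``/tmp/dataverse`` and a dataset
+identified by ``10.5072/FK2ABCDEF``, the layout might look like this::
+
+    /tmp/dataverse/temp                             # temporary storage during upload and ingest
+    /tmp/dataverse/sword                            # staging area for SWORD uploads
+    /tmp/dataverse/10.5072/FK2ABCDEF/importFolder   # filesystem import location (authority/identifier/upload folder)
+    /tmp/dataverse/googlecloudkey.json              # key file used by the Google Cloud BagIt archiver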
dataverse.auth.password-reset-timeout-in-minutes ++++++++++++++++++++++++++++++++++++++++++++++++ From d7ab9f6e5359356db3b01ab9e6f87347cf117fe7 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Mon, 27 Jun 2022 15:11:01 +0200 Subject: [PATCH 023/173] style: replace system prop 'file.separator' with File.separator --- .../batch/jobs/importer/filesystem/FileRecordJobListener.java | 3 ++- .../batch/jobs/importer/filesystem/FileRecordReader.java | 2 +- .../java/edu/harvard/iq/dataverse/batch/util/LoggingUtil.java | 4 ++-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordJobListener.java b/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordJobListener.java index ecb998c66af..7837474fc27 100644 --- a/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordJobListener.java +++ b/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordJobListener.java @@ -60,6 +60,7 @@ import edu.harvard.iq.dataverse.settings.JvmSettings; import org.apache.commons.io.IOUtils; +import java.io.File; import java.io.FileReader; import java.io.IOException; import java.sql.Timestamp; @@ -80,7 +81,7 @@ @Dependent public class FileRecordJobListener implements ItemReadListener, StepListener, JobListener { - public static final String SEP = System.getProperty("file.separator"); + public static final String SEP = File.separator; private static final UserNotification.Type notifyType = UserNotification.Type.FILESYSTEMIMPORT; diff --git a/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordReader.java b/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordReader.java index e3b67e9b0d2..a4f8ffd2378 100644 --- a/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordReader.java +++ b/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordReader.java @@ -55,7 +55,7 @@ @Dependent public class FileRecordReader extends AbstractItemReader { - public static final String SEP = System.getProperty("file.separator"); + public static final String SEP = File.separator; @Inject JobContext jobContext; diff --git a/src/main/java/edu/harvard/iq/dataverse/batch/util/LoggingUtil.java b/src/main/java/edu/harvard/iq/dataverse/batch/util/LoggingUtil.java index 4a778dc7abb..a2f76ca953d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/batch/util/LoggingUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/batch/util/LoggingUtil.java @@ -154,8 +154,8 @@ public static Logger getJobLogger(String jobId) { try { Logger jobLogger = Logger.getLogger("job-"+jobId); FileHandler fh; - String logDir = System.getProperty("com.sun.aas.instanceRoot") + System.getProperty("file.separator") - + "logs" + System.getProperty("file.separator") + "batch-jobs" + System.getProperty("file.separator"); + String logDir = System.getProperty("com.sun.aas.instanceRoot") + File.separator + + "logs" + File.separator + "batch-jobs" + File.separator; checkCreateLogDirectory( logDir ); fh = new FileHandler(logDir + "job-" + jobId + ".log"); logger.log(Level.INFO, "JOB LOG: " + logDir + "job-" + jobId + ".log"); From 2af2d7c6106d890cb7d01872ed66b99143929385 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Mon, 19 Sep 2022 14:52:28 +0200 Subject: [PATCH 024/173] fix(test): make UrlTokenUtilTest not assume site url For unknown reasons, the test assumed the site url / fqdn to be "https://librascholar.org", 
which might be coming from some test order side effect. Now the test sets the site URL setting to have control over the generated data. On a related note, this meant to upgrade the test from JUnit4 to JUnit5 plus some minor code cleanups. --- .../iq/dataverse/util/UrlTokenUtilTest.java | 35 +++++++++++-------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/util/UrlTokenUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/util/UrlTokenUtilTest.java index ffc6b813045..782890627e1 100644 --- a/src/test/java/edu/harvard/iq/dataverse/util/UrlTokenUtilTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/util/UrlTokenUtilTest.java @@ -6,24 +6,25 @@ import edu.harvard.iq.dataverse.FileMetadata; import edu.harvard.iq.dataverse.GlobalId; import edu.harvard.iq.dataverse.authorization.users.ApiToken; -import static org.junit.Assert.assertEquals; -import static org.mockito.Mockito.when; +import edu.harvard.iq.dataverse.settings.JvmSettings; +import edu.harvard.iq.dataverse.util.testing.JvmSetting; +import org.junit.jupiter.api.Test; import java.util.ArrayList; import java.util.List; -import org.junit.Test; -import org.junit.jupiter.api.extension.ExtendWith; -import org.mockito.Mock; -import org.mockito.junit.jupiter.MockitoExtension; +import static org.junit.jupiter.api.Assertions.assertEquals; public class UrlTokenUtilTest { @Test + @JvmSetting(key = JvmSettings.SITE_URL, value = "https://foobar") public void testGetToolUrlWithOptionalQueryParameters() { - + // given + String siteUrl = "https://foobar"; + DataFile dataFile = new DataFile(); - dataFile.setId(42l); + dataFile.setId(42L); FileMetadata fmd = new FileMetadata(); DatasetVersion dv = new DatasetVersion(); Dataset ds = new Dataset(); @@ -31,20 +32,26 @@ public void testGetToolUrlWithOptionalQueryParameters() { ds.setGlobalId(new GlobalId("doi:10.5072/FK2ABCDEF")); dv.setDataset(ds); fmd.setDatasetVersion(dv); - List fmdl = new ArrayList(); + List fmdl = new ArrayList<>(); fmdl.add(fmd); dataFile.setFileMetadatas(fmdl); + ApiToken apiToken = new ApiToken(); apiToken.setTokenString("7196b5ce-f200-4286-8809-03ffdbc255d7"); + + // when & then 1/2 URLTokenUtil urlTokenUtil = new URLTokenUtil(dataFile, apiToken, fmd, "en"); assertEquals("en", urlTokenUtil.replaceTokensWithValues("{localeCode}")); assertEquals("42 test en", urlTokenUtil.replaceTokensWithValues("{fileId} test {localeCode}")); assertEquals("42 test en", urlTokenUtil.replaceTokensWithValues("{fileId} test {localeCode}")); - - assertEquals("https://librascholar.org/api/files/42/metadata?key=" + apiToken.getTokenString(), urlTokenUtil.replaceTokensWithValues("{siteUrl}/api/files/{fileId}/metadata?key={apiToken}")); - + assertEquals( siteUrl + "/api/files/42/metadata?key=" + apiToken.getTokenString(), + urlTokenUtil.replaceTokensWithValues("{siteUrl}/api/files/{fileId}/metadata?key={apiToken}")); + + // when & then 2/2 URLTokenUtil urlTokenUtil2 = new URLTokenUtil(ds, apiToken, "en"); - assertEquals("https://librascholar.org/api/datasets/50?key=" + apiToken.getTokenString(), urlTokenUtil2.replaceTokensWithValues("{siteUrl}/api/datasets/{datasetId}?key={apiToken}")); - assertEquals("https://librascholar.org/api/datasets/:persistentId/?persistentId=doi:10.5072/FK2ABCDEF&key=" + apiToken.getTokenString(), urlTokenUtil2.replaceTokensWithValues("{siteUrl}/api/datasets/:persistentId/?persistentId={datasetPid}&key={apiToken}")); + assertEquals(siteUrl + "/api/datasets/50?key=" + apiToken.getTokenString(), + 
urlTokenUtil2.replaceTokensWithValues("{siteUrl}/api/datasets/{datasetId}?key={apiToken}")); + assertEquals(siteUrl + "/api/datasets/:persistentId/?persistentId=doi:10.5072/FK2ABCDEF&key=" + apiToken.getTokenString(), + urlTokenUtil2.replaceTokensWithValues("{siteUrl}/api/datasets/:persistentId/?persistentId={datasetPid}&key={apiToken}")); } } From cbc7f8af47e2dccfaa6d55e9f78c07166c2d3b5e Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Mon, 4 Jul 2022 18:54:46 +0200 Subject: [PATCH 025/173] feat(settings): add rserve properties via MPCONFIG #7000 --- .../edu/harvard/iq/dataverse/settings/JvmSettings.java | 8 ++++++++ .../resources/META-INF/microprofile-config.properties | 7 +++++++ 2 files changed, 15 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java index 223e4b86da9..6c5131219ff 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java @@ -42,6 +42,14 @@ public enum JvmSettings { VERSION(PREFIX, "version"), BUILD(PREFIX, "build"), + // RSERVE CONNECTION + SCOPE_RSERVE(PREFIX, "rserve"), + RSERVE_HOST(SCOPE_RSERVE, "host"), + RSERVE_PORT(SCOPE_RSERVE, "port", "dataverse.ingest.rserve.port"), + RSERVE_USER(SCOPE_RSERVE, "user"), + RSERVE_PASSWORD(SCOPE_RSERVE, "password"), + RSERVE_TEMPDIR(SCOPE_RSERVE, "tempdir"), + ; private static final String SCOPE_SEPARATOR = "."; diff --git a/src/main/resources/META-INF/microprofile-config.properties b/src/main/resources/META-INF/microprofile-config.properties index 16298d83118..c7b907edb6c 100644 --- a/src/main/resources/META-INF/microprofile-config.properties +++ b/src/main/resources/META-INF/microprofile-config.properties @@ -8,3 +8,10 @@ dataverse.db.host=localhost dataverse.db.port=5432 dataverse.db.user=dataverse dataverse.db.name=dataverse + +# RSERVE +dataverse.rserve.host=localhost +dataverse.rserve.port=6311 +dataverse.rserve.username=rserve +dataverse.rserve.password=rserve +dataverse.rserve.tempdir=/tmp From 6732b4bc578ad0b2f410dbed6d482e377c86fde9 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Mon, 4 Jul 2022 18:54:57 +0200 Subject: [PATCH 026/173] refactor(rserve): introduce MPCONFIG settings retrieval #7000 1. Instead of reading the configuration from system properties only, switch to using MPCONFIG and JvmSettings fluent API. 2. Instead of saving the configuration in a static variable, retrieve the config from the constructor. This has 2 advantages: 1) no worries about execution order and MPCONFIG not yet ready, 2) update the readers with new config settings when changed (no need to restart). 
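Condensed from the diff below, the pattern changes roughly like this (sketch only, not
part of the patch):

    // before: static field, read once from a system property with manual fallback code
    private static String RSERVE_HOST = System.getProperty("dataverse.rserve.host");

    // after: per-instance field, resolved through MPCONFIG when the object is created,
    // falling back to the defaults shipped in META-INF/microprofile-config.properties
    private final String RSERVE_HOST;

    public RemoteDataFrameService() {
        this.RSERVE_HOST = JvmSettings.RSERVE_HOST.lookup();
        this.RSERVE_PORT = JvmSettings.RSERVE_PORT.lookup(Integer.class);
    }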
--- .../impl/plugins/rdata/RDATAFileReader.java | 35 ++++------ .../rserve/RemoteDataFrameService.java | 68 ++++++------------- 2 files changed, 33 insertions(+), 70 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/rdata/RDATAFileReader.java b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/rdata/RDATAFileReader.java index c2899b29d1f..1ec0c389049 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/rdata/RDATAFileReader.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/rdata/RDATAFileReader.java @@ -31,6 +31,7 @@ import javax.inject.Inject; // Rosuda Wrappers and Methods for R-calls to Rserve +import edu.harvard.iq.dataverse.settings.JvmSettings; import org.rosuda.REngine.REXP; import org.rosuda.REngine.REXPMismatchException; import org.rosuda.REngine.RList; @@ -88,10 +89,10 @@ public class RDATAFileReader extends TabularDataFileReader { static private String RSCRIPT_WRITE_DVN_TABLE = ""; // RServe static variables - private static String RSERVE_HOST = System.getProperty("dataverse.rserve.host"); - private static String RSERVE_USER = System.getProperty("dataverse.rserve.user"); - private static String RSERVE_PASSWORD = System.getProperty("dataverse.rserve.password"); - private static int RSERVE_PORT; + private final String RSERVE_HOST; + private final int RSERVE_PORT; + private final String RSERVE_USER; + private final String RSERVE_PASSWORD; // TODO: // we're not using these time/data formats for anything, are we? @@ -138,24 +139,6 @@ public class RDATAFileReader extends TabularDataFileReader { * This is primarily to construct the R-Script */ static { - /* - * Set defaults fallbacks for class properties - */ - if (RSERVE_HOST == null) - RSERVE_HOST = "localhost"; - - if (RSERVE_USER == null) - RSERVE_USER = "rserve"; - - if (RSERVE_PASSWORD == null) - RSERVE_PASSWORD = "rserve"; - - if (System.getProperty("dataverse.ingest.rserve.port") == null) - RSERVE_PORT = 6311; - else - RSERVE_PORT = Integer.parseInt(System.getProperty("dataverse.rserve.port")); - - // Load R Scripts into memory, so that we can run them via R-serve RSCRIPT_WRITE_DVN_TABLE = readLocalResource("scripts/write.table.R"); RSCRIPT_GET_DATASET = readLocalResource("scripts/get.dataset.R"); @@ -451,7 +434,13 @@ public RDATAFileReader(TabularDataFileReaderSpi originator) { super(originator); - + // These settings have sane defaults in resources/META-INF/microprofile-config.properties, + // ready to be overridden by a sysadmin. Every time a file would be read with this file reader, + // a new reader will be created, reading from the cached config source settings with minimal overhead. 
+ this.RSERVE_HOST = JvmSettings.RSERVE_HOST.lookup(); + this.RSERVE_PORT = JvmSettings.RSERVE_PORT.lookup(Integer.class); + this.RSERVE_USER = JvmSettings.RSERVE_USER.lookup(); + this.RSERVE_PASSWORD = JvmSettings.RSERVE_PASSWORD.lookup(); LOG.fine("RDATAFileReader: INSIDE RDATAFileReader"); diff --git a/src/main/java/edu/harvard/iq/dataverse/rserve/RemoteDataFrameService.java b/src/main/java/edu/harvard/iq/dataverse/rserve/RemoteDataFrameService.java index f13b6f11434..df2e44ecb27 100644 --- a/src/main/java/edu/harvard/iq/dataverse/rserve/RemoteDataFrameService.java +++ b/src/main/java/edu/harvard/iq/dataverse/rserve/RemoteDataFrameService.java @@ -41,6 +41,7 @@ import java.util.Set; import java.util.logging.Logger; +import edu.harvard.iq.dataverse.settings.JvmSettings; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.RandomStringUtils; @@ -72,57 +73,33 @@ public class RemoteDataFrameService { private static String TMP_TABDATA_FILE_EXT = ".tab"; private static String TMP_RDATA_FILE_EXT = ".RData"; - - private static String RSERVE_HOST = null; - private static String RSERVE_USER = null; - private static String RSERVE_PWD = null; - private static int RSERVE_PORT = -1; + + // These settings have sane defaults in resources/META-INF/microprofile-config.properties, + // ready to be overridden by a sysadmin + private final String RSERVE_HOST; + private final String RSERVE_USER; + private final String RSERVE_PWD; + private final int RSERVE_PORT; + private final String RSERVE_TMP_DIR; private static String DATAVERSE_R_FUNCTIONS = "scripts/dataverse_r_functions.R"; private static String DATAVERSE_R_PREPROCESSING = "scripts/preprocess.R"; - - public static String LOCAL_TEMP_DIR = System.getProperty("java.io.tmpdir"); - public static String RSERVE_TMP_DIR=null; public String PID = null; public String tempFileNameIn = null; public String tempFileNameOut = null; - - static { - - RSERVE_TMP_DIR = System.getProperty("dataverse.rserve.tempdir"); - - if (RSERVE_TMP_DIR == null){ - RSERVE_TMP_DIR = "/tmp/"; - } - - RSERVE_HOST = System.getProperty("dataverse.rserve.host"); - if (RSERVE_HOST == null){ - RSERVE_HOST= "localhost"; - } - - RSERVE_USER = System.getProperty("dataverse.rserve.user"); - if (RSERVE_USER == null){ - RSERVE_USER= "rserve"; - } - - RSERVE_PWD = System.getProperty("dataverse.rserve.password"); - if (RSERVE_PWD == null){ - RSERVE_PWD= "rserve"; - } - - - if (System.getProperty("dataverse.rserve.port") == null ){ - RSERVE_PORT= 6311; - } else { - RSERVE_PORT = Integer.parseInt(System.getProperty("dataverse.rserve.port")); - } - - } - - public RemoteDataFrameService() { + // These settings have sane defaults in resources/META-INF/microprofile-config.properties, + // ready to be overridden by a sysadmin. Config sources have their own caches, so adding + // these here means the setting can be changed dynamically without too much overhead. 
+ this.RSERVE_HOST = JvmSettings.RSERVE_HOST.lookup(); + this.RSERVE_USER = JvmSettings.RSERVE_USER.lookup(); + this.RSERVE_PWD = JvmSettings.RSERVE_PASSWORD.lookup(); + this.RSERVE_PORT = JvmSettings.RSERVE_PORT.lookup(Integer.class); + this.RSERVE_TMP_DIR = JvmSettings.RSERVE_TEMPDIR.lookup(); + + // initialization PID = RandomStringUtils.randomNumeric(6); @@ -703,15 +680,12 @@ public Map runDataFrameRequest(RJobRequest jobRequest, RConnecti public File transferRemoteFile(RConnection connection, String targetFilename, String tmpFilePrefix, String tmpFileExt, int fileSize) { - // set up a local temp file: - + // set up a local temp file: File tmpResultFile = null; - String resultFile = tmpFilePrefix + PID + "." + tmpFileExt; - RFileInputStream rInStream = null; OutputStream outbr = null; try { - tmpResultFile = new File(LOCAL_TEMP_DIR, resultFile); + tmpResultFile = File.createTempFile(tmpFilePrefix + PID, "."+tmpFileExt); outbr = new BufferedOutputStream(new FileOutputStream(tmpResultFile)); // open the input stream rInStream = connection.openFile(targetFilename); From d951f99bfc12440766add7f13cc1afb84f557448 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Mon, 4 Jul 2022 19:05:21 +0200 Subject: [PATCH 027/173] fix(settings): align Rserve tempdir default to docs #7000 The docs said the default is "/tmp/Rserve", while the code had "/tmp". Changing the code default to the documented one. --- src/main/resources/META-INF/microprofile-config.properties | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/resources/META-INF/microprofile-config.properties b/src/main/resources/META-INF/microprofile-config.properties index c7b907edb6c..8d2793eadbf 100644 --- a/src/main/resources/META-INF/microprofile-config.properties +++ b/src/main/resources/META-INF/microprofile-config.properties @@ -14,4 +14,4 @@ dataverse.rserve.host=localhost dataverse.rserve.port=6311 dataverse.rserve.username=rserve dataverse.rserve.password=rserve -dataverse.rserve.tempdir=/tmp +dataverse.rserve.tempdir=/tmp/Rserve From 507ae82a0b0674cce8d23f77a196894194396ea9 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Mon, 4 Jul 2022 19:06:53 +0200 Subject: [PATCH 028/173] docs(settings): add Rserve MPCONFIG to guide #7000 --- .../source/installation/config.rst | 43 ++++++++++++++++--- 1 file changed, 38 insertions(+), 5 deletions(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index ab0bad70206..ae0d2cb0b26 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -1465,27 +1465,60 @@ Can also be set via *MicroProfile Config API* sources, e.g. the environment vari dataverse.rserve.host +++++++++++++++++++++ -Host name for Rserve, used for tasks that require use of R (to ingest RData files and to save tabular data as RData frames). +Host name for Rserve, used for tasks that require use of R (to ingest RData +files and to save tabular data as RData frames). + +Defaults to ``localhost``. + +Can also be set via *MicroProfile Config API* sources, e.g. the environment +variable ``DATAVERSE_RSERVE_HOST``. dataverse.rserve.port +++++++++++++++++++++ -Port number for Rserve, used for tasks that require use of R (to ingest RData files and to save tabular data as RData frames). +Port number for Rserve, used for tasks that require use of R (to ingest RData +files and to save tabular data as RData frames). + +Defaults to ``6311``. 
+ +Can also be set via *MicroProfile Config API* sources, e.g. the environment +variable ``DATAVERSE_RSERVE_PORT``. dataverse.rserve.user +++++++++++++++++++++ -Username for Rserve, used for tasks that require use of R (to ingest RData files and to save tabular data as RData frames). +Username for Rserve, used for tasks that require use of R (to ingest RData +files and to save tabular data as RData frames). + +Defaults to ``rserve``. + +Can also be set via *MicroProfile Config API* sources, e.g. the environment +variable ``DATAVERSE_RSERVE_USER``. dataverse.rserve.password +++++++++++++++++++++++++ -Password for Rserve, used for tasks that require use of R (to ingest RData files and to save tabular data as RData frames). +Password for Rserve, used for tasks that require use of R (to ingest RData +files and to save tabular data as RData frames). + +Defaults to ``rserve``. + +Can also be set via *MicroProfile Config API* sources, e.g. the environment +variable ``DATAVERSE_RSERVE_PASSWORD``. dataverse.rserve.tempdir ++++++++++++++++++++++++ -Temporary directory used by Rserve (defaults to /tmp/Rserv). Note that this location is local to the host on which Rserv is running (specified in ``dataverse.rserve.host`` above). When talking to Rserve, Dataverse needs to know this location in order to generate absolute path names of the files on the other end. +Temporary directory used by Rserve (defaults to /tmp/Rserv). Note that this +location is local to the host on which Rserv is running (specified in +``dataverse.rserve.host`` above). When talking to Rserve, Dataverse needs to +know this location in order to generate absolute path names of the files on the +other end. + +Defaults to ``/tmp``. + +Can also be set via *MicroProfile Config API* sources, e.g. the environment +variable ``DATAVERSE_RSERVE_TEMPDIR``. .. _dataverse.dropbox.key: From 139f0f7d5c762533e4fdcec5dc65e487170e570f Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 8 Mar 2022 17:27:51 +0100 Subject: [PATCH 029/173] feat(ct-base): add new base container image in submodule --- modules/container-base/pom.xml | 90 ++++++++ .../container-base/src/main/docker/Dockerfile | 204 ++++++++++++++++++ .../src/main/docker/assembly.xml | 17 ++ .../src/main/docker/scripts/entrypoint.sh | 17 ++ .../init_1_generate_deploy_commands.sh | 65 ++++++ .../main/docker/scripts/startInForeground.sh | 89 ++++++++ modules/dataverse-parent/pom.xml | 51 +++++ 7 files changed, 533 insertions(+) create mode 100644 modules/container-base/pom.xml create mode 100644 modules/container-base/src/main/docker/Dockerfile create mode 100644 modules/container-base/src/main/docker/assembly.xml create mode 100644 modules/container-base/src/main/docker/scripts/entrypoint.sh create mode 100644 modules/container-base/src/main/docker/scripts/init_1_generate_deploy_commands.sh create mode 100644 modules/container-base/src/main/docker/scripts/startInForeground.sh diff --git a/modules/container-base/pom.xml b/modules/container-base/pom.xml new file mode 100644 index 00000000000..8cb7e1ac795 --- /dev/null +++ b/modules/container-base/pom.xml @@ -0,0 +1,90 @@ + + + 4.0.0 + + + edu.harvard.iq + dataverse-parent + ${revision} + ../dataverse-parent + + + io.gdcc + container-base + ${packaging.type} + Container Base Image + This module provides an application server base image to be decorated with the Dataverse app. 
+ + + + + pom + + + + + ct + + docker-build + + + + + + + org.apache.maven.plugins + maven-dependency-plugin + + + unpack + initialize + + unpack + + + + + fish.payara.distributions + payara + ${payara.version} + zip + false + ${project.build.directory} + + + + + + + + + + io.fabric8 + docker-maven-plugin + true + + + + base + %g/base:jdk${target.java.version} + ${ct.registry} + + Dockerfile + + openjdk:${target.java.version}-jre + + @ + + assembly.xml + + + + + + + + + + + \ No newline at end of file diff --git a/modules/container-base/src/main/docker/Dockerfile b/modules/container-base/src/main/docker/Dockerfile new file mode 100644 index 00000000000..635fbd89142 --- /dev/null +++ b/modules/container-base/src/main/docker/Dockerfile @@ -0,0 +1,204 @@ +# Copyright 2019 Forschungszentrum Jülich GmbH +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# +################################################################################################################ +# +# THIS FILE IS TO BE USED WITH MAVEN DOCKER BUILD: +# mvn -Pct clean package docker:build +# +################################################################################################################ +# +# Some commands used are inspired by https://github.com/payara/Payara/tree/master/appserver/extras/docker-images. +# Most parts origin from older versions of https://github.com/gdcc/dataverse-kubernetes. +# +# We are not using upstream Payara images because: +# - Using same base image as Solr (https://hub.docker.com/_/solr) is reducing pulls +# - Their image is less optimised for production usage by design choices +# + +# Make the Java base image and version configurable (useful for trying newer Java versions and flavors) +ARG BASE_IMAGE="openjdk:11-jre" +FROM $BASE_IMAGE + +# Default payara ports to expose +# 4848: admin console +# 9009: debug port (JPDA) +# 8080: http +# 8181: https +EXPOSE 4848 9009 8080 8181 + +ENV HOME_DIR="/opt/payara" +ENV PAYARA_DIR="${HOME_DIR}/appserver" \ + SCRIPT_DIR="${HOME_DIR}/scripts" \ + CONFIG_DIR="${HOME_DIR}/config" \ + DEPLOY_DIR="${HOME_DIR}/deployments" \ + DOCROOT_DIR="/docroot" \ + SECRETS_DIR="/secrets" \ + DUMPS_DIR="/dumps" \ + PASSWORD_FILE="${HOME_DIR}/passwordFile" \ + ADMIN_USER="admin" \ + ADMIN_PASSWORD="admin" \ + DOMAIN_NAME="domain1" \ + PAYARA_ARGS="" +ENV PATH="${PATH}:${PAYARA_DIR}/bin" \ + DOMAIN_DIR="${PAYARA_DIR}/glassfish/domains/${DOMAIN_NAME}" \ + DEPLOY_PROPS="" \ + PREBOOT_COMMANDS="${CONFIG_DIR}/pre-boot-commands.asadmin" \ + POSTBOOT_COMMANDS="${CONFIG_DIR}/post-boot-commands.asadmin" \ + JVM_ARGS="" \ + MEM_MAX_RAM_PERCENTAGE="70.0" \ + MEM_XSS="512k" \ + # Source: https://github.com/fabric8io-images/run-java-sh/blob/master/TUNING.md#recommandations + MEM_MIN_HEAP_FREE_RATIO="20" \ + MEM_MAX_HEAP_FREE_RATIO="40" \ + MEM_MAX_GC_PAUSE_MILLIS="500" \ + MEM_METASPACE_SIZE="256m" \ + MEM_MAX_METASPACE_SIZE="2g" \ + # Make heap dumps on OOM appear in DUMPS_DIR + ENABLE_DUMPS=0 \ + JVM_DUMPS_ARG="-XX:+HeapDumpOnOutOfMemoryError" + +ARG ESH_VERSION=0.3.1 +ARG ESH_CHECKSUM="1e0bd783f930cba13d6708b11c1ac844bbb1eddd02ac1666fc10d47eb9517bd7" +ARG JATTACH_VERSION="v2.0" +ARG JATTACH_CHECKSUM="989dc53279c7fb3ec399dbff1692647439286e5a4339c2849fd4323e998af7f8" +ARG PKGS="jq imagemagick curl unzip wget acl dirmngr gpg lsof procps netcat tini" +ARG ASADMIN="${PAYARA_DIR}/bin/asadmin 
--user=${ADMIN_USER} --passwordfile=${PASSWORD_FILE}" + +### PART 1: SYSTEM ### +USER root +WORKDIR / +SHELL ["/bin/bash", "-euo", "pipefail", "-c"] +RUN true && \ + # Create pathes + mkdir -p "${HOME_DIR}" "${PAYARA_DIR}" "${DEPLOY_DIR}" "${CONFIG_DIR}" "${SCRIPT_DIR}" && \ + mkdir -p "${DOCROOT_DIR}" "${SECRETS_DIR}" "${DUMPS_DIR}" && \ + # Create user + addgroup --gid 1000 payara && \ + adduser --system --uid 1000 --no-create-home --shell /bin/bash --home "${HOME_DIR}" --gecos "" --ingroup payara payara && \ + echo payara:payara | chpasswd && \ + # Set permissions + chown -R payara: "${HOME_DIR}" && \ + chown -R payara: "${DOCROOT_DIR}" "${SECRETS_DIR}" "${DUMPS_DIR}" + +# Installing the packages in an extra container layer for better caching +RUN true && \ + # Install packages + apt-get update -q && \ + apt-get install -qqy --no-install-recommends ${PKGS} && \ + # Download & check esh template script + curl -sSfL -o /usr/bin/esh "https://raw.githubusercontent.com/jirutka/esh/v${ESH_VERSION}/esh" && \ + echo "${ESH_CHECKSUM} /usr/bin/esh" | sha256sum -c - && \ + chmod +x /usr/bin/esh && \ + # Install jattach + curl -sSfL -o /usr/bin/jattach "https://github.com/apangin/jattach/releases/download/${JATTACH_VERSION}/jattach" && \ + echo "${JATTACH_CHECKSUM} /usr/bin/jattach" | sha256sum -c - && \ + chmod +x /usr/bin/jattach && \ + # Cleanup + rm -rf "/var/lib/apt/lists/*" + +### PART 2: PAYARA ### +# After setting up system, now configure Payara +USER payara +WORKDIR ${HOME_DIR} + +# Copy Payara from build context (cached by Maven) +COPY --chown=payara:payara maven/appserver ${PAYARA_DIR}/ + +# Copy the system (appserver level) scripts like entrypoint, etc +COPY --chown=payara:payara maven/scripts ${SCRIPT_DIR}/ + +# Configure the domain to be container and production ready +RUN true && \ + # Set admin password + echo "AS_ADMIN_PASSWORD=" > /tmp/password-change-file.txt && \ + echo "AS_ADMIN_NEWPASSWORD=${ADMIN_PASSWORD}" >> /tmp/password-change-file.txt && \ + echo "AS_ADMIN_PASSWORD=${ADMIN_PASSWORD}" >> ${PASSWORD_FILE} && \ + asadmin --user=${ADMIN_USER} --passwordfile=/tmp/password-change-file.txt change-admin-password --domain_name=${DOMAIN_NAME} && \ + # Start domain for configuration + ${ASADMIN} start-domain ${DOMAIN_NAME} && \ + # Allow access to admin with password only + ${ASADMIN} enable-secure-admin && \ + ### CONTAINER USAGE ENABLEMENT + # List & delete memory settings from domain + for MEMORY_JVM_OPTION in $(${ASADMIN} list-jvm-options | grep "Xm[sx]\|Xss\|NewRatio"); \ + do \ + ${ASADMIN} delete-jvm-options $(echo $MEMORY_JVM_OPTION | sed -e 's/:/\\:/g'); \ + done && \ + # Tweak memory settings for containers + ${ASADMIN} create-jvm-options "-XX\:+UseContainerSupport" && \ + ${ASADMIN} create-jvm-options "-XX\:MaxRAMPercentage=\${ENV=MEM_MAX_RAM_PERCENTAGE}" && \ + ${ASADMIN} create-jvm-options "-Xss\${ENV=MEM_XSS}" && \ + ${ASADMIN} create-jvm-options "-XX\:MinHeapFreeRatio=\${ENV=MEM_MIN_HEAP_FREE_RATIO}" && \ + ${ASADMIN} create-jvm-options "-XX\:MaxHeapFreeRatio=\${ENV=MEM_MAX_HEAP_FREE_RATIO}" && \ + ${ASADMIN} create-jvm-options "-XX\:HeapDumpPath=\${ENV=DUMPS_DIR}" && \ + # Set logging to console only for containers + ${ASADMIN} set-log-attributes com.sun.enterprise.server.logging.GFFileHandler.logtoFile=false && \ + ### PRODUCTION READINESS + ${ASADMIN} create-jvm-options '-XX\:+UseG1GC' && \ + ${ASADMIN} create-jvm-options '-XX\:+UseStringDeduplication' && \ + ${ASADMIN} create-jvm-options '-XX\:MaxGCPauseMillis=${ENV=MEM_MAX_GC_PAUSE_MILLIS}' && \ + ${ASADMIN} 
create-jvm-options '-XX\:MetaspaceSize=${ENV=MEM_METASPACE_SIZE}' && \ + ${ASADMIN} create-jvm-options '-XX\:MaxMetaspaceSize=${ENV=MEM_MAX_METASPACE_SIZE}' && \ + ${ASADMIN} create-jvm-options '-XX\:+IgnoreUnrecognizedVMOptions' && \ + # Enlarge thread pools + ${ASADMIN} set server-config.thread-pools.thread-pool.http-thread-pool.max-thread-pool-size="50" && \ + ${ASADMIN} set server-config.thread-pools.thread-pool.http-thread-pool.max-queue-size="" && \ + ${ASADMIN} set default-config.thread-pools.thread-pool.thread-pool-1.max-thread-pool-size="250" && \ + # Enable file caching + ${ASADMIN} set server-config.network-config.protocols.protocol.http-listener-1.http.file-cache.enabled="true" && \ + ${ASADMIN} set server-config.network-config.protocols.protocol.http-listener-2.http.file-cache.enabled="true" && \ + ${ASADMIN} set default-config.network-config.protocols.protocol.http-listener-1.http.file-cache.enabled="true" && \ + ${ASADMIN} set default-config.network-config.protocols.protocol.http-listener-2.http.file-cache.enabled="true" && \ + # Enlarge EJB pools (cannot do this for server-config as set does not create new entries) + ${ASADMIN} set default-config.ejb-container.max-pool-size="128" && \ + # Misc settings + ${ASADMIN} create-system-properties fish.payara.classloading.delegate="false" && \ + ${ASADMIN} create-system-properties jersey.config.client.readTimeout="300000" && \ + ${ASADMIN} create-system-properties jersey.config.client.connectTimeout="300000" && \ + ### DATAVERSE APPLICATION SPECIFICS + # Configure the MicroProfile directory config source to point to /secrets + ${ASADMIN} set-config-dir --directory="${SECRETS_DIR}" && \ + # Make request timeouts configurable via MPCONFIG (default to 900 secs = 15 min) + ${ASADMIN} set 'server-config.network-config.protocols.protocol.http-listener-1.http.request-timeout-seconds=${MPCONFIG=dataverse.http.timeout:900}' && \ + # TODO: what of the below 3 items can be deleted for container usage? + ${ASADMIN} create-network-listener --protocol=http-listener-1 --listenerport=8009 --jkenabled=true jk-connector && \ + ${ASADMIN} set server-config.network-config.protocols.protocol.http-listener-1.http.comet-support-enabled=true && \ + ${ASADMIN} create-system-properties javax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl && \ + # Always disable phoning home... + ${ASADMIN} disable-phone-home && \ + ### CLEANUP + # Stop domain + ${ASADMIN} stop-domain "${DOMAIN_NAME}" && \ + # Delete generated files + rm -rf \ + "/tmp/password-change-file.txt" \ + "${PAYARA_DIR}/glassfish/domains/${DOMAIN_NAME}/osgi-cache" \ + "${PAYARA_DIR}/glassfish/domains/${DOMAIN_NAME}/logs" + +# Make docroot of Payara reside in higher level directory for easier targeting +# Due to gdcc/dataverse-kubernetes#177: create the generated pathes so they are +# writeable by us. TBR with gdcc/dataverse-kubernetes#178. +RUN rm -rf "${DOMAIN_DIR}"/docroot && \ + ln -s "${DOCROOT_DIR}" "${DOMAIN_DIR}"/docroot && \ + mkdir -p "${DOMAIN_DIR}"/generated/jsp/dataverse + +# Set the entrypoint to tini (as a process supervisor) +ENTRYPOINT ["/usr/bin/tini", "--"] +# JSON syntax should be used, but bypassed shell. Thus re-add expansion via shell exec. 
+CMD ["sh", "-c", "${SCRIPT_DIR}/entrypoint.sh"] + +LABEL org.opencontainers.image.created="@git.build.time@" \ + org.opencontainers.image.authors="Research Data Management at FZJ " \ + org.opencontainers.image.url="https://k8s-docs.gdcc.io" \ + org.opencontainers.image.documentation="https://k8s-docs.gdcc.io" \ + org.opencontainers.image.source="https://github.com/gdcc/dataverse/tree/develop%2Bct/modules/container-base" \ + org.opencontainers.image.version="@project.version@" \ + org.opencontainers.image.revision="@git.commit.id.abbrev@" \ + org.opencontainers.image.vendor="Global Dataverse Community Consortium" \ + org.opencontainers.image.licenses="Apache-2.0" \ + org.opencontainers.image.title="dataverse-k8s :: Dataverse containerized" \ + org.opencontainers.image.description="This container image provides an application server tuned for Dataverse software" diff --git a/modules/container-base/src/main/docker/assembly.xml b/modules/container-base/src/main/docker/assembly.xml new file mode 100644 index 00000000000..afd5530fa60 --- /dev/null +++ b/modules/container-base/src/main/docker/assembly.xml @@ -0,0 +1,17 @@ + + + + + ${project.basedir}/target/payara5 + appserver + + + + ${project.basedir}/src/main/docker/scripts + scripts + 0755 + + + \ No newline at end of file diff --git a/modules/container-base/src/main/docker/scripts/entrypoint.sh b/modules/container-base/src/main/docker/scripts/entrypoint.sh new file mode 100644 index 00000000000..6f71dfe013c --- /dev/null +++ b/modules/container-base/src/main/docker/scripts/entrypoint.sh @@ -0,0 +1,17 @@ +#!/bin/bash +########################################################################################################## +# +# This script is a fork of https://github.com/payara/Payara/blob/master/appserver/extras/docker-images/ +# server-full/src/main/docker/bin/entrypoint.sh and licensed under CDDL 1.1 by the Payara Foundation. +# +########################################################################################################## + +for f in "${SCRIPT_DIR}"/init_* "${SCRIPT_DIR}"/init.d/*; do + case "$f" in + *.sh) echo "[Entrypoint] running $f"; . "$f" ;; + *) echo "[Entrypoint] ignoring $f" ;; + esac + echo +done + +exec "${SCRIPT_DIR}"/startInForeground.sh "${PAYARA_ARGS}" diff --git a/modules/container-base/src/main/docker/scripts/init_1_generate_deploy_commands.sh b/modules/container-base/src/main/docker/scripts/init_1_generate_deploy_commands.sh new file mode 100644 index 00000000000..e2d717af666 --- /dev/null +++ b/modules/container-base/src/main/docker/scripts/init_1_generate_deploy_commands.sh @@ -0,0 +1,65 @@ +#!/bin/bash +########################################################################################################## +# +# A script to append deploy commands to the post boot command file at +# $PAYARA_HOME/scripts/post-boot-commands.asadmin file. All applications in the +# $DEPLOY_DIR (either files or folders) will be deployed. +# The $POSTBOOT_COMMANDS file can then be used with the start-domain using the +# --postbootcommandfile parameter to deploy applications on startup. +# +# Usage: +# ./generate_deploy_commands.sh +# +# Optionally, any number of parameters of the asadmin deploy command can be +# specified as parameters to this script. +# E.g., to deploy applications with implicit CDI scanning disabled: +# +# ./generate_deploy_commands.sh --properties=implicitCdiEnabled=false +# +# Environment variables used: +# - $PREBOOT_COMMANDS - the pre boot command file. 
+# - $POSTBOOT_COMMANDS - the post boot command file. +# +# Note that many parameters to the deploy command can be safely used only when +# a single application exists in the $DEPLOY_DIR directory. +# +########################################################################################################## +# +# This script is a fork of https://github.com/payara/Payara/blob/master/appserver/extras/docker-images/ +# server-full/src/main/docker/bin/init_1_generate_deploy_commands.sh and licensed under CDDL 1.1 +# by the Payara Foundation. +# +########################################################################################################## + +# Check required variables are set +if [ -z "$DEPLOY_DIR" ]; then echo "Variable DEPLOY_DIR is not set."; exit 1; fi +if [ -z "$PREBOOT_COMMANDS" ]; then echo "Variable PREBOOT_COMMANDS is not set."; exit 1; fi +if [ -z "$POSTBOOT_COMMANDS" ]; then echo "Variable POSTBOOT_COMMANDS is not set."; exit 1; fi + +# Create pre and post boot command files if they don't exist +touch "$POSTBOOT_COMMANDS" +touch "$PREBOOT_COMMANDS" + +deploy() { + + if [ -z "$1" ]; then + echo "No deployment specified"; + exit 1; + fi + + DEPLOY_STATEMENT="deploy $DEPLOY_PROPS $1" + if grep -q "$1" "$POSTBOOT_COMMANDS"; then + echo "post boot commands already deploys $1"; + else + echo "Adding deployment target $1 to post boot commands"; + echo "$DEPLOY_STATEMENT" >> "$POSTBOOT_COMMANDS"; + fi +} + +# RAR files first +find "$DEPLOY_DIR" -mindepth 1 -maxdepth 1 -name "*.rar" -print0 \ + | while IFS= read -r -d '' file; do deploy "$file"; done + +# Then every other WAR, EAR, JAR or directory +find "$DEPLOY_DIR" -mindepth 1 -maxdepth 1 ! -name "*.rar" -a -name "*.war" -o -name "*.ear" -o -name "*.jar" -o -type d -print0 \ + | while IFS= read -r -d '' file; do deploy "$file"; done \ No newline at end of file diff --git a/modules/container-base/src/main/docker/scripts/startInForeground.sh b/modules/container-base/src/main/docker/scripts/startInForeground.sh new file mode 100644 index 00000000000..4843f6ae055 --- /dev/null +++ b/modules/container-base/src/main/docker/scripts/startInForeground.sh @@ -0,0 +1,89 @@ +#!/bin/bash +########################################################################################################## +# +# This script is to execute Payara Server in foreground, mainly in a docker environment. +# It allows to avoid running 2 instances of JVM, which happens with the start-domain --verbose command. +# +# Usage: +# Running +# startInForeground.sh +# is equivalent to running +# asadmin start-domain +# +# It's possible to use any arguments of the start-domain command as arguments to startInForeground.sh +# +# Environment variables used: +# - $ADMIN_USER - the username to use for the asadmin utility. +# - $PASSWORD_FILE - the password file to use for the asadmin utility. +# - $PREBOOT_COMMANDS - the pre boot command file. +# - $POSTBOOT_COMMANDS - the post boot command file. +# - $DOMAIN_NAME - the name of the domain to start. +# - $JVM_ARGS - extra JVM options to pass to the Payara Server instance. +# - $AS_ADMIN_MASTERPASSWORD - the master password for the Payara Server instance. +# +# This script executes the asadmin tool which is expected at ~/appserver/bin/asadmin. 
+# +########################################################################################################## +# +# This script is a fork of https://github.com/payara/Payara/blob/master/appserver/ +# extras/docker-images/server-full/src/main/docker/bin/startInForeground.sh and licensed under CDDL 1.1 +# by the Payara Foundation. +# +########################################################################################################## + +# Check required variables are set +if [ -z "$ADMIN_USER" ]; then echo "Variable ADMIN_USER is not set."; exit 1; fi +if [ -z "$PASSWORD_FILE" ]; then echo "Variable PASSWORD_FILE is not set."; exit 1; fi +if [ -z "$PREBOOT_COMMANDS" ]; then echo "Variable PREBOOT_COMMANDS is not set."; exit 1; fi +if [ -z "$POSTBOOT_COMMANDS" ]; then echo "Variable POSTBOOT_COMMANDS is not set."; exit 1; fi +if [ -z "$DOMAIN_NAME" ]; then echo "Variable DOMAIN_NAME is not set."; exit 1; fi + +# Check if dumps are enabled - add arg to JVM_ARGS in this case +if [ -n "${ENABLE_DUMPS}" ] && [ "${ENABLE_DUMPS}" = "1" ]; then + JVM_ARGS="${JVM_DUMPS_ARG} ${JVM_ARGS}" +fi + +# The following command gets the command line to be executed by start-domain +# - print the command line to the server with --dry-run, each argument on a separate line +# - remove -read-string argument +# - surround each line except with parenthesis to allow spaces in paths +# - remove lines before and after the command line and squash commands on a single line + +# Create pre and post boot command files if they don't exist +touch "$POSTBOOT_COMMANDS" +touch "$PREBOOT_COMMANDS" + +# shellcheck disable=SC2068 +# -- Using $@ is necessary here as asadmin cannot deal with options enclosed in ""! +OUTPUT=$("${PAYARA_DIR}"/bin/asadmin --user="${ADMIN_USER}" --passwordfile="${PASSWORD_FILE}" start-domain --dry-run --prebootcommandfile="${PREBOOT_COMMANDS}" --postbootcommandfile="${POSTBOOT_COMMANDS}" $@ "$DOMAIN_NAME") +STATUS=$? 
+if [ "$STATUS" -ne 0 ] + then + echo ERROR: "$OUTPUT" >&2 + exit 1 +fi + +COMMAND=$(echo "$OUTPUT"\ + | sed -n -e '2,/^$/p'\ + | sed "s|glassfish.jar|glassfish.jar $JVM_ARGS |g") + +echo Executing Payara Server with the following command line: +echo "$COMMAND" | tr ' ' '\n' +echo + +# Run the server in foreground - read master password from variable or file or use the default "changeit" password + +set +x +if test "$AS_ADMIN_MASTERPASSWORD"x = x -a -f "$PASSWORD_FILE" + then + # shellcheck disable=SC1090 + source "$PASSWORD_FILE" +fi +if test "$AS_ADMIN_MASTERPASSWORD"x = x + then + AS_ADMIN_MASTERPASSWORD=changeit +fi +echo "AS_ADMIN_MASTERPASSWORD=$AS_ADMIN_MASTERPASSWORD" > /tmp/masterpwdfile +# shellcheck disable=SC2086 +# -- Unquoted exec var is necessary, as otherwise things get escaped that may not be escaped (parameters for Java) +exec ${COMMAND} < /tmp/masterpwdfile diff --git a/modules/dataverse-parent/pom.xml b/modules/dataverse-parent/pom.xml index 14b84f80279..4db2232be7d 100644 --- a/modules/dataverse-parent/pom.xml +++ b/modules/dataverse-parent/pom.xml @@ -182,6 +182,10 @@ 3.0.0-M5 3.3.0 3.1.2 + + + 0.39.1 + ghcr.io @@ -244,6 +248,11 @@ + + io.fabric8 + docker-maven-plugin + ${fabric8-dmp.version} + @@ -315,4 +324,46 @@ --> + + + ct + + + 5.2022.1 + + + + + + + io.github.git-commit-id + git-commit-id-maven-plugin + 5.0.0 + + + retrieve-git-details + + revision + + initialize + + + + ${project.basedir}/../../.git + UTC + 8 + false + + + + + + + + From 2319a4787e0c4e41b633382ed7c9684130933be8 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 16 Jun 2022 21:22:41 +0200 Subject: [PATCH 030/173] feat(ct-base): remove the esh tool Will be replaced with a capability to make API endpoints for authentication providers read from MPCONFIG sources. 
--- modules/container-base/src/main/docker/Dockerfile | 6 ------ 1 file changed, 6 deletions(-) diff --git a/modules/container-base/src/main/docker/Dockerfile b/modules/container-base/src/main/docker/Dockerfile index 635fbd89142..491c0747ada 100644 --- a/modules/container-base/src/main/docker/Dockerfile +++ b/modules/container-base/src/main/docker/Dockerfile @@ -61,8 +61,6 @@ ENV PATH="${PATH}:${PAYARA_DIR}/bin" \ ENABLE_DUMPS=0 \ JVM_DUMPS_ARG="-XX:+HeapDumpOnOutOfMemoryError" -ARG ESH_VERSION=0.3.1 -ARG ESH_CHECKSUM="1e0bd783f930cba13d6708b11c1ac844bbb1eddd02ac1666fc10d47eb9517bd7" ARG JATTACH_VERSION="v2.0" ARG JATTACH_CHECKSUM="989dc53279c7fb3ec399dbff1692647439286e5a4339c2849fd4323e998af7f8" ARG PKGS="jq imagemagick curl unzip wget acl dirmngr gpg lsof procps netcat tini" @@ -89,10 +87,6 @@ RUN true && \ # Install packages apt-get update -q && \ apt-get install -qqy --no-install-recommends ${PKGS} && \ - # Download & check esh template script - curl -sSfL -o /usr/bin/esh "https://raw.githubusercontent.com/jirutka/esh/v${ESH_VERSION}/esh" && \ - echo "${ESH_CHECKSUM} /usr/bin/esh" | sha256sum -c - && \ - chmod +x /usr/bin/esh && \ # Install jattach curl -sSfL -o /usr/bin/jattach "https://github.com/apangin/jattach/releases/download/${JATTACH_VERSION}/jattach" && \ echo "${JATTACH_CHECKSUM} /usr/bin/jattach" | sha256sum -c - && \ From f0202cb2c177c5ebeeb176c58c8b27256d32697b Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 17 Jun 2022 10:29:37 +0200 Subject: [PATCH 031/173] chore(deps): update container plugin and payara version for containers --- modules/dataverse-parent/pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/dataverse-parent/pom.xml b/modules/dataverse-parent/pom.xml index 4db2232be7d..fa693f8a8ac 100644 --- a/modules/dataverse-parent/pom.xml +++ b/modules/dataverse-parent/pom.xml @@ -184,7 +184,7 @@ 3.1.2 - 0.39.1 + 0.40.1 ghcr.io @@ -334,7 +334,7 @@ See also: https://github.com/IQSS/dataverse/issues/8048 See also: https://github.com/payara/Payara/issues/5368 --> - 5.2022.1 + 5.2022.2 From 2dc0596d8634cadecb691b95a39ba5a3355fcd99 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 11 Aug 2022 13:54:41 +0200 Subject: [PATCH 032/173] fix(ct-base): unpack Payara to target/payara Payara 5 defaults to a "payara5" topmost dir, Payara 6 to "payara6". To avoid adding different directories in the assembly, cut the number from the directories name when unpacking. This does not prevent you from doing stupid things like not cleaning before switching the version leading to an unknown state of old and new libs, etc. 
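For illustration, only the archive's top-level directory is rewritten while unpacking,
e.g. (assuming the usual Payara zip layout):

    payara5/glassfish/domains/domain1/...  ->  target/payara/glassfish/domains/domain1/...
    payara6/glassfish/domains/domain1/...  ->  target/payara/glassfish/domains/domain1/...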
--- modules/container-base/pom.xml | 6 ++++++ modules/container-base/src/main/docker/assembly.xml | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/modules/container-base/pom.xml b/modules/container-base/pom.xml index 8cb7e1ac795..765a4c72843 100644 --- a/modules/container-base/pom.xml +++ b/modules/container-base/pom.xml @@ -51,6 +51,12 @@ zip false ${project.build.directory} + + + ^payara\d + payara + + diff --git a/modules/container-base/src/main/docker/assembly.xml b/modules/container-base/src/main/docker/assembly.xml index afd5530fa60..9fc62d49fa1 100644 --- a/modules/container-base/src/main/docker/assembly.xml +++ b/modules/container-base/src/main/docker/assembly.xml @@ -4,7 +4,7 @@ - ${project.basedir}/target/payara5 + ${project.basedir}/target/payara appserver From 246f8b8cbfd18356c6f2cb63481d1fa02afad390 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 11 Aug 2022 14:03:19 +0200 Subject: [PATCH 033/173] fix(ct-base): migrate base image from OpenJDK to Eclipse Temurin There was an ongoing discussion that the Docker Hub Image "openjdk" is not backed by any official supported project but complete goodwill of Oracle shipping their JRE/JDK. There is no "real" release of OpenJDK . There exist only real distributions like Oracle JDK, Eclipse Temurin, Azul JDK, AWS Corretto etc (see https://whichjdk.com). As for this reason the "openjdk" image has been deprecated, switching to Eclipse Temurin JRE here. See also: https://github.com/docker-library/openjdk/issues/505 --- modules/container-base/pom.xml | 2 +- modules/container-base/src/main/docker/Dockerfile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/container-base/pom.xml b/modules/container-base/pom.xml index 765a4c72843..5ebaa9ea323 100644 --- a/modules/container-base/pom.xml +++ b/modules/container-base/pom.xml @@ -78,7 +78,7 @@ Dockerfile - openjdk:${target.java.version}-jre + eclipse-temurin:${target.java.version}-jre @ diff --git a/modules/container-base/src/main/docker/Dockerfile b/modules/container-base/src/main/docker/Dockerfile index 491c0747ada..2fed83db59f 100644 --- a/modules/container-base/src/main/docker/Dockerfile +++ b/modules/container-base/src/main/docker/Dockerfile @@ -20,7 +20,7 @@ # # Make the Java base image and version configurable (useful for trying newer Java versions and flavors) -ARG BASE_IMAGE="openjdk:11-jre" +ARG BASE_IMAGE="eclipse-temurin:11-jre" FROM $BASE_IMAGE # Default payara ports to expose From 76ea50871bafe028d1edad35f441e7731398ed00 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 11 Aug 2022 14:05:06 +0200 Subject: [PATCH 034/173] chore(deps): update Docker Maven Plugin to the latest release --- modules/dataverse-parent/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/dataverse-parent/pom.xml b/modules/dataverse-parent/pom.xml index fa693f8a8ac..eaa09b61bd7 100644 --- a/modules/dataverse-parent/pom.xml +++ b/modules/dataverse-parent/pom.xml @@ -184,7 +184,7 @@ 3.1.2 - 0.40.1 + 0.40.2 ghcr.io From f62dee2ec6a5dd237e2fbc10346bdebeb6a3c2f1 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 11 Aug 2022 15:13:48 +0200 Subject: [PATCH 035/173] feat(ct-base): enable multiarch image build via docker buildx With the rise of Apple M1/M2 silicons, we need to provide ARM64 based images in addition to AMD64. 
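The build entry point stays the same; roughly (assuming a docker buildx builder with
QEMU/binfmt emulation is available on the build host, since the non-native platform has
to be cross-built):

    mvn -Pct clean package docker:build

which should end up driving something equivalent to

    docker buildx build --platform linux/amd64,linux/arm64 .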
--- modules/container-base/pom.xml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/modules/container-base/pom.xml b/modules/container-base/pom.xml index 5ebaa9ea323..add8a120a58 100644 --- a/modules/container-base/pom.xml +++ b/modules/container-base/pom.xml @@ -76,6 +76,12 @@ %g/base:jdk${target.java.version} ${ct.registry} + + + linux/arm64 + linux/amd64 + + Dockerfile eclipse-temurin:${target.java.version}-jre From 72935d481e1e1ab260e763a000bfef172629cc16 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 12 Aug 2022 12:08:13 +0200 Subject: [PATCH 036/173] chore(ct-base): add maintainer details to POM --- modules/container-base/pom.xml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/modules/container-base/pom.xml b/modules/container-base/pom.xml index add8a120a58..015ebba598d 100644 --- a/modules/container-base/pom.xml +++ b/modules/container-base/pom.xml @@ -16,6 +16,18 @@ Container Base Image This module provides an application server base image to be decorated with the Dataverse app. + + + poikilotherm + Oliver Bertuch + github@bertuch.eu + Europe/Berlin + + maintainer + + + + From 17d8b53bb985fc77faebc8273b84012fac2bb525 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 12 Aug 2022 12:09:21 +0200 Subject: [PATCH 037/173] docs(ct-base): update OCI tag labels --- modules/container-base/src/main/docker/Dockerfile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/modules/container-base/src/main/docker/Dockerfile b/modules/container-base/src/main/docker/Dockerfile index 2fed83db59f..036e2f17831 100644 --- a/modules/container-base/src/main/docker/Dockerfile +++ b/modules/container-base/src/main/docker/Dockerfile @@ -187,12 +187,12 @@ CMD ["sh", "-c", "${SCRIPT_DIR}/entrypoint.sh"] LABEL org.opencontainers.image.created="@git.build.time@" \ org.opencontainers.image.authors="Research Data Management at FZJ " \ - org.opencontainers.image.url="https://k8s-docs.gdcc.io" \ - org.opencontainers.image.documentation="https://k8s-docs.gdcc.io" \ - org.opencontainers.image.source="https://github.com/gdcc/dataverse/tree/develop%2Bct/modules/container-base" \ + org.opencontainers.image.url="https://guides.dataverse.org/en/latest/container/" \ + org.opencontainers.image.documentation="https://guides.dataverse.org/en/latest/container/" \ + org.opencontainers.image.source="https://github.com/IQSS/dataverse/tree/develop/modules/container-base" \ org.opencontainers.image.version="@project.version@" \ org.opencontainers.image.revision="@git.commit.id.abbrev@" \ org.opencontainers.image.vendor="Global Dataverse Community Consortium" \ org.opencontainers.image.licenses="Apache-2.0" \ - org.opencontainers.image.title="dataverse-k8s :: Dataverse containerized" \ + org.opencontainers.image.title="Dataverse Base Image" \ org.opencontainers.image.description="This container image provides an application server tuned for Dataverse software" From 0a9947bd6868b9b45314b6fe0cfc918c48ed4eeb Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 12 Aug 2022 12:11:14 +0200 Subject: [PATCH 038/173] feat(ct-base): add debug/develop mode script --- .../container-base/src/main/docker/Dockerfile | 4 +- .../init_1_generate_devmode_commands.sh | 61 +++++++++++++++++++ 2 files changed, 64 insertions(+), 1 deletion(-) create mode 100644 modules/container-base/src/main/docker/scripts/init_1_generate_devmode_commands.sh diff --git a/modules/container-base/src/main/docker/Dockerfile b/modules/container-base/src/main/docker/Dockerfile index 036e2f17831..fe44fc61847 100644 
--- a/modules/container-base/src/main/docker/Dockerfile +++ b/modules/container-base/src/main/docker/Dockerfile @@ -59,7 +59,9 @@ ENV PATH="${PATH}:${PAYARA_DIR}/bin" \ MEM_MAX_METASPACE_SIZE="2g" \ # Make heap dumps on OOM appear in DUMPS_DIR ENABLE_DUMPS=0 \ - JVM_DUMPS_ARG="-XX:+HeapDumpOnOutOfMemoryError" + JVM_DUMPS_ARG="-XX:+HeapDumpOnOutOfMemoryError" \ + ENABLE_JMX=0 \ + ENABLE_JDWP=0 ARG JATTACH_VERSION="v2.0" ARG JATTACH_CHECKSUM="989dc53279c7fb3ec399dbff1692647439286e5a4339c2849fd4323e998af7f8" diff --git a/modules/container-base/src/main/docker/scripts/init_1_generate_devmode_commands.sh b/modules/container-base/src/main/docker/scripts/init_1_generate_devmode_commands.sh new file mode 100644 index 00000000000..9d71e3bb81b --- /dev/null +++ b/modules/container-base/src/main/docker/scripts/init_1_generate_devmode_commands.sh @@ -0,0 +1,61 @@ +#!/bin/bash + +set -euo pipefail + +###### ###### ###### ###### ###### ###### ###### ###### ###### ###### ###### +# This script enables different development options, like a JMX connector +# usable with VisualVM, JRebel hot-reload support and JDWP debugger service. +# Enable it by adding env vars on startup (e.g. via ConfigMap) +# +# As this script is "sourced" from entrypoint.sh, we can manipulate env vars +# for the parent shell before executing Payara. +###### ###### ###### ###### ###### ###### ###### ###### ###### ###### ###### + +# 0. Init variables +ENABLE_JMX=${ENABLE_JMX:-0} +ENABLE_JDWP=${ENABLE_JDWP:-0} + +DV_PREBOOT=${PAYARA_DIR}/dataverse_preboot +echo "# Dataverse preboot configuration for Payara" > "${DV_PREBOOT}" + +# 1. Configure JMX (enabled by default on port 8686, but requires SSL) +# See also https://blog.payara.fish/monitoring-payara-server-with-jconsole +# To still use it, you can use a sidecar container proxying or using JMX via localhost without SSL. +if [ "${ENABLE_JMX}" = "1" ]; then + echo "Enabling unsecured JMX on 0.0.0.0:8686. You'll need a sidecar for this, as access is allowed from same machine only (without SSL)." 
+ { \ + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.jvm=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.connector-service=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.connector-connection-pool=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.jdbc-connection-pool=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.web-services-container=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.ejb-container=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.thread-pool=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.http-service=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.security=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.jms-service=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.jersey=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.transaction-service=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.jpa=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.web-container=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.orb=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.deployment=HIGH" + #echo "set configs.config.server-config.admin-service.jmx-connector.system.address=127.0.0.1" + echo "set configs.config.server-config.admin-service.jmx-connector.system.security-enabled=false" + } >> "${DV_PREBOOT}" +fi + +# 2. Enable JDWP via debugging switch +if [ "${ENABLE_JDWP}" = "1" ]; then + echo "Enabling JDWP remote debugging support via asadmin debugging switch." + export PAYARA_ARGS="${PAYARA_ARGS} --debug=true" +fi + +# 3. 
Add the commands to the existing preboot file, but insert BEFORE deployment +TMP_PREBOOT=$(mktemp) +cat "${DV_PREBOOT}" "${PREBOOT_COMMANDS}" > "${TMP_PREBOOT}" +mv "${TMP_PREBOOT}" "${PREBOOT_COMMANDS}" +echo "DEBUG: preboot contains the following commands:" +echo "--------------------------------------------------" +cat "${PREBOOT_COMMANDS}" +echo "--------------------------------------------------" \ No newline at end of file From 2e812dcc15413d5814072b86971b924ee13824e4 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Mon, 22 Aug 2022 13:50:48 +0200 Subject: [PATCH 039/173] deps(ct-base): update to jattach v2.1 --- modules/container-base/src/main/docker/Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/container-base/src/main/docker/Dockerfile b/modules/container-base/src/main/docker/Dockerfile index fe44fc61847..d13808c3272 100644 --- a/modules/container-base/src/main/docker/Dockerfile +++ b/modules/container-base/src/main/docker/Dockerfile @@ -63,8 +63,8 @@ ENV PATH="${PATH}:${PAYARA_DIR}/bin" \ ENABLE_JMX=0 \ ENABLE_JDWP=0 -ARG JATTACH_VERSION="v2.0" -ARG JATTACH_CHECKSUM="989dc53279c7fb3ec399dbff1692647439286e5a4339c2849fd4323e998af7f8" +ARG JATTACH_VERSION="v2.1" +ARG JATTACH_CHECKSUM="07885fdc782e02e7302c6d190f54c3930afa10a38140365adf54076ec1086a8e" ARG PKGS="jq imagemagick curl unzip wget acl dirmngr gpg lsof procps netcat tini" ARG ASADMIN="${PAYARA_DIR}/bin/asadmin --user=${ADMIN_USER} --passwordfile=${PASSWORD_FILE}" From 7e836c70dd44a538bf1fdd0d73045730da053951 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 23 Aug 2022 00:10:24 +0200 Subject: [PATCH 040/173] chore(ct-base): add JMX to exposed ports and make it default enabled as in Payara --- modules/container-base/src/main/docker/Dockerfile | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/modules/container-base/src/main/docker/Dockerfile b/modules/container-base/src/main/docker/Dockerfile index d13808c3272..ba459607826 100644 --- a/modules/container-base/src/main/docker/Dockerfile +++ b/modules/container-base/src/main/docker/Dockerfile @@ -25,10 +25,11 @@ FROM $BASE_IMAGE # Default payara ports to expose # 4848: admin console -# 9009: debug port (JPDA) +# 9009: debug port (JDWP) # 8080: http # 8181: https -EXPOSE 4848 9009 8080 8181 +# 8686: JMX +EXPOSE 4848 9009 8080 8181 8686 ENV HOME_DIR="/opt/payara" ENV PAYARA_DIR="${HOME_DIR}/appserver" \ @@ -60,7 +61,7 @@ ENV PATH="${PATH}:${PAYARA_DIR}/bin" \ # Make heap dumps on OOM appear in DUMPS_DIR ENABLE_DUMPS=0 \ JVM_DUMPS_ARG="-XX:+HeapDumpOnOutOfMemoryError" \ - ENABLE_JMX=0 \ + ENABLE_JMX=1 \ ENABLE_JDWP=0 ARG JATTACH_VERSION="v2.1" From fe7b2d06148e6a2e6d6b2939f366de9ea2162cff Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 23 Aug 2022 00:11:08 +0200 Subject: [PATCH 041/173] docs(ct): add container guide to guides index --- doc/sphinx-guides/source/container/index.rst | 26 ++++++++++++++++++++ doc/sphinx-guides/source/index.rst | 7 ++++-- 2 files changed, 31 insertions(+), 2 deletions(-) create mode 100644 doc/sphinx-guides/source/container/index.rst diff --git a/doc/sphinx-guides/source/container/index.rst b/doc/sphinx-guides/source/container/index.rst new file mode 100644 index 00000000000..1bf86f16f43 --- /dev/null +++ b/doc/sphinx-guides/source/container/index.rst @@ -0,0 +1,26 @@ +Container Guide +=============== + +**Contents:** + +.. 
toctree:: + + base-image + app-image + +Running Dataverse software in containers is quite different than in a :doc:`classic installation <../installation/prep>`. + +Both approaches have pros and cons. These days (2022) containers are very often used for development and testing, +but there is an ever rising move for running applications in the cloud using container technology. + +**NOTE:** +**As the "Institute for Quantitative Social Sciences" at Harvard is running their installations in the classic +deployment way, the container support is mostly created and maintained by the Dataverse community.** + +This guide is *not* about installation on technology like Docker Swarm, Kubernetes, Rancher or other +solutions to run containers in production. There is the `Dataverse on K8s project `_ for this +purpose. + +This guide focuses on describing the container images managed from the main Dataverse repository (again: by the +community, not IQSS), their features and limitations. Instructions on how to build the images yourself, how to +extend them and how to use them for development purposes may be found in respective subpages. \ No newline at end of file diff --git a/doc/sphinx-guides/source/index.rst b/doc/sphinx-guides/source/index.rst index f7e81756e5b..f15a973544d 100755 --- a/doc/sphinx-guides/source/index.rst +++ b/doc/sphinx-guides/source/index.rst @@ -19,17 +19,20 @@ These documentation guides are for the |version| version of Dataverse. To find g installation/index developers/index style/index + container/index How the Guides Are Organized ---------------------------- The guides are documentation that explain how to use Dataverse, which are divided into the following sections: User Guide, -Installation Guide, Developer Guide, API Guide and Style Guide. The User Guide is further divided into primary activities: finding & using +Installation Guide, Developer Guide, API Guide, Style Guide and Container Guide. +The User Guide is further divided into primary activities: finding & using data, adding Datasets, administering dataverses or Datasets, and Dataset exploration/visualizations. Details on all of the above tasks can be found in the Users Guide. The Installation Guide is for people or organizations who want to host their -own Dataverse. The Developer Guide contains instructions for +own Dataverse. The Container Guide adds to this information on container-based installations. +The Developer Guide contains instructions for people who want to contribute to the Open Source Dataverse project or who want to modify the code to suit their own needs. Finally, the API Guide is for Developers that work on other applications and are interested in connecting with Dataverse through our APIs. From a93dbbdb4c5d2cfed80a13f265238a59f551999a Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 23 Aug 2022 00:12:35 +0200 Subject: [PATCH 042/173] docs(ct-base): add extensive base image module documentation --- .../source/container/base-image.rst | 229 ++++++++++++++++++ 1 file changed, 229 insertions(+) create mode 100644 doc/sphinx-guides/source/container/base-image.rst diff --git a/doc/sphinx-guides/source/container/base-image.rst b/doc/sphinx-guides/source/container/base-image.rst new file mode 100644 index 00000000000..4f441f79ad7 --- /dev/null +++ b/doc/sphinx-guides/source/container/base-image.rst @@ -0,0 +1,229 @@ +Application Base Image +====================== + +Within the main repository, you may find the base image's files at ``/modules/container-base``. 
+This Maven module uses the `Maven Docker Plugin `_ to build and ship the image. + +Contents +++++++++ + +The base image provides: + +- `Eclipse Temurin JRE using Java 11 `_ +- `Payara Community Application Server `_ +- CLI tools necessary to run Dataverse (i. e. ``curl`` or ``jq`` - see also :doc:`../installation/prerequisites` in Installation Guide) +- Linux tools for analysis, monitoring and so on +- `Jattach `_ + +This image is created as a "multi-arch image", supporting the most common architectures Dataverse usually runs on: +AMD64 (Windows/Linux/...) and ARM64 (Apple M1/M2). + +It inherits being built on an Ubuntu environment from the upstream +`base image of Eclipse Temurin `_. +You are free to change the JRE/JDK image to your liking (see below). + + + +Build Instructions +++++++++++++++++++ + +Assuming you have `Docker `_, `Docker Desktop `_, +`Moby `_ or some remote Docker host configured, up and running from here on. + +Simply execute the Maven modules packaging target with activated "container profile. Either from the projects Git root: + +``mvn -Pct -f modules/container-base package`` + +Or move to the module and execute: + +``cd modules/container-base && mvn -Pct package`` + +Some additional notes, using Maven parameters to change the build and use ...: + +- ... a different Payara version: add ``-Dpayara.version=V.YYYY.R``. +- | ... a different Temurin JRE version ``A``: add ``-Dtarget.java.version=A`` (i.e. ``11``, ``17``, ...). + | *Note:* must resolve to an available Docker tag ``A-jre`` of Eclipse Temurin! +- ... a different Java Distribution: add ``-Ddocker.buildArg.BASE_IMAGE="name:tag"`` with precise reference to an + image available from local or remote (e. g. Docker Hub). + + + +Tunables +++++++++ + +The base image provides a Payara domain suited for production use, but can also be used during development. +Many settings have been carefully selected for best performance and stability of the Dataverse application. + +As with any service, you should always monitor any metrics and make use of the tuning capabilities the base image +provides. These are mostly based on environment variables (very common with containers) and provide sane defaults. + +.. list-table:: + :align: left + :width: 100 + :widths: 10 10 10 50 + :header-rows: 1 + + * - Env. variable + - Default + - Type + - Description + * - ``DEPLOY_PROPS`` + - (empty) + - String + - Set to add arguments to generated `asadmin deploy` commands. + * - ``PREBOOT_COMMANDS`` + - [preboot]_ + - Abs. path + - Provide path to file with ``asadmin`` commands to run **before** boot of application server. + See also `Pre/postboot script docs`_. + * - ``POSTBOOT_COMMANDS`` + - [postboot]_ + - Abs. path + - Provide path to file with ``asadmin`` commands to run **after** boot of application server. + See also `Pre/postboot script docs`_. + * - ``JVM_ARGS`` + - (empty) + - String + - Additional arguments to pass to application server's JVM on start. + * - ``MEM_MAX_RAM_PERCENTAGE`` + - ``70.0`` + - Percentage + - Maximum amount of container's allocated RAM to be used as heap space. + Make sure to leave some room for native memory, OS overhead etc! + * - ``MEM_XSS`` + - ``512k`` + - Size + - Tune the maximum JVM stack size. + * - ``MEM_MIN_HEAP_FREE_RATIO`` + - ``20`` + - Integer + - Make the heap shrink aggressively and grow conservatively. See also `run-java-sh recommendations`_. + * - ``MEM_MAX_HEAP_FREE_RATIO`` + - ``40`` + - Integer + - Make the heap shrink aggressively and grow conservatively. 
See also `run-java-sh recommendations`_. + * - ``MEM_MAX_GC_PAUSE_MILLIS`` + - ``500`` + - Milliseconds + - Shorter pause times might result in lots of collections causing overhead without much gain. + This needs monitoring and tuning. It's a complex matter. + * - ``MEM_METASPACE_SIZE`` + - ``256m`` + - Size + - Initial size of memory reserved for class metadata, also used as trigger to run a garbage collection + once passing this size. + * - ``MEM_MAX_METASPACE_SIZE`` + - ``2g`` + - Size + - The metaspace's size will not outgrow this limit. + * - ``ENABLE_DUMPS`` + - ``0`` + - Bool, ``0|1`` + - If enabled, the argument(s) given in ``JVM_DUMP_ARG`` will be added to the JVM starting up. + This means it will enable dumping the heap to ``${DUMPS_DIR}`` (see below) in "out of memory" cases. + (You should back this location with disk space / ramdisk, so it does not write into an overlay filesystem!) + * - ``JVM_DUMPS_ARG`` + - [dump-option]_ + - String + - Can be fine tuned for more grained controls of dumping behaviour. + * - ``ENABLE_JMX`` + - ``1`` + - Bool, ``0|1`` + - Enable JMX - Payara enables this by default, hard to deactivate. + * - ``ENABLE_JDWP`` + - ``0`` + - Bool, ``0|1`` + - Enable the "Java Debug Wire Protocol" to attach a remote debugger to the JVM in this container. + Listens on port 9009 when enabled. Search the internet for numerous tutorials to use it. + * - ``DATAVERSE_HTTP_TIMEOUT`` + - ``900`` + - Seconds + - See :ref:`:ApplicationServerSettings` ``http.request-timeout-seconds``. + + *Note:* can also be set using any other `MicroProfile Config Sources`_ available via ``dataverse.http.timeout``. + + +.. [preboot] ``${CONFIG_DIR}/pre-boot-commands.asadmin`` +.. [postboot] ``${CONFIG_DIR}/post-boot-commands.asadmin`` +.. [dump-option] ``-XX:+HeapDumpOnOutOfMemoryError`` + + + +Locations ++++++++++ + +This environment variables represent certain locations and might be reused in your scripts etc. +These variables aren't meant to be reconfigurable and reflect state in the filesystem layout! + +.. list-table:: + :align: left + :width: 100 + :widths: 10 10 50 + :header-rows: 1 + + * - Env. variable + - Value + - Description + * - ``HOME_DIR`` + - ``/opt/payara`` + - Home base to Payara and the application + * - ``PAYARA_DIR`` + - ``${HOME_DIR}/appserver`` + - Installation directory of Payara server + * - ``SCRIPT_DIR`` + - ``${HOME_DIR}/scripts`` + - Any scripts like the container entrypoint, init scripts, etc + * - ``CONFIG_DIR`` + - ``${HOME_DIR}/config`` + - Payara Server configurations like pre/postboot command files go here + (Might be reused for Dataverse one day) + * - ``DEPLOY_DIR`` + - ``${HOME_DIR}/deployments`` + - Any EAR or WAR file, exploded WAR directory etc are autodeployed on start + * - ``DOCROOT_DIR`` + - ``/docroot`` + - Mount a volume here to store i18n language bundle files, sitemaps, images for Dataverse collections, logos, + custom themes and stylesheets, etc here. You might need to replicate this data or place on shared file storage. + * - ``SECRETS_DIR`` + - ``/secrets`` + - Mount secrets or other here, being picked up automatically by + `Directory Config Source `_. + See also various :doc:`../installation/config` options involving secrets. + * - ``DUMPS_DIR`` + - ``/dumps`` + - Default location where heap dumps will be stored (see above). + You should mount some storage here (disk or ephemeral). + * - ``DOMAIN_DIR`` + - ``${PAYARA_DIR}/glassfish`` ``/domains/${DOMAIN_NAME}`` + - Path to root of the Payara domain applications will be deployed into. 
Usually ``${DOMAIN_NAME}`` will be ``domain1``. + + + +Exposed Ports ++++++++++++++ + +The default ports that are exposed by this image are: + +- 8080 - HTTP listener +- 8181 - HTTPS listener +- 4848 - Admin Service HTTPS listener +- 8686 - JMX listener +- 9009 - "Java Debug Wire Protocol" port (when ``ENABLE_JDWP=1``) + + + +Hints ++++++ + +By default, ``domain1`` is enabled to use the ``G1GC`` garbage collector. + +For running a Java application within a Linux based container, the support for CGroups is essential. It has been +included and activated by default since Java 8u192, Java 11 LTS and later. If you are interested in more details, +you can read about those in a few places like https://developers.redhat.com/articles/2022/04/19/java-17-whats-new-openjdks-container-awareness, +https://www.eclipse.org/openj9/docs/xxusecontainersupport, etc. The other memory defaults are inspired +from `run-java-sh recommendations`_. + + +.. _Pre/postboot script docs: https://docs.payara.fish/community/docs/Technical%20Documentation/Payara%20Micro%20Documentation/Payara%20Micro%20Configuration%20and%20Management/Micro%20Management/Asadmin%20Commands/Pre%20and%20Post%20Boot%20Commands.html +.. _MicroProfile Config Sources: https://docs.payara.fish/community/docs/Technical%20Documentation/MicroProfile/Config/Overview.html +.. _run-java-sh recommendations: https://github.com/fabric8io-images/run-java-sh/blob/master/TUNING.md#recommandations \ No newline at end of file From 67db02ff0249720c47e3025820c30fb6d737ec83 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Wed, 24 Aug 2022 15:08:30 +0200 Subject: [PATCH 043/173] docs(ct-base): remove reference to not (yet) existing docs page --- doc/sphinx-guides/source/container/index.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/sphinx-guides/source/container/index.rst b/doc/sphinx-guides/source/container/index.rst index 1bf86f16f43..801ded7d0a5 100644 --- a/doc/sphinx-guides/source/container/index.rst +++ b/doc/sphinx-guides/source/container/index.rst @@ -6,7 +6,6 @@ Container Guide .. toctree:: base-image - app-image Running Dataverse software in containers is quite different than in a :doc:`classic installation <../installation/prep>`. From d5f80754e0ebf1ed56d34c1d7dbbe3d5fdc49b4a Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Wed, 24 Aug 2022 17:38:29 +0200 Subject: [PATCH 044/173] docs(ct-base): add Docker Hub Eclipse Temurin tag search example --- doc/sphinx-guides/source/container/base-image.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/sphinx-guides/source/container/base-image.rst b/doc/sphinx-guides/source/container/base-image.rst index 4f441f79ad7..4333bf38d5c 100644 --- a/doc/sphinx-guides/source/container/base-image.rst +++ b/doc/sphinx-guides/source/container/base-image.rst @@ -43,6 +43,7 @@ Some additional notes, using Maven parameters to change the build and use ...: - ... a different Payara version: add ``-Dpayara.version=V.YYYY.R``. - | ... a different Temurin JRE version ``A``: add ``-Dtarget.java.version=A`` (i.e. ``11``, ``17``, ...). | *Note:* must resolve to an available Docker tag ``A-jre`` of Eclipse Temurin! + (See also `Docker Hub search example `_) - ... a different Java Distribution: add ``-Ddocker.buildArg.BASE_IMAGE="name:tag"`` with precise reference to an image available from local or remote (e. g. Docker Hub). 
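Putting the options listed above together, a customized build could look like this (a sketch; the version values are examples only and must resolve to an existing Payara release and Temurin image tag):

    mvn -Pct -f modules/container-base package \
        -Dtarget.java.version=17 \
        -Dpayara.version=5.2022.3
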
From 5e61241a27229fdbe7ce6fb7e84c520b609fdb33 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 26 Aug 2022 18:26:30 +0200 Subject: [PATCH 045/173] style(ct-base): incorporate requested changes by @pdurbin - Change order of guides - Remove unnecessary quotes from IQSS - Add TOC to base image docs - Add flag again about community support only to base image docs --- doc/sphinx-guides/source/container/base-image.rst | 14 ++++++++++++-- doc/sphinx-guides/source/container/index.rst | 5 +++-- doc/sphinx-guides/source/index.rst | 2 +- 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/doc/sphinx-guides/source/container/base-image.rst b/doc/sphinx-guides/source/container/base-image.rst index 4333bf38d5c..ac64323eeea 100644 --- a/doc/sphinx-guides/source/container/base-image.rst +++ b/doc/sphinx-guides/source/container/base-image.rst @@ -1,11 +1,21 @@ Application Base Image ====================== +.. contents:: |toctitle| + :local: + Within the main repository, you may find the base image's files at ``/modules/container-base``. This Maven module uses the `Maven Docker Plugin `_ to build and ship the image. -Contents -++++++++ +**NOTE: This image is created, maintained and supported by the Dataverse community on a best-effort basis.** +IQSS will not offer you support how to deploy or run it, please reach out to the community for help on using it. +You might be interested in taking a look at :doc:`../developers/containers`, linking you to some (community-based) +efforts. + + + +Image Contents +++++++++++++++ The base image provides: diff --git a/doc/sphinx-guides/source/container/index.rst b/doc/sphinx-guides/source/container/index.rst index 801ded7d0a5..f6c99bfc19e 100644 --- a/doc/sphinx-guides/source/container/index.rst +++ b/doc/sphinx-guides/source/container/index.rst @@ -13,8 +13,9 @@ Both approaches have pros and cons. These days (2022) containers are very often but there is an ever rising move for running applications in the cloud using container technology. **NOTE:** -**As the "Institute for Quantitative Social Sciences" at Harvard is running their installations in the classic -deployment way, the container support is mostly created and maintained by the Dataverse community.** +**As the Institute for Quantitative Social Sciences (IQSS) at Harvard is running their installations in the classic +deployment way, the container support is mostly created and maintained by the Dataverse community on a best-effort +basis.** This guide is *not* about installation on technology like Docker Swarm, Kubernetes, Rancher or other solutions to run containers in production. There is the `Dataverse on K8s project `_ for this diff --git a/doc/sphinx-guides/source/index.rst b/doc/sphinx-guides/source/index.rst index f15a973544d..cbfafb419ab 100755 --- a/doc/sphinx-guides/source/index.rst +++ b/doc/sphinx-guides/source/index.rst @@ -18,8 +18,8 @@ These documentation guides are for the |version| version of Dataverse. To find g api/index installation/index developers/index - style/index container/index + style/index How the Guides Are Organized ---------------------------- From a3a70998b9fcacc1a96e8357d459cba489425785 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 26 Aug 2022 18:29:30 +0200 Subject: [PATCH 046/173] feat(ct-base): make image names configurable and rename Add new Maven properties to choose a different Java base image and change the name of the target base image when people customize it. Also changes the build arg for the Java base image name. 
With this, the image name changes to follow the same convention as the Java base image. --- doc/sphinx-guides/source/container/base-image.rst | 4 +++- modules/container-base/pom.xml | 7 ++++--- modules/container-base/src/main/docker/Dockerfile | 4 ++-- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/doc/sphinx-guides/source/container/base-image.rst b/doc/sphinx-guides/source/container/base-image.rst index ac64323eeea..834381e6779 100644 --- a/doc/sphinx-guides/source/container/base-image.rst +++ b/doc/sphinx-guides/source/container/base-image.rst @@ -50,11 +50,13 @@ Or move to the module and execute: Some additional notes, using Maven parameters to change the build and use ...: +- | ... a different image name and tag: add ``-Dbase.image=name:tag``. + | *Note:* default is ``gdcc/base:${target.java.version}-jre`` - ... a different Payara version: add ``-Dpayara.version=V.YYYY.R``. - | ... a different Temurin JRE version ``A``: add ``-Dtarget.java.version=A`` (i.e. ``11``, ``17``, ...). | *Note:* must resolve to an available Docker tag ``A-jre`` of Eclipse Temurin! (See also `Docker Hub search example `_) -- ... a different Java Distribution: add ``-Ddocker.buildArg.BASE_IMAGE="name:tag"`` with precise reference to an +- ... a different Java Distribution: add ``-Djava.image="name:tag"`` with precise reference to an image available from local or remote (e. g. Docker Hub). diff --git a/modules/container-base/pom.xml b/modules/container-base/pom.xml index 015ebba598d..f8e97bb4349 100644 --- a/modules/container-base/pom.xml +++ b/modules/container-base/pom.xml @@ -39,6 +39,8 @@ ct docker-build + gdcc/base:${target.java.version}-jre + eclipse-temurin:${target.java.version}-jre @@ -85,8 +87,7 @@ base - %g/base:jdk${target.java.version} - ${ct.registry} + ${base.image} @@ -96,7 +97,7 @@ Dockerfile - eclipse-temurin:${target.java.version}-jre + ${java.image} @ diff --git a/modules/container-base/src/main/docker/Dockerfile b/modules/container-base/src/main/docker/Dockerfile index ba459607826..6fdc790a21a 100644 --- a/modules/container-base/src/main/docker/Dockerfile +++ b/modules/container-base/src/main/docker/Dockerfile @@ -20,8 +20,8 @@ # # Make the Java base image and version configurable (useful for trying newer Java versions and flavors) -ARG BASE_IMAGE="eclipse-temurin:11-jre" -FROM $BASE_IMAGE +ARG JAVA_IMAGE="eclipse-temurin:11-jre" +FROM $JAVA_IMAGE # Default payara ports to expose # 4848: admin console From 06d31fde25c3bfa812339c0afad94b7a83e92e59 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 26 Aug 2022 18:34:40 +0200 Subject: [PATCH 047/173] fix(ct-base): make container build use install not package goal By switching to `mvn install` instead of `mvn package`, we allow the main image carrying the application to declare a dependency on the container-base module (to make sure it get's built alongside, as we might want to change the Payara version!) This commits also adds the Maven install plugin to the parent POM for versioning plus to the container-base POM for having the target available. (This is a necessary workaround for a Maven Docker Plugin shortcoming.) 
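In practice, building the base image locally then happens via the install goal, e.g. from the project root:

    mvn -Pct -f modules/container-base install

so that a depending application image module can resolve the just-built module artifact from the local Maven repository.
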
--- .../source/container/base-image.rst | 4 ++-- modules/container-base/pom.xml | 19 +++++++++++++++++++ modules/dataverse-parent/pom.xml | 6 ++++++ 3 files changed, 27 insertions(+), 2 deletions(-) diff --git a/doc/sphinx-guides/source/container/base-image.rst b/doc/sphinx-guides/source/container/base-image.rst index 834381e6779..585fe1184e7 100644 --- a/doc/sphinx-guides/source/container/base-image.rst +++ b/doc/sphinx-guides/source/container/base-image.rst @@ -42,11 +42,11 @@ Assuming you have `Docker `_, `Docker D Simply execute the Maven modules packaging target with activated "container profile. Either from the projects Git root: -``mvn -Pct -f modules/container-base package`` +``mvn -Pct -f modules/container-base install`` Or move to the module and execute: -``cd modules/container-base && mvn -Pct package`` +``cd modules/container-base && mvn -Pct install`` Some additional notes, using Maven parameters to change the build and use ...: diff --git a/modules/container-base/pom.xml b/modules/container-base/pom.xml index f8e97bb4349..0e8f24a781b 100644 --- a/modules/container-base/pom.xml +++ b/modules/container-base/pom.xml @@ -108,6 +108,25 @@ + + + + maven-install-plugin + + + default-install + install + + install + + + + diff --git a/modules/dataverse-parent/pom.xml b/modules/dataverse-parent/pom.xml index eaa09b61bd7..411ce85b2fa 100644 --- a/modules/dataverse-parent/pom.xml +++ b/modules/dataverse-parent/pom.xml @@ -178,6 +178,7 @@ 3.2.2 3.3.2 3.2.0 + 3.0.0-M1 3.0.0-M5 3.0.0-M5 3.3.0 @@ -226,6 +227,11 @@ maven-dependency-plugin ${maven-dependency-plugin.version} + + org.apache.maven.plugins + maven-install-plugin + ${maven-install-plugin.version} + org.apache.maven.plugins maven-surefire-plugin From 98ad9361843519b3f904ecc3df5d7b877802c30a Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 26 Aug 2022 18:37:28 +0200 Subject: [PATCH 048/173] fix(ct-base): flatten container-base POM By using the flattening POM plugin, the installed POM will not carry references to the dataverse-parent module. This reference is a) unnecessary and b) troublesome because of the ${revision} hack. (And we do not provide it as a dependency from Central/...) --- modules/container-base/.gitignore | 1 + modules/container-base/pom.xml | 36 +++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) create mode 100644 modules/container-base/.gitignore diff --git a/modules/container-base/.gitignore b/modules/container-base/.gitignore new file mode 100644 index 00000000000..d75620abf70 --- /dev/null +++ b/modules/container-base/.gitignore @@ -0,0 +1 @@ +.flattened-pom.xml diff --git a/modules/container-base/pom.xml b/modules/container-base/pom.xml index 0e8f24a781b..cee3989661a 100644 --- a/modules/container-base/pom.xml +++ b/modules/container-base/pom.xml @@ -108,6 +108,42 @@ + + + + org.codehaus.mojo + flatten-maven-plugin + 1.2.7 + + true + oss + + remove + remove + + + + + + flatten + process-resources + + flatten + + + + + flatten.clean + clean + + clean + + + + 0.40.2 - ghcr.io From 64f84ea461d0fc8d1e4147b1bdcb8b86c2bafcd0 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Wed, 31 Aug 2022 01:56:51 +0200 Subject: [PATCH 063/173] style(ct-base): make up base image name from tag and add default With defaulting to develop, we rest on using any build of the image during experimentation etc to go with a (local) develop tag. Removing the Java version from the tag makes it easier to use and reflects the nature of it. 
It aligns image builds with the release schema of the actual application while still allowing for experiments and having different sources of truth for released and develop code. --- modules/container-base/pom.xml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modules/container-base/pom.xml b/modules/container-base/pom.xml index 12eb3b137ff..67e2c2f9911 100644 --- a/modules/container-base/pom.xml +++ b/modules/container-base/pom.xml @@ -39,7 +39,8 @@ ct docker-build - gdcc/base:${target.java.version}-jre + gdcc/base:${base.image.tag} + develop eclipse-temurin:${target.java.version}-jre 1000 1000 From 5a986af6cc7651fd43ec5a4207349dab17b6651e Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Wed, 31 Aug 2022 01:57:16 +0200 Subject: [PATCH 064/173] chore(deps): make container profile use Payara 5.2022.3 --- modules/dataverse-parent/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/dataverse-parent/pom.xml b/modules/dataverse-parent/pom.xml index 7a3b71fb68c..86b46817635 100644 --- a/modules/dataverse-parent/pom.xml +++ b/modules/dataverse-parent/pom.xml @@ -340,7 +340,7 @@ See also: https://github.com/IQSS/dataverse/issues/8048 See also: https://github.com/payara/Payara/issues/5368 --> - 5.2022.2 + 5.2022.3 From 65f9d6356b8caca3ddd54e323c838e6b9749f3cc Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Wed, 31 Aug 2022 02:02:32 +0200 Subject: [PATCH 065/173] feat(ct-base): enable base image pushes for master and develop branch - Make pushes to develop or master branch release a container image to Docker Hub by default (can be changed / extended). - Defaulting to the develop tag by default makes it more reusable for depending workflows based on pull requests. - Moving all multi-arch building to only happen on pushes, as it will be done during push/deploy phase only and those need credentials only avail in git push context running at repo owner of CI action. - Removing the Java version matrix parameter, too - we are gonna stick with what is default for releasing the images as they are meant to be a somewhat reliable base. It's still open for experiments. 
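The push boils down to a single Maven call, which the workflow below runs with the repository's Docker Hub credentials; executed by hand it would look roughly like this (only useful if you can actually push to the target repository):

    mvn -f modules/container-base -Pct deploy \
        -Dbase.image.tag=develop -Ddocker.registry=docker.io
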
--- .github/workflows/container_base_push.yml | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml index fc23b30d8ad..82c7a376ae0 100644 --- a/.github/workflows/container_base_push.yml +++ b/.github/workflows/container_base_push.yml @@ -5,16 +5,21 @@ on: push: branches: - 'develop' + - 'master' paths: - 'modules/container-base/**' - 'modules/dataverse-parent/pom.xml' pull_request: branches: - 'develop' + - 'master' paths: - 'modules/container-base/**' - 'modules/dataverse-parent/pom.xml' +env: + IMAGE_TAG: develop + REGISTRY: docker.io jobs: build: @@ -45,14 +50,21 @@ jobs: key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }} restore-keys: ${{ runner.os }}-m2 - - name: Set up QEMU for multi-arch builds - uses: docker/setup-qemu-action@v2 - - name: Build base container image - run: mvn -f modules/container-base -Pct package -Dtarget.java.version=${{ matrix.jdk }} + - name: Build base container image with local architecture + run: mvn -f modules/container-base -Pct package - if: ${{ github.event_name == 'push' }} # run only if this is a push - PRs have no access to secrets name: Log in to the Container registry uses: docker/login-action@v1 with: username: ${{ secrets.DOCKERHUB_USERNAME }} - password: ${{ secrets.DOCKERHUB_TOKEN }} \ No newline at end of file + password: ${{ secrets.DOCKERHUB_TOKEN }} + - if: ${{ github.event_name == 'push' }} # run only if this is a push - multi-arch makes no sense with PR + name: Set up QEMU for multi-arch builds + uses: docker/setup-qemu-action@v2 + - name: Re-set image tag based on branch + if: ${{ github.ref == 'master' }} + run: echo "IMAGE_TAG=release" + - if: ${{ github.event_name == 'push' }} # run only if this is a push - tag push will only succeed in upstream + name: Deploy multi-arch base container image to Docker Hub + run: mvn -f modules/container-base -Pct deploy -Dbase.image.tag=${{ env.IMAGE_TAG }} -Ddocker.registry=${{ env.REGISTRY }} From 8f39ef2c6e564af53756895a0115e0d58f24d602 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Wed, 14 Sep 2022 16:42:29 +0200 Subject: [PATCH 066/173] style(ct-base): upgrade Dockerfile with heredocs #8932 Instead of using "&& \" style continuation of a RUN layer, newer Docker versions (since 2021) allow usage of heredocs. 
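For illustration, a RUN layer written in heredoc style looks roughly like this (a generic sketch, not taken from this patch; requires a BuildKit-enabled builder):

    RUN <<EOF
        apt-get update -q
        apt-get install -qqy --no-install-recommends curl
    EOF

Every line inside the block is a plain shell statement, so there is no need to chain commands with "&& \" anymore.
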
Also move some ARG to more suitable places --- .../container-base/src/main/docker/Dockerfile | 158 ++++++++++-------- 1 file changed, 85 insertions(+), 73 deletions(-) diff --git a/modules/container-base/src/main/docker/Dockerfile b/modules/container-base/src/main/docker/Dockerfile index caec4ee6619..68b9da13c67 100644 --- a/modules/container-base/src/main/docker/Dockerfile +++ b/modules/container-base/src/main/docker/Dockerfile @@ -67,43 +67,47 @@ ENV PATH="${PATH}:${PAYARA_DIR}/bin" \ ENABLE_JDWP=0 \ ENABLE_RELOAD=0 -ARG JATTACH_VERSION="v2.1" -ARG JATTACH_CHECKSUM="07885fdc782e02e7302c6d190f54c3930afa10a38140365adf54076ec1086a8e" -ARG PKGS="jq imagemagick curl unzip wget acl dirmngr gpg lsof procps netcat tini" -ARG ASADMIN="${PAYARA_DIR}/bin/asadmin --user=${ADMIN_USER} --passwordfile=${PASSWORD_FILE}" - ### PART 1: SYSTEM ### ARG UID=1000 ARG GID=1000 USER root WORKDIR / SHELL ["/bin/bash", "-euo", "pipefail", "-c"] -RUN true && \ +RUN <> /tmp/password-change-file.txt && \ - echo "AS_ADMIN_PASSWORD=${ADMIN_PASSWORD}" >> ${PASSWORD_FILE} && \ - asadmin --user=${ADMIN_USER} --passwordfile=/tmp/password-change-file.txt change-admin-password --domain_name=${DOMAIN_NAME} && \ + echo "AS_ADMIN_PASSWORD=" > /tmp/password-change-file.txt + echo "AS_ADMIN_NEWPASSWORD=${ADMIN_PASSWORD}" >> /tmp/password-change-file.txt + echo "AS_ADMIN_PASSWORD=${ADMIN_PASSWORD}" >> ${PASSWORD_FILE} + asadmin --user=${ADMIN_USER} --passwordfile=/tmp/password-change-file.txt change-admin-password --domain_name=${DOMAIN_NAME} # Start domain for configuration - ${ASADMIN} start-domain ${DOMAIN_NAME} && \ + ${ASADMIN} start-domain ${DOMAIN_NAME} # Allow access to admin with password only - ${ASADMIN} enable-secure-admin && \ + ${ASADMIN} enable-secure-admin + ### CONTAINER USAGE ENABLEMENT # List & delete memory settings from domain - for MEMORY_JVM_OPTION in $(${ASADMIN} list-jvm-options | grep "Xm[sx]\|Xss\|NewRatio"); \ - do \ - ${ASADMIN} delete-jvm-options $(echo $MEMORY_JVM_OPTION | sed -e 's/:/\\:/g'); \ - done && \ + for MEMORY_JVM_OPTION in $(${ASADMIN} list-jvm-options | grep "Xm[sx]\|Xss\|NewRatio"); + do + ${ASADMIN} delete-jvm-options $(echo $MEMORY_JVM_OPTION | sed -e 's/:/\\:/g'); + done # Tweak memory settings for containers - ${ASADMIN} create-jvm-options "-XX\:+UseContainerSupport" && \ - ${ASADMIN} create-jvm-options "-XX\:MaxRAMPercentage=\${ENV=MEM_MAX_RAM_PERCENTAGE}" && \ - ${ASADMIN} create-jvm-options "-Xss\${ENV=MEM_XSS}" && \ - ${ASADMIN} create-jvm-options "-XX\:MinHeapFreeRatio=\${ENV=MEM_MIN_HEAP_FREE_RATIO}" && \ - ${ASADMIN} create-jvm-options "-XX\:MaxHeapFreeRatio=\${ENV=MEM_MAX_HEAP_FREE_RATIO}" && \ - ${ASADMIN} create-jvm-options "-XX\:HeapDumpPath=\${ENV=DUMPS_DIR}" && \ + ${ASADMIN} create-jvm-options "-XX\:+UseContainerSupport" + ${ASADMIN} create-jvm-options "-XX\:MaxRAMPercentage=\${ENV=MEM_MAX_RAM_PERCENTAGE}" + ${ASADMIN} create-jvm-options "-Xss\${ENV=MEM_XSS}" + ${ASADMIN} create-jvm-options "-XX\:MinHeapFreeRatio=\${ENV=MEM_MIN_HEAP_FREE_RATIO}" + ${ASADMIN} create-jvm-options "-XX\:MaxHeapFreeRatio=\${ENV=MEM_MAX_HEAP_FREE_RATIO}" + ${ASADMIN} create-jvm-options "-XX\:HeapDumpPath=\${ENV=DUMPS_DIR}" # Set logging to console only for containers - ${ASADMIN} set-log-attributes com.sun.enterprise.server.logging.GFFileHandler.logtoFile=false && \ + ${ASADMIN} set-log-attributes com.sun.enterprise.server.logging.GFFileHandler.logtoFile=false \ + ### PRODUCTION READINESS - ${ASADMIN} create-jvm-options '-XX\:+UseG1GC' && \ - ${ASADMIN} create-jvm-options 
'-XX\:+UseStringDeduplication' && \ - ${ASADMIN} create-jvm-options '-XX\:+DisableExplicitGC' && \ - ${ASADMIN} create-jvm-options '-XX\:MaxGCPauseMillis=${ENV=MEM_MAX_GC_PAUSE_MILLIS}' && \ - ${ASADMIN} create-jvm-options '-XX\:MetaspaceSize=${ENV=MEM_METASPACE_SIZE}' && \ - ${ASADMIN} create-jvm-options '-XX\:MaxMetaspaceSize=${ENV=MEM_MAX_METASPACE_SIZE}' && \ - ${ASADMIN} create-jvm-options '-XX\:+IgnoreUnrecognizedVMOptions' && \ + ${ASADMIN} create-jvm-options '-XX\:+UseG1GC' + ${ASADMIN} create-jvm-options '-XX\:+UseStringDeduplication' + ${ASADMIN} create-jvm-options '-XX\:+DisableExplicitGC' + ${ASADMIN} create-jvm-options '-XX\:MaxGCPauseMillis=${ENV=MEM_MAX_GC_PAUSE_MILLIS}' + ${ASADMIN} create-jvm-options '-XX\:MetaspaceSize=${ENV=MEM_METASPACE_SIZE}' + ${ASADMIN} create-jvm-options '-XX\:MaxMetaspaceSize=${ENV=MEM_MAX_METASPACE_SIZE}' + ${ASADMIN} create-jvm-options '-XX\:+IgnoreUnrecognizedVMOptions' # Disable autodeploy and hot reload - ${ASADMIN} set configs.config.server-config.admin-service.das-config.dynamic-reload-enabled="false" && \ - ${ASADMIN} set configs.config.server-config.admin-service.das-config.autodeploy-enabled="false" && \ + ${ASADMIN} set configs.config.server-config.admin-service.das-config.dynamic-reload-enabled="false" + ${ASADMIN} set configs.config.server-config.admin-service.das-config.autodeploy-enabled="false" # Enlarge thread pools - ${ASADMIN} set server-config.thread-pools.thread-pool.http-thread-pool.max-thread-pool-size="50" && \ - ${ASADMIN} set server-config.thread-pools.thread-pool.http-thread-pool.max-queue-size="" && \ - ${ASADMIN} set default-config.thread-pools.thread-pool.thread-pool-1.max-thread-pool-size="250" && \ + ${ASADMIN} set server-config.thread-pools.thread-pool.http-thread-pool.max-thread-pool-size="50" + ${ASADMIN} set server-config.thread-pools.thread-pool.http-thread-pool.max-queue-size="" + ${ASADMIN} set default-config.thread-pools.thread-pool.thread-pool-1.max-thread-pool-size="250" # Enable file caching - ${ASADMIN} set server-config.network-config.protocols.protocol.http-listener-1.http.file-cache.enabled="true" && \ - ${ASADMIN} set server-config.network-config.protocols.protocol.http-listener-2.http.file-cache.enabled="true" && \ - ${ASADMIN} set default-config.network-config.protocols.protocol.http-listener-1.http.file-cache.enabled="true" && \ - ${ASADMIN} set default-config.network-config.protocols.protocol.http-listener-2.http.file-cache.enabled="true" && \ + ${ASADMIN} set server-config.network-config.protocols.protocol.http-listener-1.http.file-cache.enabled="true" + ${ASADMIN} set server-config.network-config.protocols.protocol.http-listener-2.http.file-cache.enabled="true" + ${ASADMIN} set default-config.network-config.protocols.protocol.http-listener-1.http.file-cache.enabled="true" + ${ASADMIN} set default-config.network-config.protocols.protocol.http-listener-2.http.file-cache.enabled="true" # Disable the HTTPS listener (we are always fronting our appservers with a reverse proxy handling SSL) - ${ASADMIN} set configs.config.server-config.network-config.network-listeners.network-listener.http-listener-2.enabled="false" && \ - # Enlarge and tune EJB pools (cannot do this for server-config as set does not create new entries) \ - ${ASADMIN} set default-config.ejb-container.pool-resize-quantity="2" && \ - ${ASADMIN} set default-config.ejb-container.max-pool-size="128" && \ - ${ASADMIN} set default-config.ejb-container.steady-pool-size="10" && \ + ${ASADMIN} set 
configs.config.server-config.network-config.network-listeners.network-listener.http-listener-2.enabled="false" + # Enlarge and tune EJB pools (cannot do this for server-config as set does not create new entries) + ${ASADMIN} set default-config.ejb-container.pool-resize-quantity="2" + ${ASADMIN} set default-config.ejb-container.max-pool-size="128" + ${ASADMIN} set default-config.ejb-container.steady-pool-size="10" # Misc settings - ${ASADMIN} create-system-properties fish.payara.classloading.delegate="false" && \ - ${ASADMIN} create-system-properties jersey.config.client.readTimeout="300000" && \ - ${ASADMIN} create-system-properties jersey.config.client.connectTimeout="300000" && \ + ${ASADMIN} create-system-properties fish.payara.classloading.delegate="false" + ${ASADMIN} create-system-properties jersey.config.client.readTimeout="300000" + ${ASADMIN} create-system-properties jersey.config.client.connectTimeout="300000" \ + ### DATAVERSE APPLICATION SPECIFICS # Configure the MicroProfile directory config source to point to /secrets - ${ASADMIN} set-config-dir --directory="${SECRETS_DIR}" && \ + ${ASADMIN} set-config-dir --directory="${SECRETS_DIR}" # Make request timeouts configurable via MPCONFIG (default to 900 secs = 15 min) - ${ASADMIN} set 'server-config.network-config.protocols.protocol.http-listener-1.http.request-timeout-seconds=${MPCONFIG=dataverse.http.timeout:900}' && \ + ${ASADMIN} set 'server-config.network-config.protocols.protocol.http-listener-1.http.request-timeout-seconds=${MPCONFIG=dataverse.http.timeout:900}' # TODO: what of the below 3 items can be deleted for container usage? - ${ASADMIN} create-network-listener --protocol=http-listener-1 --listenerport=8009 --jkenabled=true jk-connector && \ - ${ASADMIN} set server-config.network-config.protocols.protocol.http-listener-1.http.comet-support-enabled=true && \ - ${ASADMIN} create-system-properties javax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl && \ + ${ASADMIN} create-network-listener --protocol=http-listener-1 --listenerport=8009 --jkenabled=true jk-connector + ${ASADMIN} set server-config.network-config.protocols.protocol.http-listener-1.http.comet-support-enabled=true + ${ASADMIN} create-system-properties javax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl # Always disable phoning home... 
- ${ASADMIN} disable-phone-home && \ + ${ASADMIN} disable-phone-home \ + ### CLEANUP # Stop domain - ${ASADMIN} stop-domain "${DOMAIN_NAME}" && \ - # Disable JSP servlet dynamic reloads \ - sed -i 's#org.apache.jasper.servlet.JspServlet#org.apache.jasper.servlet.JspServlet\n \n development\n false\n \n \n genStrAsCharArray\n true\n #' "${DOMAIN_DIR}/config/default-web.xml" && \ + ${ASADMIN} stop-domain "${DOMAIN_NAME}" + # Disable JSP servlet dynamic reloads + sed -i 's#org.apache.jasper.servlet.JspServlet#org.apache.jasper.servlet.JspServlet\n \n development\n false\n \n \n genStrAsCharArray\n true\n #' "${DOMAIN_DIR}/config/default-web.xml" # Cleanup old CA certificates to avoid unnecessary log clutter during startup - ${SCRIPT_DIR}/removeExpiredCaCerts.sh && \ + ${SCRIPT_DIR}/removeExpiredCaCerts.sh # Delete generated files rm -rf \ "/tmp/password-change-file.txt" \ "${PAYARA_DIR}/glassfish/domains/${DOMAIN_NAME}/osgi-cache" \ "${PAYARA_DIR}/glassfish/domains/${DOMAIN_NAME}/logs" +EOF # Make docroot of Payara reside in higher level directory for easier targeting # Due to gdcc/dataverse-kubernetes#177: create the generated pathes so they are # writeable by us. TBR with gdcc/dataverse-kubernetes#178. -RUN rm -rf "${DOMAIN_DIR}"/docroot && \ - ln -s "${DOCROOT_DIR}" "${DOMAIN_DIR}"/docroot && \ +RUN < Date: Wed, 14 Sep 2022 21:18:28 +0200 Subject: [PATCH 067/173] feat,fix(ct-base): add extension point for background script #8932 By moving from tini to dumb-init, we can offer a new extension point: if an application image extending this base image provides an executable script at ${SCRIPT_DIR}/startInBackground.sh, it will be executed after the init scripts and in parallel to the application server. By adding ${SCRIPT_DIR} to $PATH, we can now also skip variable expansion, fixing a bug: formerly, the "exec" in entrypoint.sh and startInForeground.sh where not replacing the shell properly. The switch to dumb-init makes sure signals will be transferred also to any background processes! 
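A minimal, hypothetical example of such a background script (any real application image would ship its own logic; the URL and sleep interval are placeholders, using the curl already bundled in the image):

    #!/bin/bash
    # ${SCRIPT_DIR}/startInBackground.sh - runs next to the app server under dumb-init
    until curl -sSf http://localhost:8080/ >/dev/null 2>&1; do
        echo "Waiting for the application server on port 8080..."
        sleep 5
    done
    echo "Application server is up - running post-start tasks now."
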
--- .../container-base/src/main/docker/Dockerfile | 10 +++++----- .../src/main/docker/scripts/entrypoint.sh | 17 ++++++++++++++++- 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/modules/container-base/src/main/docker/Dockerfile b/modules/container-base/src/main/docker/Dockerfile index 68b9da13c67..c56abb975e2 100644 --- a/modules/container-base/src/main/docker/Dockerfile +++ b/modules/container-base/src/main/docker/Dockerfile @@ -46,7 +46,7 @@ ENV PAYARA_DIR="${HOME_DIR}/appserver" \ ADMIN_PASSWORD="admin" \ DOMAIN_NAME="domain1" \ PAYARA_ARGS="" -ENV PATH="${PATH}:${PAYARA_DIR}/bin" \ +ENV PATH="${PATH}:${PAYARA_DIR}/bin:${SCRIPT_DIR}" \ DOMAIN_DIR="${PAYARA_DIR}/glassfish/domains/${DOMAIN_NAME}" \ DEPLOY_PROPS="" \ PREBOOT_COMMANDS="${CONFIG_DIR}/pre-boot-commands.asadmin" \ @@ -88,7 +88,7 @@ EOF ARG JATTACH_VERSION="v2.1" ARG JATTACH_CHECKSUM="07885fdc782e02e7302c6d190f54c3930afa10a38140365adf54076ec1086a8e" -ARG PKGS="jq imagemagick curl unzip wget acl dirmngr gpg lsof procps netcat tini" +ARG PKGS="jq imagemagick curl unzip wget acl dirmngr gpg lsof procps netcat dumb-init" # Installing the packages in an extra container layer for better caching RUN < Date: Wed, 14 Sep 2022 21:32:51 +0200 Subject: [PATCH 068/173] docs(ct-base): document startInBackground.sh #8932 --- .../source/container/base-image.rst | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/container/base-image.rst b/doc/sphinx-guides/source/container/base-image.rst index 524ef8a7fbe..3f7b3b46c85 100644 --- a/doc/sphinx-guides/source/container/base-image.rst +++ b/doc/sphinx-guides/source/container/base-image.rst @@ -23,7 +23,8 @@ The base image provides: - `Payara Community Application Server `_ - CLI tools necessary to run Dataverse (i. e. ``curl`` or ``jq`` - see also :doc:`../installation/prerequisites` in Installation Guide) - Linux tools for analysis, monitoring and so on -- `Jattach `_ +- `Jattach `__ (attach to running JVM) +- `dumb-init `__ (see :ref:`below ` for details) This image is created as a "multi-arch image", supporting the most common architectures Dataverse usually runs on: AMD64 (Windows/Linux/...) and ARM64 (Apple M1/M2). @@ -246,6 +247,22 @@ its sources plus uncached scheduled nightly builds to make sure security updates Note: for the Github Action to be able to push to Docker Hub, two repository secrets (DOCKERHUB_USERNAME, DOCKERHUB_TOKEN) have been added by IQSS admins to their repository. +.. _base-entrypoint: + +Entry & Extension Points +++++++++++++++++++++++++ + +The entrypoint shell script provided by this base image will by default ensure to: + +- Run any scripts named ``${SCRIPT_DIR}/init_*`` or in ``${SCRIPT_DIR}/init.d/*`` directory for initialization + **before** the application server starts. +- Run an executable script ``${SCRIPT_DIR}/startInBackground.sh`` in the background - if present. +- Run the application server startup scripting in foreground (``${SCRIPT_DIR}/startInForeground.sh``). + +If you need to create some scripting that runs in parallel under supervision of `dumb-init `_, +e.g. to wait for the application to deploy before executing something, this is your point of extension: simply provide +the ``${SCRIPT_DIR}/startInBackground.sh`` executable script with your application image. 
+ Other Hints From f8bf73479708a0d1cfb6882db9a118e12d70d34d Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Wed, 14 Sep 2022 21:50:06 +0200 Subject: [PATCH 069/173] ci(shellcheck,shellspec): split ShellCheck and ShellSpec To avoid unnecessary Shellspec runs for scripts that have no such tests, branch out the Shellcheck part of it into different workflow. Also make "bash" explicit as the container base image using an "unknown shebang" via dumb-init, but it's simply bash. --- .github/workflows/shellcheck.yml | 24 ++++++++++++++++++++++++ .github/workflows/shellspec.yml | 14 -------------- 2 files changed, 24 insertions(+), 14 deletions(-) create mode 100644 .github/workflows/shellcheck.yml diff --git a/.github/workflows/shellcheck.yml b/.github/workflows/shellcheck.yml new file mode 100644 index 00000000000..2d910f54127 --- /dev/null +++ b/.github/workflows/shellcheck.yml @@ -0,0 +1,24 @@ +name: "Shellcheck" +on: + push: + paths: + - conf/solr/** + - modules/container-base/** + pull_request: + paths: + - conf/solr/** + - modules/container-base/** +jobs: + shellcheck: + name: Shellcheck + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: shellcheck + uses: reviewdog/action-shellcheck@v1 + with: + github_token: ${{ secrets.github_token }} + reporter: github-pr-review # Change reporter. + fail_on_error: true + # Container base image uses dumb-init shebang, so nail to using bash + shellcheck_flags: "--shell=bash --external-sources" \ No newline at end of file diff --git a/.github/workflows/shellspec.yml b/.github/workflows/shellspec.yml index 2b127a7be5c..5c251cfc897 100644 --- a/.github/workflows/shellspec.yml +++ b/.github/workflows/shellspec.yml @@ -4,29 +4,15 @@ on: paths: - tests/shell/** - conf/solr/** - - modules/container-base/** # add more when more specs are written relying on data pull_request: paths: - tests/shell/** - conf/solr/** - - modules/container-base/** # add more when more specs are written relying on data env: SHELLSPEC_VERSION: 0.28.1 jobs: - shellcheck: - name: Shellcheck - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - name: shellcheck - uses: reviewdog/action-shellcheck@v1 - with: - github_token: ${{ secrets.github_token }} - reporter: github-pr-review # Change reporter. - fail_on_error: true - exclude: "./tests/shell/*" shellspec-ubuntu: name: "Ubuntu" runs-on: ubuntu-latest From 626b4951cfbf163895ce75e605b4daec455e0aae Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 20 Sep 2022 00:22:38 +0200 Subject: [PATCH 070/173] docs(ct-base): clarify support image tags #8932 Adding notes about the image tags produced by the community for reuse in the community. Document final tagging strategy, using the branch name (develop/main) instead of the Java version or sth. Reshape the automated builds and publishing part to be included in the supported tags and build instructions section to reduce text complexity and group matching parts together. 
--- .../source/container/base-image.rst | 40 +++++++++++++------ 1 file changed, 27 insertions(+), 13 deletions(-) diff --git a/doc/sphinx-guides/source/container/base-image.rst b/doc/sphinx-guides/source/container/base-image.rst index 3f7b3b46c85..ea54ecbebd2 100644 --- a/doc/sphinx-guides/source/container/base-image.rst +++ b/doc/sphinx-guides/source/container/base-image.rst @@ -12,6 +12,17 @@ IQSS will not offer you support how to deploy or run it, please reach out to the You might be interested in taking a look at :doc:`../developers/containers`, linking you to some (community-based) efforts. +Supported Image Tags +++++++++++++++++++++ + +This image is sourced within the main upstream code repository of the Dataverse software. Development and maintenance +happens there (again, by the community). Community supported image tags are based on the two most important branches: + +- ``develop`` representing the unstable state of affairs in Dataverse's development branch + (`Dockerfile `__) +- ``release`` representing the latest stable release in Dataverse's main branch + (`Dockerfile `__) + Image Contents @@ -51,8 +62,12 @@ Or move to the module and execute: Some additional notes, using Maven parameters to change the build and use ...: +- | ... a different tag only: add ``-Dbase.image.tag=tag``. + | *Note:* default is ``develop`` - | ... a different image name and tag: add ``-Dbase.image=name:tag``. - | *Note:* default is ``gdcc/base:${target.java.version}-jre`` + | *Note:* default is ``gdcc/base:${base.image.tag}`` +- ... a different image registry than *Docker Hub*: add ``-Ddocker.registry=registry.example.org`` (see also + `DMP docs on registries `__) - ... a different Payara version: add ``-Dpayara.version=V.YYYY.R``. - | ... a different Temurin JRE version ``A``: add ``-Dtarget.java.version=A`` (i.e. ``11``, ``17``, ...). | *Note:* must resolve to an available Docker tag ``A-jre`` of Eclipse Temurin! @@ -61,6 +76,17 @@ Some additional notes, using Maven parameters to change the build and use ...: image available from local or remote (e. g. Docker Hub). - ... a different UID/GID for the ``payara`` user/group: add ``-Dbase.image.uid=1234`` (or ``.gid``) +Automated Builds & Publishing +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +To make reusing most simple, the image is built with a Github Action within the IQSS repository and then pushed +to `Docker Hub gdcc/base repository `_. It is built and pushed on every edit to +its sources plus uncached scheduled nightly builds to make sure security updates are finding their way in. + +*Note:* For the Github Action to be able to push to Docker Hub, two repository secrets +(DOCKERHUB_USERNAME, DOCKERHUB_TOKEN) have been added by IQSS admins to their repository. + + Tunables ++++++++ @@ -234,18 +260,6 @@ The HTTPS listener (on port 8181) becomes deactivated during the build, as we wi application server and handle SSL/TLS termination at this point. Save the memory and some CPU cycles! -Publishing and Updates -++++++++++++++++++++++ - -This image is sourced within the main upstream code repository of the Dataverse software. Development and maintenance -happens there (again, by the community). - -To make reusing most simple, the image is built with a Github Action within the IQSS repository and then pushed -to `Docker Hub gdcc/base repository `_. It is built and pushed on every edit to -its sources plus uncached scheduled nightly builds to make sure security updates are finding their way in. 
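As a quick illustration of how the Maven parameters documented above combine, the following builds the base image locally with a custom tag, a different JRE major version, and a non-default UID for the ``payara`` user; the concrete values are examples only.

.. code-block:: bash

    # Illustrative local build of the base image with a few of the documented overrides.
    mvn -Pct package \
        -Dbase.image.tag=my-feature-tag \
        -Dtarget.java.version=17 \
        -Dbase.image.uid=1234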
- -Note: for the Github Action to be able to push to Docker Hub, two repository secrets -(DOCKERHUB_USERNAME, DOCKERHUB_TOKEN) have been added by IQSS admins to their repository. .. _base-entrypoint: From 77592113f310d314d7de11b372a60cf3b4e08600 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 20 Sep 2022 00:27:06 +0200 Subject: [PATCH 071/173] style,docs(ct-base): small word adjusts for some build options --- doc/sphinx-guides/source/container/base-image.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/sphinx-guides/source/container/base-image.rst b/doc/sphinx-guides/source/container/base-image.rst index ea54ecbebd2..3e83af23bfb 100644 --- a/doc/sphinx-guides/source/container/base-image.rst +++ b/doc/sphinx-guides/source/container/base-image.rst @@ -70,10 +70,10 @@ Some additional notes, using Maven parameters to change the build and use ...: `DMP docs on registries `__) - ... a different Payara version: add ``-Dpayara.version=V.YYYY.R``. - | ... a different Temurin JRE version ``A``: add ``-Dtarget.java.version=A`` (i.e. ``11``, ``17``, ...). - | *Note:* must resolve to an available Docker tag ``A-jre`` of Eclipse Temurin! + | *Note:* must resolve to an available image tag ``A-jre`` of Eclipse Temurin! (See also `Docker Hub search example `_) - ... a different Java Distribution: add ``-Djava.image="name:tag"`` with precise reference to an - image available from local or remote (e. g. Docker Hub). + image available local or remote. - ... a different UID/GID for the ``payara`` user/group: add ``-Dbase.image.uid=1234`` (or ``.gid``) Automated Builds & Publishing From 2141bcafae5fea8ac2414a0aecede81b988a7306 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 20 Sep 2022 00:48:16 +0200 Subject: [PATCH 072/173] docs(ct-base): add notes about multiarch builds #8932 Addin description on requirements to build cross platform added as subsection of the build instructions seemed valuable. --- .../source/container/base-image.rst | 23 +++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/doc/sphinx-guides/source/container/base-image.rst b/doc/sphinx-guides/source/container/base-image.rst index 3e83af23bfb..41d88c97e2d 100644 --- a/doc/sphinx-guides/source/container/base-image.rst +++ b/doc/sphinx-guides/source/container/base-image.rst @@ -37,8 +37,7 @@ The base image provides: - `Jattach `__ (attach to running JVM) - `dumb-init `__ (see :ref:`below ` for details) -This image is created as a "multi-arch image", supporting the most common architectures Dataverse usually runs on: -AMD64 (Windows/Linux/...) and ARM64 (Apple M1/M2). +This image is created as a "multi-arch image", see :ref:`below `. It inherits being built on an Ubuntu environment from the upstream `base image of Eclipse Temurin `_. @@ -86,6 +85,24 @@ its sources plus uncached scheduled nightly builds to make sure security updates *Note:* For the Github Action to be able to push to Docker Hub, two repository secrets (DOCKERHUB_USERNAME, DOCKERHUB_TOKEN) have been added by IQSS admins to their repository. +.. _base-multiarch: + +Processor Architecture and Multiarch +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +This image is created as a "multi-arch image", supporting the most common architectures Dataverse usually runs on: +AMD64 (Windows/Linux/...) and ARM64 (Apple M1/M2), by using Maven Docker Plugin's *BuildX* mode. + +Building the image via ``mvn -Pct package`` or ``mvn -Pct install`` as above will only build for the architecture of +the Docker maschine's CPU. 
+ +Only ``mvn -Pct deploy`` will trigger building on all enabled architectures. +Yet, to enable building with non-native code on your build machine, you will need to setup a cross-platform builder. + +On Linux, you should install `qemu-user-static `__ (preferably via +your package management) on the host and run ``docker run --rm --privileged multiarch/qemu-user-static --reset -p yes`` +to enable that builder. The Docker plugin will setup everything else for you. + Tunables @@ -290,8 +307,6 @@ you can read about those in a few places like https://developers.redhat.com/arti https://www.eclipse.org/openj9/docs/xxusecontainersupport, etc. The other memory defaults are inspired from `run-java-sh recommendations`_. -*Note: the build process used the newer ``buildx`` feature of Docker to provide multiarch images.* - .. _Pre/postboot script docs: https://docs.payara.fish/community/docs/Technical%20Documentation/Payara%20Micro%20Documentation/Payara%20Micro%20Configuration%20and%20Management/Micro%20Management/Asadmin%20Commands/Pre%20and%20Post%20Boot%20Commands.html From 276b3b5159471bd44cff99bfb1b9e6b279634b4a Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 20 Sep 2022 10:49:19 +0200 Subject: [PATCH 073/173] feat(ct-base): add wait-for script to image Many scripts shipped with an app image might rely on the availability of an external service, API or simply the database or search index. Adding a standard script here to make it easier to wait for their availability. --- doc/sphinx-guides/source/container/base-image.rst | 1 + modules/container-base/src/main/docker/Dockerfile | 11 +++++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/doc/sphinx-guides/source/container/base-image.rst b/doc/sphinx-guides/source/container/base-image.rst index 41d88c97e2d..197f4175538 100644 --- a/doc/sphinx-guides/source/container/base-image.rst +++ b/doc/sphinx-guides/source/container/base-image.rst @@ -35,6 +35,7 @@ The base image provides: - CLI tools necessary to run Dataverse (i. e. ``curl`` or ``jq`` - see also :doc:`../installation/prerequisites` in Installation Guide) - Linux tools for analysis, monitoring and so on - `Jattach `__ (attach to running JVM) +- `wait-for `__ (tool to "wait for" a service to be available) - `dumb-init `__ (see :ref:`below ` for details) This image is created as a "multi-arch image", see :ref:`below `. diff --git a/modules/container-base/src/main/docker/Dockerfile b/modules/container-base/src/main/docker/Dockerfile index c56abb975e2..cafeb2ffb59 100644 --- a/modules/container-base/src/main/docker/Dockerfile +++ b/modules/container-base/src/main/docker/Dockerfile @@ -88,6 +88,8 @@ EOF ARG JATTACH_VERSION="v2.1" ARG JATTACH_CHECKSUM="07885fdc782e02e7302c6d190f54c3930afa10a38140365adf54076ec1086a8e" +ARG WAIT_FOR_VERSION="v2.2.3" +ARG WAIT_FOR_CHECKSUM="70271181be69cd2c7265b2746f97fccfd7e8aa1059894138a775369c23589ff4" ARG PKGS="jq imagemagick curl unzip wget acl dirmngr gpg lsof procps netcat dumb-init" # Installing the packages in an extra container layer for better caching @@ -95,12 +97,17 @@ RUN < Date: Thu, 22 Sep 2022 14:17:58 +0200 Subject: [PATCH 074/173] chore(deps): remove Payara version from Maven ct profile With the merge of #8949 the custom version is no longer necessary. 
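The ``wait-for`` script added to the image above is intended to be called from init or background scripts that depend on external services. A sketch of its use, assuming the script is on the ``PATH`` (it is installed into ``${SCRIPT_DIR}``) and using placeholder host names, ports, and timeouts:

.. code-block:: bash

    # Block until Solr and the database answer on their TCP ports, then continue.
    # Hosts, ports and the timeout are placeholders for a concrete deployment.
    wait-for solr:8983 -t 120 -- echo "Solr is reachable"
    wait-for postgres:5432 -t 120 -- echo "Database is reachable"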
--- modules/dataverse-parent/pom.xml | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/modules/dataverse-parent/pom.xml b/modules/dataverse-parent/pom.xml index 9326ba71263..ce4dfb56257 100644 --- a/modules/dataverse-parent/pom.xml +++ b/modules/dataverse-parent/pom.xml @@ -334,13 +334,7 @@ ct - - 5.2022.3 + From 3ea4e92b48452c3785f3e7c60df4acdf40f8bd1e Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 30 Sep 2022 09:21:16 -0400 Subject: [PATCH 075/173] todo is done --- src/main/java/edu/harvard/iq/dataverse/api/Files.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Files.java b/src/main/java/edu/harvard/iq/dataverse/api/Files.java index 9dc0c3be524..d1ecd2d8824 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Files.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Files.java @@ -231,7 +231,6 @@ public Response replaceFileInDataset( if (null == contentDispositionHeader) { if (optionalFileParams.hasStorageIdentifier()) { newStorageIdentifier = optionalFileParams.getStorageIdentifier(); - // ToDo - check that storageIdentifier is valid if (optionalFileParams.hasFileName()) { newFilename = optionalFileParams.getFileName(); if (optionalFileParams.hasMimetype()) { From cb5007a6a5ad46e27dce34dbd5c2bd16bdc9044e Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 30 Sep 2022 09:21:37 -0400 Subject: [PATCH 076/173] add getjsonarray --- .../java/edu/harvard/iq/dataverse/util/json/JsonUtil.java | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonUtil.java index f4a3c635f8b..21ff0e03773 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonUtil.java @@ -63,4 +63,10 @@ public static javax.json.JsonObject getJsonObject(String serializedJson) { return Json.createReader(rdr).readObject(); } } + + public static javax.json.JsonArray getJsonArray(String serializedJson) { + try (StringReader rdr = new StringReader(serializedJson)) { + return Json.createReader(rdr).readArray(); + } + } } From e06ec36b2a4a78e8c64e42858542faaccf62841b Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 30 Sep 2022 10:04:55 -0400 Subject: [PATCH 077/173] Add /replaceFiles call refactor to make multifile a separate boolean remove unused LicenseBean from constructor updated /addFiles logic to use clone refactored steps 70/80 to work for multi-replace. i.e. by tracking filesToDelete and the physical files to delete. 
replace local Json readers with JsonUtil method move sanity check on file deletes to DataFileServiceBean --- .../iq/dataverse/DataFileServiceBean.java | 4 + .../iq/dataverse/EditDatafilesPage.java | 3 +- .../harvard/iq/dataverse/api/Datasets.java | 77 +++- .../edu/harvard/iq/dataverse/api/Files.java | 3 +- .../datasetutility/AddReplaceFileHelper.java | 415 +++++++++++++----- 5 files changed, 375 insertions(+), 127 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java index 0b935183182..7da06f36be4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java @@ -1544,6 +1544,10 @@ public void finalizeFileDelete(Long dataFileId, String storageLocation) throws I throw new IOException("Attempted to permanently delete a physical file still associated with an existing DvObject " + "(id: " + dataFileId + ", location: " + storageLocation); } + if(storageLocation == null || storageLocation.isBlank()) { + throw new IOException("Attempted to delete a physical file with no location " + + "(id: " + dataFileId + ", location: " + storageLocation); + } StorageIO directStorageAccess = DataAccess.getDirectStorageIO(storageLocation); directStorageAccess.delete(); } diff --git a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java index 6cf294ffd6d..f5e137a1981 100644 --- a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java @@ -586,8 +586,7 @@ public String init() { datafileService, permissionService, commandEngine, - systemConfig, - licenseServiceBean); + systemConfig); fileReplacePageHelper = new FileReplacePageHelper(addReplaceFileHelper, dataset, diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index aff543e643c..ed54704c4a1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -2451,8 +2451,7 @@ public Response addFileToDataset(@PathParam("id") String idSupplied, fileService, permissionSvc, commandEngine, - systemConfig, - licenseSvc); + systemConfig); //------------------- @@ -3387,14 +3386,84 @@ public Response addFilesToDataset(@PathParam("id") String idSupplied, this.fileService, this.permissionSvc, this.commandEngine, - this.systemConfig, - this.licenseSvc + this.systemConfig ); return addFileHelper.addFiles(jsonData, dataset, authUser); } + /** + * Replace multiple Files to an existing Dataset + * + * @param idSupplied + * @param jsonData + * @return + */ + @POST + @Path("{id}/replaceFiles") + @Consumes(MediaType.MULTIPART_FORM_DATA) + public Response replaceFilesInDataset(@PathParam("id") String idSupplied, + @FormDataParam("jsonData") String jsonData) { + + if (!systemConfig.isHTTPUpload()) { + return error(Response.Status.SERVICE_UNAVAILABLE, BundleUtil.getStringFromBundle("file.api.httpDisabled")); + } + + // ------------------------------------- + // (1) Get the user from the API key + // ------------------------------------- + User authUser; + try { + authUser = findUserOrDie(); + } catch (WrappedResponse ex) { + return error(Response.Status.FORBIDDEN, BundleUtil.getStringFromBundle("file.addreplace.error.auth") + ); + } + + // ------------------------------------- + // (2) Get the Dataset Id + // 
------------------------------------- + Dataset dataset; + + try { + dataset = findDatasetOrDie(idSupplied); + } catch (WrappedResponse wr) { + return wr.getResponse(); + } + + dataset.getLocks().forEach(dl -> { + logger.info(dl.toString()); + }); + + //------------------------------------ + // (2a) Make sure dataset does not have package file + // -------------------------------------- + + for (DatasetVersion dv : dataset.getVersions()) { + if (dv.isHasPackageFile()) { + return error(Response.Status.FORBIDDEN, + BundleUtil.getStringFromBundle("file.api.alreadyHasPackageFile") + ); + } + } + + DataverseRequest dvRequest = createDataverseRequest(authUser); + + AddReplaceFileHelper addFileHelper = new AddReplaceFileHelper( + dvRequest, + this.ingestService, + this.datasetService, + this.fileService, + this.permissionSvc, + this.commandEngine, + this.systemConfig + ); + + return addFileHelper.replaceFiles(jsonData, dataset, authUser); + + } + /** * API to find curation assignments and statuses * diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Files.java b/src/main/java/edu/harvard/iq/dataverse/api/Files.java index d1ecd2d8824..ecb40af19f8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Files.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Files.java @@ -256,8 +256,7 @@ public Response replaceFileInDataset( this.fileService, this.permissionSvc, this.commandEngine, - this.systemConfig, - this.licenseSvc); + this.systemConfig); // (5) Run "runReplaceFileByDatasetId" long fileToReplaceId = 0; diff --git a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java index 8e7922fd83b..207f1e309be 100644 --- a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java +++ b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java @@ -26,20 +26,22 @@ import edu.harvard.iq.dataverse.engine.command.impl.RestrictFileCommand; import edu.harvard.iq.dataverse.engine.command.impl.UpdateDatasetVersionCommand; import edu.harvard.iq.dataverse.ingest.IngestServiceBean; -import edu.harvard.iq.dataverse.license.LicenseServiceBean; import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.SystemConfig; import edu.harvard.iq.dataverse.util.file.CreateDataFileResult; import edu.harvard.iq.dataverse.util.json.JsonPrinter; +import edu.harvard.iq.dataverse.util.json.JsonUtil; + import java.io.IOException; import java.io.InputStream; -import java.io.StringReader; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; +import java.util.HashMap; import java.util.Iterator; import java.util.List; +import java.util.Map; import java.util.Objects; import java.util.Set; import java.util.logging.Level; @@ -47,10 +49,10 @@ import javax.ejb.EJBException; import javax.json.Json; import javax.json.JsonArrayBuilder; +import javax.json.JsonNumber; import javax.json.JsonObject; import javax.json.JsonArray; import javax.json.JsonObjectBuilder; -import javax.json.JsonReader; import javax.validation.ConstraintViolation; import javax.ws.rs.core.MediaType; import javax.ws.rs.core.Response; @@ -114,10 +116,9 @@ public class AddReplaceFileHelper{ public static String FILE_ADD_OPERATION = "FILE_ADD_OPERATION"; public static String FILE_REPLACE_OPERATION = "FILE_REPLACE_OPERATION"; public static String FILE_REPLACE_FORCE_OPERATION = "FILE_REPLACE_FORCE_OPERATION"; - public 
static String MULTIPLEFILES_ADD_OPERATION = "MULTIPLEFILES_ADD_OPERATION"; - + private String currentOperation; - + boolean multifile = false; // ----------------------------------- // All the needed EJBs, passed to the constructor // ----------------------------------- @@ -127,8 +128,6 @@ public class AddReplaceFileHelper{ private PermissionServiceBean permissionService; private EjbDataverseEngine commandEngine; private SystemConfig systemConfig; - private LicenseServiceBean licenseServiceBean; - // ----------------------------------- // Instance variables directly added // ----------------------------------- @@ -144,10 +143,6 @@ public class AddReplaceFileHelper{ // -- Optional private DataFile fileToReplace; // step 25 - // ----------------------------------- - // Instance variables derived from other input - // ----------------------------------- - private User user; private DatasetVersion workingVersion; private DatasetVersion clone; List initialFileList; @@ -256,13 +251,12 @@ public void resetFileHelper(){ * @param dvRequest */ public AddReplaceFileHelper(DataverseRequest dvRequest, - IngestServiceBean ingestService, + IngestServiceBean ingestService, DatasetServiceBean datasetService, DataFileServiceBean fileService, PermissionServiceBean permissionService, EjbDataverseEngine commandEngine, - SystemConfig systemConfig, - LicenseServiceBean licenseServiceBean){ + SystemConfig systemConfig){ // --------------------------------- // make sure DataverseRequest isn't null and has a user @@ -304,16 +298,12 @@ public AddReplaceFileHelper(DataverseRequest dvRequest, this.permissionService = permissionService; this.commandEngine = commandEngine; this.systemConfig = systemConfig; - this.licenseServiceBean = licenseServiceBean; - - - initErrorHandling(); // Initiate instance vars this.dataset = null; this.dvRequest = dvRequest; - this.user = dvRequest.getUser(); + dvRequest.getUser(); } @@ -336,7 +326,7 @@ public boolean runAddFileByDataset(Dataset chosenDataset, } - public boolean runAddFileByDataset(Dataset chosenDataset, + private boolean runAddFileByDataset(Dataset chosenDataset, String newFileName, String newFileContentType, String newStorageIdentifier, @@ -348,12 +338,8 @@ public boolean runAddFileByDataset(Dataset chosenDataset, initErrorHandling(); - if(multipleFiles) { - this.currentOperation = MULTIPLEFILES_ADD_OPERATION; - } - else { - this.currentOperation = FILE_ADD_OPERATION; - } + multifile=multipleFiles; + this.currentOperation = FILE_ADD_OPERATION; if (!this.step_001_loadDataset(chosenDataset)){ return false; @@ -393,6 +379,11 @@ public boolean runAddFile(Dataset dataset, }*/ + public boolean runForceReplaceFile(long fileToReplaceId, String newFilename, String newFileContentType, + String newStorageIdentifier, InputStream newFileInputStream, OptionalFileParams optionalFileParams) { + return runForceReplaceFile(fileToReplaceId, newFilename, newFileContentType, + newStorageIdentifier, newFileInputStream, optionalFileParams, false); + } /** * After the constructor, this method is called to replace a file * @@ -403,16 +394,18 @@ public boolean runAddFile(Dataset dataset, * @param newFileInputStream * @return */ - public boolean runForceReplaceFile(Long oldFileId, + private boolean runForceReplaceFile(Long oldFileId, String newFileName, String newFileContentType, String newStorageIdentifier, InputStream newFileInputStream, - OptionalFileParams optionalFileParams){ + OptionalFileParams optionalFileParams, + boolean multipleFiles){ msgt(">> runForceReplaceFile"); initErrorHandling(); 
+ multifile=multipleFiles; this.currentOperation = FILE_REPLACE_FORCE_OPERATION; @@ -432,16 +425,25 @@ public boolean runForceReplaceFile(Long oldFileId, } - public boolean runReplaceFile(Long oldFileId, + public boolean runReplaceFile(long fileToReplaceId, String newFilename, String newFileContentType, + String newStorageIdentifier, InputStream newFileInputStream, OptionalFileParams optionalFileParams) { + return runReplaceFile(fileToReplaceId, newFilename, newFileContentType, + newStorageIdentifier, newFileInputStream, optionalFileParams, false); + + } + + private boolean runReplaceFile(Long oldFileId, String newFileName, String newFileContentType, String newStorageIdentifier, InputStream newFileInputStream, - OptionalFileParams optionalFileParams){ + OptionalFileParams optionalFileParams, + boolean multipleFiles){ msgt(">> runReplaceFile"); initErrorHandling(); + multifile=multipleFiles; this.currentOperation = FILE_REPLACE_OPERATION; if (oldFileId==null){ @@ -759,19 +761,15 @@ private boolean runAddReplacePhase2(boolean tabIngest){ return false; } - - if (this.isFileReplaceOperation()){ + if (this.isFileReplaceOperation()) { msgt("step_080_run_update_dataset_command_for_replace"); - if (!this.step_080_run_update_dataset_command_for_replace()){ - return false; + if (!this.step_080_run_update_dataset_command_for_replace()) { + return false; } - - }else{ + } else if (!multifile) { msgt("step_070_run_update_dataset_command"); - if (!this.isMultipleFilesAddOperation()) { - if (!this.step_070_run_update_dataset_command()) { - return false; - } + if (!this.step_070_run_update_dataset_command()) { + return false; } } @@ -834,16 +832,6 @@ public boolean isFileAddOperation(){ return this.currentOperation.equals(FILE_ADD_OPERATION); } - /** - * Is this a multiple files add operation ? 
- * @return - */ - - public boolean isMultipleFilesAddOperation(){ - - return this.currentOperation.equals(MULTIPLEFILES_ADD_OPERATION); - } - /** * Initialize error handling vars */ @@ -1201,7 +1189,10 @@ private boolean step_030_createNewFilesViaIngest(){ // Load the working version of the Dataset workingVersion = dataset.getEditVersion(); - clone = workingVersion.cloneDatasetVersion(); + if(!multifile) { + //Don't repeatedly update the clone (losing changes) in multifile case + clone = workingVersion.cloneDatasetVersion(); + } try { CreateDataFileResult result = FileUtil.createDataFiles(workingVersion, this.newFileInputStream, @@ -1292,9 +1283,6 @@ private boolean step_040_auto_checkForDuplicates(){ // Initialize new file list this.finalFileList = new ArrayList<>(); - String warningMessage = null; - - if (isFileReplaceOperation() && this.fileToReplace == null){ // This error shouldn't happen if steps called correctly this.addErrorSevere(getBundleErr("existing_file_to_replace_is_null") + " (This error shouldn't happen if steps called in sequence....checkForFileReplaceDuplicate)"); @@ -1511,10 +1499,7 @@ private boolean step_050_checkForConstraintViolations(){ return true; } - // ----------------------------------------------------------- - // violations found: gather all error messages - // ----------------------------------------------------------- - List errMsgs = new ArrayList<>(); + new ArrayList<>(); for (ConstraintViolation violation : constraintViolations) { /* for 8859 return conflict response status if the validation fails @@ -1605,70 +1590,81 @@ private boolean step_060_addFilesViaIngestService(boolean tabIngest){ return true; } + List filesToDelete = new ArrayList(); + Map deleteFileStorageLocations = new HashMap<>(); /** * Create and run the update dataset command * * @return */ - private boolean step_070_run_update_dataset_command(){ - - if (this.hasError()){ + private boolean step_070_run_update_dataset_command() { + //Note -only single file operations and multifile replace call this, multifile add does not + if (this.hasError()) { return false; } - Command update_cmd; + Command update_cmd = null; String deleteStorageLocation = null; - long deleteFileId=-1; - if(isFileReplaceOperation()) { - List filesToDelete = new ArrayList(); + long deleteFileId = -1; + if (isFileReplaceOperation()) { + if (!multifile) { + filesToDelete.clear(); + deleteFileStorageLocations.clear(); + } filesToDelete.add(fileToReplace.getFileMetadata()); - - if(!fileToReplace.isReleased()) { - //If file is only in draft version, also need to delete the physical file - deleteStorageLocation = fileService.getPhysicalFileToDelete(fileToReplace); - deleteFileId=fileToReplace.getId(); + + if (!fileToReplace.isReleased()) { + // If file is only in draft version, also need to delete the physical file + deleteStorageLocation = fileService.getPhysicalFileToDelete(fileToReplace); + deleteFileId = fileToReplace.getId(); + deleteFileStorageLocations.put(deleteFileId, deleteStorageLocation); + } + if (!multifile) { + // Adding the file to the delete list for the command will delete this + // filemetadata and, if the file hasn't been released, the datafile itself. + update_cmd = new UpdateDatasetVersionCommand(dataset, dvRequest, filesToDelete, clone); } - //Adding the file to the delete list for the command will delete this filemetadata and, if the file hasn't been released, the datafile itself. 
- update_cmd = new UpdateDatasetVersionCommand(dataset, dvRequest, filesToDelete, clone); } else { - update_cmd = new UpdateDatasetVersionCommand(dataset, dvRequest, clone); + update_cmd = new UpdateDatasetVersionCommand(dataset, dvRequest, clone); } - ((UpdateDatasetVersionCommand) update_cmd).setValidateLenient(true); - - try { - // Submit the update dataset command - // and update the local dataset object - // - dataset = commandEngine.submit(update_cmd); - } catch (CommandException ex) { - /** - * @todo Add a test to exercise this error. - */ - this.addErrorSevere(getBundleErr("add.add_file_error")); - logger.severe(ex.getMessage()); - return false; - }catch (EJBException ex) { - /** - * @todo Add a test to exercise this error. - */ - this.addErrorSevere("add.add_file_error (see logs)"); - logger.severe(ex.getMessage()); - return false; + if (!multifile) { + //Avoid NPE in multifile replace case + ((UpdateDatasetVersionCommand) update_cmd).setValidateLenient(true); } - //Sanity check - if(isFileReplaceOperation()) { - if (deleteStorageLocation != null) { - // Finalize the delete of the physical file - // (File service will double-check that the datafile no - // longer exists in the database, before proceeding to - // delete the physical file) - try { - fileService.finalizeFileDelete(deleteFileId, deleteStorageLocation); - } catch (IOException ioex) { - logger.warning("Failed to delete the physical file associated with the deleted datafile id=" - + deleteFileId + ", storage location: " + deleteStorageLocation); - } + if (!multifile) { + try { + // Submit the update dataset command + // and update the local dataset object + // + dataset = commandEngine.submit(update_cmd); + } catch (CommandException ex) { + /** + * @todo Add a test to exercise this error. + */ + this.addErrorSevere(getBundleErr("add.add_file_error")); + logger.severe(ex.getMessage()); + return false; + } catch (EJBException ex) { + /** + * @todo Add a test to exercise this error. + */ + this.addErrorSevere("add.add_file_error (see logs)"); + logger.severe(ex.getMessage()); + return false; + } + } + + if (isFileReplaceOperation() && !multifile) { + // Finalize the delete of the physical file + // (File service will double-check that the datafile no + // longer exists in the database, before proceeding to + // delete the physical file) + try { + fileService.finalizeFileDelete(deleteFileId, deleteStorageLocation); + } catch (IOException ioex) { + logger.warning("Failed to delete the physical file associated with the deleted datafile id=" + + deleteFileId + ", storage location: " + deleteStorageLocation); } } return true; @@ -1766,7 +1762,7 @@ private boolean step_080_run_update_dataset_command_for_replace(){ } /* - * Go through the final file list, settting the rootFileId and previousFileId + * Go through the final file list, setting the rootFileId and previousFileId */ for (DataFile df : finalFileList) { df.setPreviousDataFileId(fileToReplace.getId()); @@ -1927,7 +1923,7 @@ private boolean step_100_startIngestJobs(){ //return true; //} - if (!this.isMultipleFilesAddOperation()) { + if (!multifile) { msg("pre ingest start"); // start the ingest! ingestService.startIngestJobsForDataset(dataset, dvRequest.getAuthenticatedUser()); @@ -2021,6 +2017,13 @@ public void setDuplicateFileWarning(String duplicateFileWarning) { this.duplicateFileWarning = duplicateFileWarning; } + /** Add multiple pre-positioned files listed in the jsonData. Works with direct upload, Globus, and other out-of-band methods. 
+ * + * @param jsonData - an array of jsonData entries (one per file) using the single add file jsonData format + * @param dataset + * @param authUser + * @return + */ public Response addFiles(String jsonData, Dataset dataset, User authUser) { msgt("(addFilesToDataset) jsonData: " + jsonData.toString()); @@ -2033,15 +2036,14 @@ public Response addFiles(String jsonData, Dataset dataset, User authUser) { // ----------------------------------------------------------- // Read jsonData and Parse files information from jsondata : // ----------------------------------------------------------- - try (StringReader rdr = new StringReader(jsonData)) { - JsonReader dbJsonReader = Json.createReader(rdr); - filesJson = dbJsonReader.readArray(); - dbJsonReader.close(); + try { + filesJson = JsonUtil.getJsonArray(jsonData); if (filesJson != null) { totalNumberofFiles = filesJson.getValuesAs(JsonObject.class).size(); - + workingVersion = dataset.getEditVersion(); + clone = workingVersion.cloneDatasetVersion(); for (JsonObject fileJson : filesJson.getValuesAs(JsonObject.class)) { OptionalFileParams optionalFileParams = null; @@ -2131,7 +2133,7 @@ public Response addFiles(String jsonData, Dataset dataset, User authUser) { } try { - Command cmd = new UpdateDatasetVersionCommand(dataset, dvRequest); + Command cmd = new UpdateDatasetVersionCommand(dataset, dvRequest, clone); ((UpdateDatasetVersionCommand) cmd).setValidateLenient(true); commandEngine.submit(cmd); } catch (CommandException ex) { @@ -2140,9 +2142,6 @@ public Response addFiles(String jsonData, Dataset dataset, User authUser) { dataset = datasetService.find(dataset.getId()); - List s = dataset.getFiles(); - for (DataFile dataFile : s) { - } //ingest job ingestService.startIngestJobsForDataset(dataset, (AuthenticatedUser) authUser); @@ -2166,6 +2165,184 @@ public Response addFiles(String jsonData, Dataset dataset, User authUser) { .add("status", STATUS_OK) .add("data", Json.createObjectBuilder().add("Files", jarr).add("Result", result)).build() ).build(); } + + /** + * Replace multiple files with prepositioned replacements as listed in the + * jsonData. Works with direct upload, Globus, and other out-of-band methods. 
+ * + * @param jsonData - must include fileToReplaceId key with file ID and may include forceReplace key with true/false(default) + * @param dataset + * @param authUser + * @return + */ + + public Response replaceFiles(String jsonData, Dataset dataset, User authUser) { + msgt("(replaceFilesInDataset) jsonData: " + jsonData.toString()); + + JsonArrayBuilder jarr = Json.createArrayBuilder(); + + JsonArray filesJson = null; + + int totalNumberofFiles = 0; + int successNumberofFiles = 0; + // ----------------------------------------------------------- + // Read jsonData and Parse files information from jsondata : + // ----------------------------------------------------------- + try { + filesJson = JsonUtil.getJsonArray(jsonData); + + + if (filesJson != null) { + totalNumberofFiles = filesJson.getValuesAs(JsonObject.class).size(); + workingVersion = dataset.getEditVersion(); + clone = workingVersion.cloneDatasetVersion(); + for (JsonObject fileJson : filesJson.getValuesAs(JsonObject.class)) { + boolean forceReplace = false; + // (2a) Check for optional "forceReplace" + if ((fileJson.containsKey("forceReplace"))) { + forceReplace = fileJson.getBoolean("forceReplace", false); + } + long fileToReplaceId = -1; + JsonNumber ftri = fileJson.getJsonNumber("fileToReplaceId"); + if(ftri !=null) { + fileToReplaceId = ftri.longValueExact(); + } + + OptionalFileParams optionalFileParams = null; + try { + // (2b) Load up optional params via JSON + // - Will skip extra attributes which includes fileToReplaceId and forceReplace + optionalFileParams = new OptionalFileParams(fileJson.toString()); + + String newFilename = null; + String newFileContentType = null; + String newStorageIdentifier = null; + if ((fileToReplaceId !=-1) && optionalFileParams.hasStorageIdentifier()) { + newStorageIdentifier = optionalFileParams.getStorageIdentifier(); + newStorageIdentifier = DataAccess.expandStorageIdentifierIfNeeded(newStorageIdentifier); + if(!DataAccess.uploadToDatasetAllowed(dataset, newStorageIdentifier)) { + addErrorSevere("Dataset store configuration does not allow provided storageIdentifier."); + } + if (optionalFileParams.hasFileName()) { + newFilename = optionalFileParams.getFileName(); + if (optionalFileParams.hasMimetype()) { + newFileContentType = optionalFileParams.getMimeType(); + } + } + + msgt("REPLACE! 
= " + newFilename); + if (!hasError()) { + if (forceReplace){ + runForceReplaceFile(fileToReplaceId, + newFilename, + newFileContentType, + newStorageIdentifier, + null, + optionalFileParams, true); + }else{ + runReplaceFile(fileToReplaceId, + newFilename, + newFileContentType, + newStorageIdentifier, + null, + optionalFileParams, true); + } + } + if (hasError()) { + JsonObjectBuilder fileoutput = Json.createObjectBuilder() + .add("storageIdentifier", newStorageIdentifier) + .add("errorMessage", getHttpErrorCode().toString() +":"+ getErrorMessagesAsString("\n")) + .add("fileDetails", fileJson); + jarr.add(fileoutput); + } else { + JsonObject successresult = getSuccessResultAsJsonObjectBuilder().build(); + String duplicateWarning = getDuplicateFileWarning(); + + if (duplicateWarning != null && !duplicateWarning.isEmpty()) { + JsonObjectBuilder fileoutput = Json.createObjectBuilder() + .add("storageIdentifier", newStorageIdentifier) + .add("warningMessage", getDuplicateFileWarning()) + .add("fileDetails", successresult.getJsonArray("files").getJsonObject(0)); + jarr.add(fileoutput); + } else { + JsonObjectBuilder fileoutput = Json.createObjectBuilder() + .add("storageIdentifier", newStorageIdentifier) + .add("successMessage", "Replaced successfully in the dataset") + .add("fileDetails", successresult.getJsonArray("files").getJsonObject(0)); + jarr.add(fileoutput); + } + successNumberofFiles = successNumberofFiles + 1; + } + } else { + JsonObjectBuilder fileoutput = Json.createObjectBuilder() + .add("errorMessage", "You must provide a fileToReplaceId, storageidentifier, filename, and mimetype.") + .add("fileDetails", fileJson); + + jarr.add(fileoutput); + } + + } catch (DataFileTagException ex) { + Logger.getLogger(Files.class.getName()).log(Level.SEVERE, null, ex); + JsonObjectBuilder fileoutput = Json.createObjectBuilder() + .add("errorCode", Response.Status.BAD_REQUEST.getStatusCode()) + .add("message", ex.getMessage()) + .add("fileDetails", fileJson); + jarr.add(fileoutput); + + } + catch (NoFilesException ex) { + Logger.getLogger(Files.class.getName()).log(Level.SEVERE, null, ex); + JsonObjectBuilder fileoutput = Json.createObjectBuilder() + .add("errorCode", Response.Status.BAD_REQUEST.getStatusCode()) + .add("message", BundleUtil.getStringFromBundle("NoFileException! Serious Error! 
See administrator!")) + .add("fileDetails", fileJson); + jarr.add(fileoutput); + } + + }// End of adding files + + DatasetLock eipLock = dataset.getLockFor(DatasetLock.Reason.EditInProgress); + if (eipLock == null) { + logger.warning("Dataset not locked for EditInProgress "); + } else { + datasetService.removeDatasetLocks(dataset, DatasetLock.Reason.EditInProgress); + logger.info("Removed EditInProgress lock "); + } + + try { + Command cmd = new UpdateDatasetVersionCommand(dataset, dvRequest, filesToDelete, clone); + ((UpdateDatasetVersionCommand) cmd).setValidateLenient(true); + commandEngine.submit(cmd); + } catch (CommandException ex) { + return error(Response.Status.INTERNAL_SERVER_ERROR, "CommandException updating DatasetVersion from addFiles job: " + ex.getMessage()); + } + + fileService.finalizeFileDeletes(deleteFileStorageLocations); + + dataset = datasetService.find(dataset.getId()); + + //ingest job + ingestService.startIngestJobsForDataset(dataset, (AuthenticatedUser) authUser); + + } + } + catch ( javax.json.stream.JsonParsingException ex) { + ex.printStackTrace(); + return error(BAD_REQUEST, "Json Parsing Exception :" + ex.getMessage()); + } + catch (Exception e) { + e.printStackTrace(); + return error(BAD_REQUEST, e.getMessage()); + } + + JsonObjectBuilder result = Json.createObjectBuilder() + .add("Total number of files", totalNumberofFiles) + .add("Number of files successfully replaced", successNumberofFiles); + + return Response.ok().entity(Json.createObjectBuilder() + .add("status", STATUS_OK) + .add("data", Json.createObjectBuilder().add("Files", jarr).add("Result", result)).build() ).build(); + } protected static Response error(Response.Status sts, String msg ) { return Response.status(sts) From e6bd5b3d63f4655a48080cdcda284e7507f9fd3f Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 30 Sep 2022 10:27:54 -0400 Subject: [PATCH 078/173] docs --- .../developers/s3-direct-upload-api.rst | 38 ++++++++++++++++++- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/doc/sphinx-guides/source/developers/s3-direct-upload-api.rst b/doc/sphinx-guides/source/developers/s3-direct-upload-api.rst index 3dc73ce6a0c..b29b3421900 100644 --- a/doc/sphinx-guides/source/developers/s3-direct-upload-api.rst +++ b/doc/sphinx-guides/source/developers/s3-direct-upload-api.rst @@ -122,7 +122,7 @@ To add multiple Uploaded Files to the Dataset --------------------------------------------- Once the files exists in the s3 bucket, a final API call is needed to add all the files to the Dataset. In this API call, additional metadata is added using the "jsonData" parameter. -jsonData normally includes information such as a file description, tags, provenance, whether the file is restricted, etc. For direct uploads, the jsonData object must also include values for: +jsonData for this call is an array of objects that normally include information such as a file description, tags, provenance, whether the file is restricted, etc. For direct uploads, the jsonData object must also include values for: * "description" - A description of the file * "directoryLabel" - The "File Path" of the file, indicating which folder the file should be uploaded to within the dataset @@ -154,7 +154,7 @@ Replacing an existing file in the Dataset ----------------------------------------- Once the file exists in the s3 bucket, a final API call is needed to register it as a replacement of an existing file. 
This call is the same call used to replace a file to a Dataverse installation but, rather than sending the file bytes, additional metadata is added using the "jsonData" parameter. -jsonData normally includes information such as a file description, tags, provenance, whether the file is restricted, whether to allow the mimetype to change (forceReplace=true), etc. For direct uploads, the jsonData object must also include values for: +jsonData normally includes information such as a file description, tags, provenance, whether the file is restricted, whether to allow the mimetype to change (forceReplace=true), etc. For direct uploads, the jsonData object must include values for: * "storageIdentifier" - String, as specified in prior calls * "fileName" - String @@ -178,3 +178,37 @@ Note that the API call does not validate that the file matches the hash value su Note that this API call can be used independently of the others, e.g. supporting use cases in which the file already exists in S3/has been uploaded via some out-of-band method. With current S3 stores the object identifier must be in the correct bucket for the store, include the PID authority/identifier of the parent dataset, and be guaranteed unique, and the supplied storage identifer must be prefaced with the store identifier used in the Dataverse installation, as with the internally generated examples above. + +Replacing multiple existing files in the Dataset +------------------------------------------------ + +Once the replacement files exist in the s3 bucket, a final API call is needed to register them as replacements for existing files. In this API call, additional metadata is added using the "jsonData" parameter. +jsonData for this call is array of objects that normally include information such as a file description, tags, provenance, whether the file is restricted, etc. For direct uploads, the jsonData object must include some additional values: + +* "fileToReplaceId" - the id of the file being replaced +* "forceReplace" - whether to replace a file with one of a different mimetype (optional, default is false) +* "description" - A description of the file +* "directoryLabel" - The "File Path" of the file, indicating which folder the file should be uploaded to within the dataset +* "storageIdentifier" - String +* "fileName" - String +* "mimeType" - String +* "fixity/checksum" either: + + * "md5Hash" - String with MD5 hash value, or + * "checksum" - Json Object with "@type" field specifying the algorithm used and "@value" field with the value from that algorithm, both Strings + + +The allowed checksum algorithms are defined by the edu.harvard.iq.dataverse.DataFile.CheckSumType class and currently include MD5, SHA-1, SHA-256, and SHA-512 + +.. 
code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/7U7YBV + export JSON_DATA="[{'fileToReplaceId': 10, 'description':'My description.','directoryLabel':'data/subdir1','categories':['Data'], 'restrict':'false', 'storageIdentifier':'s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42', 'fileName':'file1.txt', 'mimeType':'text/plain', 'checksum': {'@type': 'SHA-1', '@value': '123456'}}, \ + {'fileToReplaceId': 10, 'forceReplace': true, 'description':'My description.','directoryLabel':'data/subdir1','categories':['Data'], 'restrict':'false', 'storageIdentifier':'s3://demo-dataverse-bucket:176e28068b0-1c3f80357d53', 'fileName':'file2.txt', 'mimeType':'text/plain', 'checksum': {'@type': 'SHA-1', '@value': '123789'}}]" + + curl -X POST -H "X-Dataverse-key: $API_TOKEN" "$SERVER_URL/api/datasets/:persistentId/replaceFiles?persistentId=$PERSISTENT_IDENTIFIER" -F "jsonData=$JSON_DATA" + +Note that this API call can be used independently of the others, e.g. supporting use cases in which the files already exists in S3/has been uploaded via some out-of-band method. +With current S3 stores the object identifier must be in the correct bucket for the store, include the PID authority/identifier of the parent dataset, and be guaranteed unique, and the supplied storage identifer must be prefaced with the store identifier used in the Dataverse installation, as with the internally generated examples above. From 088cf8ac0248466b03bc2ae07e6c1d1439154f62 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 30 Sep 2022 10:31:24 -0400 Subject: [PATCH 079/173] release note --- doc/release-notes/9005-replaceFiles-api-call | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 doc/release-notes/9005-replaceFiles-api-call diff --git a/doc/release-notes/9005-replaceFiles-api-call b/doc/release-notes/9005-replaceFiles-api-call new file mode 100644 index 00000000000..b1df500251e --- /dev/null +++ b/doc/release-notes/9005-replaceFiles-api-call @@ -0,0 +1,3 @@ +9005 + +DIrect upload and out-of-band uploads can now be used to replace multiple files with one API call (complementing the prior ability to add multiple new files) \ No newline at end of file From 4ffccdb08675f92b3f6e2c46059b9d75ba97b077 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 30 Sep 2022 13:43:33 -0400 Subject: [PATCH 080/173] fix replaceFiles and remove hasError checks that block further changes hasError is not cleared where it was being used causing one error to skip all further add/replace calls and report that error for all subsequent files --- .../datasetutility/AddReplaceFileHelper.java | 32 +++++++------------ 1 file changed, 11 insertions(+), 21 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java index 207f1e309be..efb05558b40 100644 --- a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java +++ b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java @@ -2067,10 +2067,9 @@ public Response addFiles(String jsonData, Dataset dataset, User authUser) { } msgt("ADD! 
= " + newFilename); - if (!hasError()) { - runAddFileByDataset(dataset, newFilename, newFileContentType, newStorageIdentifier, - null, optionalFileParams, true); - } + + runAddFileByDataset(dataset, newFilename, newFileContentType, newStorageIdentifier, null, + optionalFileParams, true); if (hasError()) { JsonObjectBuilder fileoutput = Json.createObjectBuilder() .add("storageIdentifier", newStorageIdentifier) @@ -2176,9 +2175,10 @@ public Response addFiles(String jsonData, Dataset dataset, User authUser) { * @return */ - public Response replaceFiles(String jsonData, Dataset dataset, User authUser) { + public Response replaceFiles(String jsonData, Dataset ds, User authUser) { msgt("(replaceFilesInDataset) jsonData: " + jsonData.toString()); + this.dataset = ds; JsonArrayBuilder jarr = Json.createArrayBuilder(); JsonArray filesJson = null; @@ -2231,22 +2231,12 @@ public Response replaceFiles(String jsonData, Dataset dataset, User authUser) { } msgt("REPLACE! = " + newFilename); - if (!hasError()) { - if (forceReplace){ - runForceReplaceFile(fileToReplaceId, - newFilename, - newFileContentType, - newStorageIdentifier, - null, - optionalFileParams, true); - }else{ - runReplaceFile(fileToReplaceId, - newFilename, - newFileContentType, - newStorageIdentifier, - null, - optionalFileParams, true); - } + if (forceReplace) { + runForceReplaceFile(fileToReplaceId, newFilename, newFileContentType, + newStorageIdentifier, null, optionalFileParams, true); + } else { + runReplaceFile(fileToReplaceId, newFilename, newFileContentType, newStorageIdentifier, + null, optionalFileParams, true); } if (hasError()) { JsonObjectBuilder fileoutput = Json.createObjectBuilder() From 9d2fc0585c136c21109fb624002438d562246c75 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 30 Sep 2022 13:45:07 -0400 Subject: [PATCH 081/173] relocate/rename entry for the /addFiles, /replaceFiles in native-api the title Add File Metadata has been misunderstood to mean the call can change the metadata for existing files which it can't. The entry was also in the File section when it is a dataset-level call --- doc/sphinx-guides/source/api/native-api.rst | 49 +++------------------ 1 file changed, 7 insertions(+), 42 deletions(-) diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 93e1c36f179..e634bee37c9 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -1511,6 +1511,13 @@ The fully expanded example above (without environment variables) looks like this curl -H X-Dataverse-key: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx -X POST https://demo.dataverse.org/api/datasets/:persistentId/add?persistentId=doi:10.5072/FK2/J8SJZB -F 'jsonData={"description":"A remote image.","storageIdentifier":"trsa://themes/custom/qdr/images/CoreTrustSeal-logo-transparent.png","checksumType":"MD5","md5Hash":"509ef88afa907eaf2c17c1c8d8fde77e","label":"testlogo.png","fileName":"testlogo.png","mimeType":"image/png"}' +Adding Files To a Dataset via Other Tools +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In some circumstances, it may be useful to move or copy files into Dataverse's storage manually or via external tools and then add then to a dataset (i.e. without involving Dataverse in the file transfer itself). +Two API calls are available for this use case to add files to a dataset or to replace files that were already in the dataset. 
+These calls were developed as part of Dataverse's direct upload mechanism and are detailed in :doc:`/developers/s3-direct-upload-api`. + Report the data (file) size of a Dataset ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -2348,48 +2355,6 @@ The fully expanded example above (without environment variables) looks like this Note: The ``id`` returned in the json response is the id of the file metadata version. - -Adding File Metadata -~~~~~~~~~~~~~~~~~~~~ - -This API call requires a ``jsonString`` expressing the metadata of multiple files. It adds file metadata to the database table where the file has already been copied to the storage. - -The jsonData object includes values for: - -* "description" - A description of the file -* "directoryLabel" - The "File Path" of the file, indicating which folder the file should be uploaded to within the dataset -* "storageIdentifier" - String -* "fileName" - String -* "mimeType" - String -* "fixity/checksum" either: - - * "md5Hash" - String with MD5 hash value, or - * "checksum" - Json Object with "@type" field specifying the algorithm used and "@value" field with the value from that algorithm, both Strings - -.. note:: See :ref:`curl-examples-and-environment-variables` if you are unfamiliar with the use of ``export`` below. - -A curl example using an ``PERSISTENT_ID`` - -* ``SERVER_URL`` - e.g. https://demo.dataverse.org -* ``API_TOKEN`` - API endpoints require an API token that can be passed as the X-Dataverse-key HTTP header. For more details, see the :doc:`auth` section. -* ``PERSISTENT_IDENTIFIER`` - Example: ``doi:10.5072/FK2/7U7YBV`` - -.. code-block:: bash - - export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx - export SERVER_URL=https://demo.dataverse.org - export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/7U7YBV - export JSON_DATA="[{'description':'My description.','directoryLabel':'data/subdir1','categories':['Data'], 'restrict':'false', 'storageIdentifier':'s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42', 'fileName':'file1.txt', 'mimeType':'text/plain', 'checksum': {'@type': 'SHA-1', '@value': '123456'}}, \ - {'description':'My description.','directoryLabel':'data/subdir1','categories':['Data'], 'restrict':'false', 'storageIdentifier':'s3://demo-dataverse-bucket:176e28068b0-1c3f80357d53', 'fileName':'file2.txt', 'mimeType':'text/plain', 'checksum': {'@type': 'SHA-1', '@value': '123789'}}]" - - curl -X POST -H "X-Dataverse-key: $API_TOKEN" "$SERVER_URL/api/datasets/:persistentId/addFiles?persistentId=$PERSISTENT_IDENTIFIER" -F "jsonData=$JSON_DATA" - -The fully expanded example above (without environment variables) looks like this: - -.. 
code-block:: bash - - curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST https://demo.dataverse.org/api/datasets/:persistentId/addFiles?persistentId=doi:10.5072/FK2/7U7YBV -F jsonData='[{"description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "storageIdentifier":"s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42", "fileName":"file1.txt", "mimeType":"text/plain", "checksum": {"@type": "SHA-1", "@value": "123456"}}, {"description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "storageIdentifier":"s3://demo-dataverse-bucket:176e28068b0-1c3f80357d53", "fileName":"file2.txt", "mimeType":"text/plain", "checksum": {"@type": "SHA-1", "@value": "123789"}}]' - Updating File Metadata ~~~~~~~~~~~~~~~~~~~~~~ From fcf107279dcc0c7b208c68d5b2fd9deb3d0d11cd Mon Sep 17 00:00:00 2001 From: j-n-c Date: Tue, 18 Oct 2022 16:32:44 +0100 Subject: [PATCH 082/173] #9074 - Added support for building sphynx docs using python 3.10+ --- doc/sphinx-guides/requirements.txt | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/doc/sphinx-guides/requirements.txt b/doc/sphinx-guides/requirements.txt index 4488c54cd5e..eb9f952d013 100755 --- a/doc/sphinx-guides/requirements.txt +++ b/doc/sphinx-guides/requirements.txt @@ -1,5 +1,7 @@ -# current version as of this writing -Sphinx==3.5.4 +# Necessary workaround for building Sphynx guides with Python 3.10+ versions +Sphinx==3.5.4 ; python_version < '3.10' +Sphinx==5.3.0 ; python_version >= '3.10' + # Necessary workaround for ReadTheDocs for Sphinx 3.x - unnecessary as of Sphinx 4.5+ Jinja2>=3.0.2,<3.1 From 26e9861fe586503b4bd485cdb1c5d1b00fd7662b Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 21 Oct 2022 16:48:11 -0400 Subject: [PATCH 083/173] Add dvwebloader as upload option --- .../edu/harvard/iq/dataverse/DatasetPage.java | 27 ++++++++++++++++++- .../harvard/iq/dataverse/SettingsWrapper.java | 9 +++++++ .../settings/SettingsServiceBean.java | 4 ++- .../iq/dataverse/util/SystemConfig.java | 12 ++++++++- src/main/java/propertyFiles/Bundle.properties | 4 +++ src/main/webapp/editFilesFragment.xhtml | 6 ++++- src/main/webapp/resources/css/structure.css | 1 + 7 files changed, 59 insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 0a8db69bf5b..750636fec45 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -56,6 +56,7 @@ import edu.harvard.iq.dataverse.util.StringUtil; import edu.harvard.iq.dataverse.util.SystemConfig; +import edu.harvard.iq.dataverse.util.URLTokenUtil; import edu.harvard.iq.dataverse.validation.URLValidator; import edu.harvard.iq.dataverse.workflows.WorkflowComment; @@ -1845,7 +1846,9 @@ public boolean globusUploadSupported() { return settingsWrapper.isGlobusUpload() && settingsWrapper.isGlobusEnabledStorageDriver(dataset.getEffectiveStorageDriverId()); } - + public boolean webloaderUploadSupported() { + return settingsWrapper.isWebloaderUpload() && StorageIO.isDirectUploadEnabled(dataset.getEffectiveStorageDriverId()); + } private String init(boolean initFull) { @@ -6062,4 +6065,26 @@ public void startGlobusTransfer() { } PrimeFaces.current().executeScript(globusService.getGlobusDownloadScript(dataset, apiToken)); } + + public String getWebloaderUrlForDataset(Dataset d) { + String localeCode = session.getLocaleCode(); + ApiToken 
apiToken = null; + User user = session.getUser(); + + if (user instanceof AuthenticatedUser) { + apiToken = authService.findApiTokenByUser((AuthenticatedUser) user); + + if ((apiToken == null) || (apiToken.getExpireTime().before(new Date()))) { + logger.fine("Created apiToken for user: " + user.getIdentifier()); + apiToken = authService.generateApiTokenForUser((AuthenticatedUser) user); + } + } + // Use URLTokenUtil for params currently in common with external tools. + URLTokenUtil tokenUtil = new URLTokenUtil(d, apiToken, localeCode); + String appUrl; + appUrl = settingsService.getValueForKey(SettingsServiceBean.Key.WebloaderUrl) + + "?datasetPid={datasetPid}&siteUrl={siteUrl}&key={apiToken}&datasetId={datasetId}&datasetVersion={datasetVersion}&dvLocale={localeCode}"; + return tokenUtil.replaceTokensWithValues(appUrl); + } + } diff --git a/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java b/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java index aa40423000d..bf36f265743 100644 --- a/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java +++ b/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java @@ -107,6 +107,8 @@ public class SettingsWrapper implements java.io.Serializable { private Boolean rsyncOnly = null; + private Boolean webloaderUpload = null; + private String metricsUrl = null; private Boolean dataFilePIDSequentialDependent = null; @@ -338,6 +340,13 @@ public String getGlobusAppUrl() { } + public boolean isWebloaderUpload() { + if (webloaderUpload == null) { + webloaderUpload = systemConfig.isWebloaderUpload(); + } + return webloaderUpload; + } + public boolean isRsyncOnly() { if (rsyncOnly == null) { String downloadMethods = getValueForKey(SettingsServiceBean.Key.DownloadMethods); diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java index 50e29d2a333..371463fb215 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java @@ -563,7 +563,9 @@ Whether Harvesting (OAI) service is enabled /* * Allow a custom JavaScript to control values of specific fields. */ - ControlledVocabularyCustomJavaScript + ControlledVocabularyCustomJavaScript, + + WebloaderUrl ; @Override diff --git a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java index 7abd0d02065..62dcbfc8ab0 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java @@ -862,7 +862,13 @@ public enum FileUploadMethods { * Upload through Globus of large files */ - GLOBUS("globus") + GLOBUS("globus"), + + /** + * Upload folders of files through dvwebloader app + */ + + WEBLOADER("dvwebloader"); ; @@ -999,6 +1005,10 @@ public boolean isRsyncUpload(){ public boolean isGlobusUpload(){ return getMethodAvailable(FileUploadMethods.GLOBUS.toString(), true); } + + public boolean isWebloaderUpload(){ + return getMethodAvailable(FileUploadMethods.WEBLOADER.toString(), true); + } // Controls if HTTP upload is enabled for both GUI and API. 
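// Editor's note: the isWebloaderUpload()/getMethodAvailable() pattern above gates each
// upload method on whether its identifier appears in the comma-separated upload-methods
// setting (the new WebloaderUrl setting then tells the UI where a DVWebloader deployment
// lives). The following is only a self-contained sketch of that lookup idea, with
// illustrative names rather than the actual Dataverse classes:

import java.util.Arrays;
import java.util.List;
import java.util.Locale;

class UploadMethodsSketch {

    // True if the given method identifier occurs in the comma-separated setting value.
    static boolean isMethodAvailable(String uploadMethodsSetting, String method) {
        if (uploadMethodsSetting == null || method == null) {
            return false;
        }
        List<String> enabled = Arrays.asList(
                uploadMethodsSetting.toLowerCase(Locale.ROOT).split("\\s*,\\s*"));
        return enabled.contains(method.toLowerCase(Locale.ROOT));
    }

    public static void main(String[] args) {
        String setting = "native/http, dvwebloader";
        System.out.println(isMethodAvailable(setting, "dvwebloader")); // true
        System.out.println(isMethodAvailable(setting, "globus"));      // false
    }
}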
public boolean isHTTPUpload(){ diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index 8a4fdeb9e28..1019ec5d3e8 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -1671,6 +1671,10 @@ file.finishGlobus=Globus Transfer has finished file.downloadFromGlobus=Download through Globus file.globus.transfer=Globus Transfer file.globus.of=of: +file.fromWebloader.tip=Upload a folder of files. This method retains the relative path structure on from your local machine. (Using it will cancel any other types of uploads in progress on this page.) +file.fromWebloaderAfterCreate.tip=This option will be enabled after this dataset is created. +file.fromWebloader=Upload a Folder + file.api.httpDisabled=File upload via HTTP is not available for this installation of Dataverse. file.api.alreadyHasPackageFile=File upload via HTTP disabled since this dataset already contains a package file. file.replace.original=Original File diff --git a/src/main/webapp/editFilesFragment.xhtml b/src/main/webapp/editFilesFragment.xhtml index 0fd5bf48fb7..40937008ab8 100644 --- a/src/main/webapp/editFilesFragment.xhtml +++ b/src/main/webapp/editFilesFragment.xhtml @@ -158,7 +158,11 @@ widgetVar="fileUploadWidget"> - +
        [XHTML element markup was lost in this extract. Recoverable content of the hunk: it adds a folder-upload (DVWebloader) option referencing #{bundle['file.webloader.tip']} and #{bundle['file.webloaderAfterCreate.tip']}, placed alongside the existing Dropbox upload option, which references #{bundle['file.fromDropbox.tip']}.]
diff --git a/src/main/webapp/resources/css/structure.css b/src/main/webapp/resources/css/structure.css index c184c46cee9..65489431d65 100644 --- a/src/main/webapp/resources/css/structure.css +++ b/src/main/webapp/resources/css/structure.css @@ -883,6 +883,7 @@ div.panel-body.read-terms{max-height:220px; overflow-y:scroll; width:100%; backg #dragdropMsg {padding:20px;font-size:1.3em;color:#808080;text-align:center;} .dropin-btn-status.ui-icon {background: url("https://www.dropbox.com/static/images/widgets/dbx-saver-status.png") no-repeat;} .globus-btn.ui-icon {background: url("https://docs.globus.org/images/home/transfer.png") no-repeat;background-size:contain;display:inline-block;} +.webloader-btn.ui-icon {background: url("resources/images/folders.png") no-repeat;background-size:contain;display:inline-block;} /* VERSIONS */ From 85fc67d0ee870b938108347109a3719bb550d000 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 25 Oct 2022 14:38:12 -0400 Subject: [PATCH 084/173] bug fixes, refactor, styling --- .../edu/harvard/iq/dataverse/DatasetPage.java | 22 ++++------- .../iq/dataverse/EditDatafilesPage.java | 22 +++++++++++ .../AuthenticationServiceBean.java | 10 +++++ .../iq/dataverse/util/WebloaderUtil.java | 36 ++++++++++++++++++ src/main/webapp/dataset.xhtml | 1 + src/main/webapp/editFilesFragment.xhtml | 15 ++++---- src/main/webapp/editdatafiles.xhtml | 1 + src/main/webapp/resources/css/structure.css | 2 +- src/main/webapp/resources/images/folders.png | Bin 0 -> 787 bytes 9 files changed, 87 insertions(+), 22 deletions(-) create mode 100644 src/main/java/edu/harvard/iq/dataverse/util/WebloaderUtil.java create mode 100644 src/main/webapp/resources/images/folders.png diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 750636fec45..05069d34c67 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -57,6 +57,7 @@ import edu.harvard.iq.dataverse.util.StringUtil; import edu.harvard.iq.dataverse.util.SystemConfig; import edu.harvard.iq.dataverse.util.URLTokenUtil; +import edu.harvard.iq.dataverse.util.WebloaderUtil; import edu.harvard.iq.dataverse.validation.URLValidator; import edu.harvard.iq.dataverse.workflows.WorkflowComment; @@ -6068,23 +6069,16 @@ public void startGlobusTransfer() { public String getWebloaderUrlForDataset(Dataset d) { String localeCode = session.getLocaleCode(); - ApiToken apiToken = null; User user = session.getUser(); - if (user instanceof AuthenticatedUser) { - apiToken = authService.findApiTokenByUser((AuthenticatedUser) user); - - if ((apiToken == null) || (apiToken.getExpireTime().before(new Date()))) { - logger.fine("Created apiToken for user: " + user.getIdentifier()); - apiToken = authService.generateApiTokenForUser((AuthenticatedUser) user); - } + ApiToken apiToken = authService.getValidApiTokenForUser((AuthenticatedUser) user); + return WebloaderUtil.getWebloaderUrl(d, apiToken, localeCode, + settingsService.getValueForKey(SettingsServiceBean.Key.WebloaderUrl)); + } else { + // Shouldn't normally happen (seesion timeout? bug?) + logger.warning("getWebloaderUrlForDataset called for non-Authenticated user"); + return null; } - // Use URLTokenUtil for params currently in common with external tools. 
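// Editor's note: in this commit the inline "look up the token, regenerate it if missing
// or expired" logic shown below is replaced by a single call to
// authService.getValidApiTokenForUser(...) (added to AuthenticationServiceBean later in
// the same patch). A self-contained sketch of that behavior, using simplified stand-in
// types rather than the real Dataverse classes, looks roughly like this:

import java.time.Instant;

class ApiTokenSketch {

    static final class Token {
        final String value;
        final Instant expires;
        Token(String value, Instant expires) { this.value = value; this.expires = expires; }
        boolean isExpired() { return expires.isBefore(Instant.now()); }
    }

    interface TokenStore {
        Token findTokenForUser(String userId);      // may return null
        Token generateTokenForUser(String userId);  // always returns a fresh token
    }

    // Reuse the stored token unless it is missing or expired; otherwise mint a new one.
    static Token getValidTokenForUser(TokenStore store, String userId) {
        Token token = store.findTokenForUser(userId);
        if (token == null || token.isExpired()) {
            token = store.generateTokenForUser(userId);
        }
        return token;
    }

    public static void main(String[] args) {
        TokenStore store = new TokenStore() {
            private Token current = new Token("expired-token", Instant.now().minusSeconds(60));
            public Token findTokenForUser(String userId) { return current; }
            public Token generateTokenForUser(String userId) {
                current = new Token("fresh-token", Instant.now().plusSeconds(3600));
                return current;
            }
        };
        System.out.println(getValidTokenForUser(store, "dataverseAdmin").value); // fresh-token
    }
}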
- URLTokenUtil tokenUtil = new URLTokenUtil(d, apiToken, localeCode); - String appUrl; - appUrl = settingsService.getValueForKey(SettingsServiceBean.Key.WebloaderUrl) - + "?datasetPid={datasetPid}&siteUrl={siteUrl}&key={apiToken}&datasetId={datasetId}&datasetVersion={datasetVersion}&dvLocale={localeCode}"; - return tokenUtil.replaceTokensWithValues(appUrl); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java index 6cf294ffd6d..9845fa16526 100644 --- a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java @@ -5,7 +5,9 @@ import edu.harvard.iq.dataverse.api.AbstractApiBean; import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; import edu.harvard.iq.dataverse.authorization.Permission; +import edu.harvard.iq.dataverse.authorization.users.ApiToken; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; +import edu.harvard.iq.dataverse.authorization.users.User; import edu.harvard.iq.dataverse.branding.BrandingUtil; import edu.harvard.iq.dataverse.datasetutility.AddReplaceFileHelper; import edu.harvard.iq.dataverse.datasetutility.FileSizeChecker; @@ -36,6 +38,8 @@ import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.JsfHelper; import edu.harvard.iq.dataverse.util.SystemConfig; +import edu.harvard.iq.dataverse.util.URLTokenUtil; +import edu.harvard.iq.dataverse.util.WebloaderUtil; import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.EjbUtil; import edu.harvard.iq.dataverse.util.FileMetadataUtil; @@ -3067,6 +3071,10 @@ public boolean globusUploadSupported() { return settingsWrapper.isGlobusUpload() && settingsWrapper.isGlobusEnabledStorageDriver(dataset.getEffectiveStorageDriverId()); } + + public boolean webloaderUploadSupported() { + return settingsWrapper.isWebloaderUpload() && StorageIO.isDirectUploadEnabled(dataset.getEffectiveStorageDriverId()); + } private void populateFileMetadatas() { fileMetadatas = new ArrayList<>(); @@ -3106,4 +3114,18 @@ public void setFileAccessRequest(boolean fileAccessRequest) { public boolean isHasPublicStore() { return settingsWrapper.isTrueForKey(SettingsServiceBean.Key.PublicInstall, StorageIO.isPublicStore(dataset.getEffectiveStorageDriverId())); } + + public String getWebloaderUrlForDataset(Dataset d) { + String localeCode = session.getLocaleCode(); + User user = session.getUser(); + if (user instanceof AuthenticatedUser) { + ApiToken apiToken = authService.getValidApiTokenForUser((AuthenticatedUser) user); + return WebloaderUtil.getWebloaderUrl(d, apiToken, localeCode, + settingsService.getValueForKey(SettingsServiceBean.Key.WebloaderUrl)); + } else { + // Shouldn't normally happen (seesion timeout? bug?) 
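// Editor's note: the webloader link itself is assembled by substituting {placeholder}
// tokens in a URL template, the job that moves into WebloaderUtil.getWebloaderUrl and
// URLTokenUtil below. This is a minimal, self-contained sketch of that substitution step
// only; the base URL, keys and values are examples, not the actual URLTokenUtil code:

import java.util.LinkedHashMap;
import java.util.Map;

class WebloaderUrlSketch {

    // Replace every "{key}" occurrence in the template with its mapped value.
    static String replaceTokens(String template, Map<String, String> values) {
        String result = template;
        for (Map.Entry<String, String> entry : values.entrySet()) {
            result = result.replace("{" + entry.getKey() + "}", entry.getValue());
        }
        return result;
    }

    public static void main(String[] args) {
        // The WebloaderUrl setting would supply the base URL of a DVWebloader deployment.
        String template = "https://webloader.example.org/dvwebloader.html"
                + "?datasetPid={datasetPid}&siteUrl={siteUrl}&key={apiToken}&dvLocale={localeCode}";
        Map<String, String> values = new LinkedHashMap<>();
        values.put("datasetPid", "doi:10.5072/FK2/EXAMPLE");
        values.put("siteUrl", "https://demo.dataverse.org");
        values.put("apiToken", "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx");
        values.put("localeCode", "en");
        System.out.println(replaceTokens(template, values));
    }
}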
+ logger.warning("getWebloaderUrlForDataset called for non-Authenticated user"); + return null; + } + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/authorization/AuthenticationServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/authorization/AuthenticationServiceBean.java index b242cd2936f..f7b88147c05 100644 --- a/src/main/java/edu/harvard/iq/dataverse/authorization/AuthenticationServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/authorization/AuthenticationServiceBean.java @@ -938,4 +938,14 @@ public List getWorkflowCommentsByAuthenticatedUser(Authenticat return query.getResultList(); } + public ApiToken getValidApiTokenForUser(AuthenticatedUser user) { + ApiToken apiToken = null; + apiToken = findApiTokenByUser(user); + if ((apiToken == null) || (apiToken.getExpireTime().before(new Date()))) { + logger.fine("Created apiToken for user: " + user.getIdentifier()); + apiToken = generateApiTokenForUser(user); + } + return apiToken; + } + } diff --git a/src/main/java/edu/harvard/iq/dataverse/util/WebloaderUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/WebloaderUtil.java new file mode 100644 index 00000000000..266d55eceb3 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/util/WebloaderUtil.java @@ -0,0 +1,36 @@ +package edu.harvard.iq.dataverse.util; + +import java.util.Date; +import java.util.Enumeration; +import java.util.HashMap; +import java.util.Locale; +import java.util.Map.Entry; +import java.util.logging.Logger; + +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpSession; + +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.DatasetPage; +import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; +import edu.harvard.iq.dataverse.authorization.users.ApiToken; +import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; +import edu.harvard.iq.dataverse.authorization.users.User; +import edu.harvard.iq.dataverse.settings.SettingsServiceBean; + +public class WebloaderUtil { + + private static final Logger logger = Logger.getLogger(WebloaderUtil.class.getCanonicalName()); + + /** + * Create the URL required to launch https://github.com/gdcc/dvweloader + */ + public static String getWebloaderUrl(Dataset d, ApiToken apiToken, String localeCode, String baseUrl) { + // Use URLTokenUtil for params currently in common with external tools. + URLTokenUtil tokenUtil = new URLTokenUtil(d, apiToken, localeCode); + String appUrl; + appUrl = baseUrl + + "?datasetPid={datasetPid}&siteUrl={siteUrl}&key={apiToken}&datasetId={datasetId}&datasetVersion={datasetVersion}&dvLocale={localeCode}"; + return tokenUtil.replaceTokensWithValues(appUrl); + } +} diff --git a/src/main/webapp/dataset.xhtml b/src/main/webapp/dataset.xhtml index 1bb862721a5..35753374dbb 100644 --- a/src/main/webapp/dataset.xhtml +++ b/src/main/webapp/dataset.xhtml @@ -846,6 +846,7 @@ + diff --git a/src/main/webapp/editFilesFragment.xhtml b/src/main/webapp/editFilesFragment.xhtml index 40937008ab8..a8d2bde1059 100644 --- a/src/main/webapp/editFilesFragment.xhtml +++ b/src/main/webapp/editFilesFragment.xhtml @@ -158,12 +158,13 @@ widgetVar="fileUploadWidget"> -
        [XHTML element markup was lost in this extract. Recoverable content of the hunk: it removes the elements added in the previous commit that referenced #{bundle['file.webloader.tip']} and #{bundle['file.webloaderAfterCreate.tip']} and adds reworked folder-upload markup referencing #{bundle['file.fromWebloader.tip']} and #{bundle['file.fromWebloaderAfterCreate.tip']}, adjusting the neighboring Dropbox (#{bundle['file.fromDropbox.tip']}) and Globus (#{bundle['file.fromGlobus.tip']}, #{bundle['file.fromGlobusAfterCreate.tip']}) upload blocks in the process.]
diff --git a/src/main/webapp/editdatafiles.xhtml b/src/main/webapp/editdatafiles.xhtml index 6c4f07f51da..02acb224827 100644 --- a/src/main/webapp/editdatafiles.xhtml +++ b/src/main/webapp/editdatafiles.xhtml @@ -63,6 +63,7 @@ +
diff --git a/src/main/webapp/resources/css/structure.css b/src/main/webapp/resources/css/structure.css index 65489431d65..6ef6dfb29e5 100644 --- a/src/main/webapp/resources/css/structure.css +++ b/src/main/webapp/resources/css/structure.css @@ -883,7 +883,7 @@ div.panel-body.read-terms{max-height:220px; overflow-y:scroll; width:100%; backg #dragdropMsg {padding:20px;font-size:1.3em;color:#808080;text-align:center;} .dropin-btn-status.ui-icon {background: url("https://www.dropbox.com/static/images/widgets/dbx-saver-status.png") no-repeat;} .globus-btn.ui-icon {background: url("https://docs.globus.org/images/home/transfer.png") no-repeat;background-size:contain;display:inline-block;} -.webloader-btn.ui-icon {background: url("resources/images/folders.png") no-repeat;background-size:contain;display:inline-block;} +.webloader-btn.ui-icon {background: url("/resources/images/folders.png") no-repeat;background-size:contain;display:inline-block;} /* VERSIONS */ diff --git a/src/main/webapp/resources/images/folders.png b/src/main/webapp/resources/images/folders.png new file mode 100644 index 0000000000000000000000000000000000000000..a3dc36372803a113a1d6e562731cb96ef42d416a GIT binary patch literal 787 zcmV+u1MK{XP)00001b5ch_0Itp) z=>Px#1ZP1_K>z@;j|==^1poj532;bRa{vGi!vFvd!vV){sAK>D0;owuK~!i%?U=uc z>Od66<3+6PZ4^NyrT74XrSBjJ_JS`Uf{ng_jaA;kPPxI#%ErP!pavETJ4;za@3(qb z!ZjIZVwg#czrvlfA!NRs`OeHqY_r*DVdfA04J{u5s(EVxyWQ^F20}pB0t2fGVY;18 zhXPHrEUVdURw@;WgMq7ncDr4x)k>uj#hPZCCVuDhxvuN?|JX4Mqt$9rAQ4vqSTp&2 zo&pZm>vcRHr$8dk0`U4LlL-af3>t0hh~#MjXHc zO15DTdJnKpqu32-#DS6$1Xuz>f$$Q-IgducAoL#i2=K&Di){l{7|=CA6bOKerqe0f zyaGMkBYArECGP zEEYWle82?O3{?`gybW{*~haj=i%KCoJ?Xe18aCA%z&;>81oVN|PCiaD@* z1|C37~XB9(eKf3xOrXew=~~2z~E(YI+4{ zA^2EmH(;w(Ai!TDY2rZf5#WiRKJo#&CWsmn<22Rl^(eT}@AtiZ8dHz~^uC9~;jfK! zI^FGdDG^T4olYpo!0~u26bcmFK+i=ZCr+mmBRR4KU|X%%>v-i9b6}e0VzF@35c(>l zAOqQKmI7`tNw))q0s-Z6nF2m=3!|rzy@Z~gF8_l)dl1Q;7 Date: Tue, 25 Oct 2022 15:04:52 -0400 Subject: [PATCH 085/173] simplify - just use plus icon --- src/main/webapp/editFilesFragment.xhtml | 2 +- src/main/webapp/resources/css/structure.css | 2 -- src/main/webapp/resources/images/folders.png | Bin 787 -> 0 bytes 3 files changed, 1 insertion(+), 3 deletions(-) delete mode 100644 src/main/webapp/resources/images/folders.png diff --git a/src/main/webapp/editFilesFragment.xhtml b/src/main/webapp/editFilesFragment.xhtml index a8d2bde1059..09ee7f50024 100644 --- a/src/main/webapp/editFilesFragment.xhtml +++ b/src/main/webapp/editFilesFragment.xhtml @@ -162,7 +162,7 @@

        [XHTML element markup was lost in this extract. Recoverable content of the hunk: the folder-upload button keeps its #{bundle['file.fromWebloader.tip']} and #{bundle['file.fromWebloaderAfterCreate.tip']} text but, per the commit message, drops the custom webloader icon in favor of the standard plus icon.]
diff --git a/src/main/webapp/resources/css/structure.css b/src/main/webapp/resources/css/structure.css index 6ef6dfb29e5..0dff334833e 100644 --- a/src/main/webapp/resources/css/structure.css +++ b/src/main/webapp/resources/css/structure.css @@ -883,8 +883,6 @@ div.panel-body.read-terms{max-height:220px; overflow-y:scroll; width:100%; backg #dragdropMsg {padding:20px;font-size:1.3em;color:#808080;text-align:center;} .dropin-btn-status.ui-icon {background: url("https://www.dropbox.com/static/images/widgets/dbx-saver-status.png") no-repeat;} .globus-btn.ui-icon {background: url("https://docs.globus.org/images/home/transfer.png") no-repeat;background-size:contain;display:inline-block;} -.webloader-btn.ui-icon {background: url("/resources/images/folders.png") no-repeat;background-size:contain;display:inline-block;} - /* VERSIONS */ div[id$="versionsTable"] th.col-select-width * {display:none;} diff --git a/src/main/webapp/resources/images/folders.png b/src/main/webapp/resources/images/folders.png deleted file mode 100644 index a3dc36372803a113a1d6e562731cb96ef42d416a..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 787 zcmV+u1MK{XP)00001b5ch_0Itp) z=>Px#1ZP1_K>z@;j|==^1poj532;bRa{vGi!vFvd!vV){sAK>D0;owuK~!i%?U=uc z>Od66<3+6PZ4^NyrT74XrSBjJ_JS`Uf{ng_jaA;kPPxI#%ErP!pavETJ4;za@3(qb z!ZjIZVwg#czrvlfA!NRs`OeHqY_r*DVdfA04J{u5s(EVxyWQ^F20}pB0t2fGVY;18 zhXPHrEUVdURw@;WgMq7ncDr4x)k>uj#hPZCCVuDhxvuN?|JX4Mqt$9rAQ4vqSTp&2 zo&pZm>vcRHr$8dk0`U4LlL-af3>t0hh~#MjXHc zO15DTdJnKpqu32-#DS6$1Xuz>f$$Q-IgducAoL#i2=K&Di){l{7|=CA6bOKerqe0f zyaGMkBYArECGP zEEYWle82?O3{?`gybW{*~haj=i%KCoJ?Xe18aCA%z&;>81oVN|PCiaD@* z1|C37~XB9(eKf3xOrXew=~~2z~E(YI+4{ zA^2EmH(;w(Ai!TDY2rZf5#WiRKJo#&CWsmn<22Rl^(eT}@AtiZ8dHz~^uC9~;jfK! zI^FGdDG^T4olYpo!0~u26bcmFK+i=ZCr+mmBRR4KU|X%%>v-i9b6}e0VzF@35c(>l zAOqQKmI7`tNw))q0s-Z6nF2m=3!|rzy@Z~gF8_l)dl1Q;7 Date: Thu, 3 Nov 2022 17:59:06 +0100 Subject: [PATCH 086/173] build(ct-base): switch to Payara 5.2022.4 The upgrade to 5.2022.3 made Dataverse deployments fail because the postboot script deployment method was broken. This has been fixed with 5.2022.4, which is why we use this version now. 
--- modules/dataverse-parent/pom.xml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/modules/dataverse-parent/pom.xml b/modules/dataverse-parent/pom.xml index 4ffc5941278..fe50601d583 100644 --- a/modules/dataverse-parent/pom.xml +++ b/modules/dataverse-parent/pom.xml @@ -337,7 +337,11 @@ ct - + + 5.2022.4 From 7d9327edbf194049c1233b12fed6c0ade8dc518d Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 3 Nov 2022 17:39:55 -0400 Subject: [PATCH 087/173] Refactored permissions checks and fixed workflow token access --- .../edu/harvard/iq/dataverse/api/Access.java | 334 +++++------------- 1 file changed, 96 insertions(+), 238 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Access.java b/src/main/java/edu/harvard/iq/dataverse/api/Access.java index abeedf23b59..321b3ebfab6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Access.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Access.java @@ -187,9 +187,6 @@ public class Access extends AbstractApiBean { @Inject MakeDataCountLoggingServiceBean mdcLogService; - - private static final String API_KEY_HEADER = "X-Dataverse-key"; - //@EJB // TODO: @@ -197,23 +194,19 @@ public class Access extends AbstractApiBean { @Path("datafile/bundle/{fileId}") @GET @Produces({"application/zip"}) - public BundleDownloadInstance datafileBundle(@PathParam("fileId") String fileId, @QueryParam("fileMetadataId") Long fileMetadataId,@QueryParam("gbrecs") boolean gbrecs, @QueryParam("key") String apiToken, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) /*throws NotFoundException, ServiceUnavailableException, PermissionDeniedException, AuthorizationRequiredException*/ { + public BundleDownloadInstance datafileBundle(@PathParam("fileId") String fileId, @QueryParam("fileMetadataId") Long fileMetadataId,@QueryParam("gbrecs") boolean gbrecs, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) /*throws NotFoundException, ServiceUnavailableException, PermissionDeniedException, AuthorizationRequiredException*/ { GuestbookResponse gbr = null; DataFile df = findDataFileOrDieWrapper(fileId); - if (apiToken == null || apiToken.equals("")) { - apiToken = headers.getHeaderString(API_KEY_HEADER); - } - // This will throw a ForbiddenException if access isn't authorized: - checkAuthorization(df, apiToken); + checkAuthorization(df); if (gbrecs != true && df.isReleased()){ // Write Guestbook record if not done previously and file is released - User apiTokenUser = findAPITokenUser(apiToken); + User apiTokenUser = findAPITokenUser(); gbr = guestbookResponseService.initAPIGuestbookResponse(df.getOwner(), df, session, apiTokenUser); guestbookResponseService.save(gbr); MakeDataCountEntry entry = new MakeDataCountEntry(uriInfo, headers, dvRequestService, df); @@ -278,7 +271,7 @@ private DataFile findDataFileOrDieWrapper(String fileId){ @Path("datafile/{fileId:.+}") @GET @Produces({"application/xml"}) - public Response datafile(@PathParam("fileId") String fileId, @QueryParam("gbrecs") boolean gbrecs, @QueryParam("key") String apiToken, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) /*throws NotFoundException, ServiceUnavailableException, PermissionDeniedException, AuthorizationRequiredException*/ { + public Response datafile(@PathParam("fileId") String fileId, @QueryParam("gbrecs") boolean gbrecs, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) /*throws 
NotFoundException, ServiceUnavailableException, PermissionDeniedException, AuthorizationRequiredException*/ { // check first if there's a trailing slash, and chop it: while (fileId.lastIndexOf('/') == fileId.length() - 1) { @@ -303,20 +296,16 @@ public Response datafile(@PathParam("fileId") String fileId, @QueryParam("gbrecs throw new NotFoundException(errorMessage); // (nobody should ever be using this API on a harvested DataFile)! } - - if (apiToken == null || apiToken.equals("")) { - apiToken = headers.getHeaderString(API_KEY_HEADER); - } - + + // This will throw a ForbiddenException if access isn't authorized: + checkAuthorization(df); + if (gbrecs != true && df.isReleased()){ // Write Guestbook record if not done previously and file is released - User apiTokenUser = findAPITokenUser(apiToken); + User apiTokenUser = findAPITokenUser(); gbr = guestbookResponseService.initAPIGuestbookResponse(df.getOwner(), df, session, apiTokenUser); } - - // This will throw a ForbiddenException if access isn't authorized: - checkAuthorization(df, apiToken); - + DownloadInfo dInfo = new DownloadInfo(df); logger.fine("checking if thumbnails are supported on this file."); @@ -532,11 +521,10 @@ public String tabularDatafileMetadataDDI(@PathParam("fileId") String fileId, @Q @Path("datafile/{fileId}/auxiliary") @GET public Response listDatafileMetadataAux(@PathParam("fileId") String fileId, - @QueryParam("key") String apiToken, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws ServiceUnavailableException { - return listAuxiliaryFiles(fileId, null, apiToken, uriInfo, headers, response); + return listAuxiliaryFiles(fileId, null, uriInfo, headers, response); } /* * GET method for retrieving a list auxiliary files associated with @@ -547,26 +535,21 @@ public Response listDatafileMetadataAux(@PathParam("fileId") String fileId, @GET public Response listDatafileMetadataAuxByOrigin(@PathParam("fileId") String fileId, @PathParam("origin") String origin, - @QueryParam("key") String apiToken, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws ServiceUnavailableException { - return listAuxiliaryFiles(fileId, origin, apiToken, uriInfo, headers, response); + return listAuxiliaryFiles(fileId, origin, uriInfo, headers, response); } - private Response listAuxiliaryFiles(String fileId, String origin, String apiToken, UriInfo uriInfo, HttpHeaders headers, HttpServletResponse response) { + private Response listAuxiliaryFiles(String fileId, String origin, UriInfo uriInfo, HttpHeaders headers, HttpServletResponse response) { DataFile df = findDataFileOrDieWrapper(fileId); - if (apiToken == null || apiToken.equals("")) { - apiToken = headers.getHeaderString(API_KEY_HEADER); - } - List auxFileList = auxiliaryFileService.findAuxiliaryFiles(df, origin); if (auxFileList == null || auxFileList.isEmpty()) { throw new NotFoundException("No Auxiliary files exist for datafile " + fileId + (origin==null ? 
"": " and the specified origin")); } - boolean isAccessAllowed = isAccessAuthorized(df, apiToken); + boolean isAccessAllowed = isAccessAuthorized(df); JsonArrayBuilder jab = Json.createArrayBuilder(); auxFileList.forEach(auxFile -> { if (isAccessAllowed || auxFile.getIsPublic()) { @@ -594,17 +577,12 @@ private Response listAuxiliaryFiles(String fileId, String origin, String apiToke public DownloadInstance downloadAuxiliaryFile(@PathParam("fileId") String fileId, @PathParam("formatTag") String formatTag, @PathParam("formatVersion") String formatVersion, - @QueryParam("key") String apiToken, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws ServiceUnavailableException { DataFile df = findDataFileOrDieWrapper(fileId); - if (apiToken == null || apiToken.equals("")) { - apiToken = headers.getHeaderString(API_KEY_HEADER); - } - DownloadInfo dInfo = new DownloadInfo(df); boolean publiclyAvailable = false; @@ -654,7 +632,7 @@ public DownloadInstance downloadAuxiliaryFile(@PathParam("fileId") String fileId // as defined for the DataFile itself), and will throw a ForbiddenException // if access is denied: if (!publiclyAvailable) { - checkAuthorization(df, apiToken); + checkAuthorization(df); } return downloadInstance; @@ -670,16 +648,16 @@ public DownloadInstance downloadAuxiliaryFile(@PathParam("fileId") String fileId @POST @Consumes("text/plain") @Produces({ "application/zip" }) - public Response postDownloadDatafiles(String fileIds, @QueryParam("gbrecs") boolean gbrecs, @QueryParam("key") String apiTokenParam, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws WebApplicationException { + public Response postDownloadDatafiles(String fileIds, @QueryParam("gbrecs") boolean gbrecs, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws WebApplicationException { - return downloadDatafiles(fileIds, gbrecs, apiTokenParam, uriInfo, headers, response); + return downloadDatafiles(fileIds, gbrecs, uriInfo, headers, response); } @Path("dataset/{id}") @GET @Produces({"application/zip"}) - public Response downloadAllFromLatest(@PathParam("id") String datasetIdOrPersistentId, @QueryParam("gbrecs") boolean gbrecs, @QueryParam("key") String apiTokenParam, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws WebApplicationException { + public Response downloadAllFromLatest(@PathParam("id") String datasetIdOrPersistentId, @QueryParam("gbrecs") boolean gbrecs, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws WebApplicationException { try { User user = findUserOrDie(); DataverseRequest req = createDataverseRequest(user); @@ -693,7 +671,7 @@ public Response downloadAllFromLatest(@PathParam("id") String datasetIdOrPersist // We don't want downloads from Draft versions to be counted, // so we are setting the gbrecs (aka "do not write guestbook response") // variable accordingly: - return downloadDatafiles(fileIds, true, apiTokenParam, uriInfo, headers, response); + return downloadDatafiles(fileIds, true, uriInfo, headers, response); } } @@ -714,7 +692,7 @@ public Response downloadAllFromLatest(@PathParam("id") String datasetIdOrPersist } String fileIds = getFileIdsAsCommaSeparated(latest.getFileMetadatas()); - return downloadDatafiles(fileIds, gbrecs, apiTokenParam, uriInfo, headers, response); + return downloadDatafiles(fileIds, gbrecs, uriInfo, headers, response); } 
catch (WrappedResponse wr) { return wr.getResponse(); } @@ -763,7 +741,7 @@ public Command handleLatestPublished() { if (dsv.isDraft()) { gbrecs = true; } - return downloadDatafiles(fileIds, gbrecs, apiTokenParam, uriInfo, headers, response); + return downloadDatafiles(fileIds, gbrecs, uriInfo, headers, response); } catch (WrappedResponse wr) { return wr.getResponse(); } @@ -784,11 +762,11 @@ private static String getFileIdsAsCommaSeparated(List fileMetadata @Path("datafiles/{fileIds}") @GET @Produces({"application/zip"}) - public Response datafiles(@PathParam("fileIds") String fileIds, @QueryParam("gbrecs") boolean gbrecs, @QueryParam("key") String apiTokenParam, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws WebApplicationException { - return downloadDatafiles(fileIds, gbrecs, apiTokenParam, uriInfo, headers, response); + public Response datafiles(@PathParam("fileIds") String fileIds, @QueryParam("gbrecs") boolean gbrecs, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws WebApplicationException { + return downloadDatafiles(fileIds, gbrecs, uriInfo, headers, response); } - private Response downloadDatafiles(String rawFileIds, boolean donotwriteGBResponse, String apiTokenParam, UriInfo uriInfo, HttpHeaders headers, HttpServletResponse response) throws WebApplicationException /* throws NotFoundException, ServiceUnavailableException, PermissionDeniedException, AuthorizationRequiredException*/ { + private Response downloadDatafiles(String rawFileIds, boolean donotwriteGBResponse, UriInfo uriInfo, HttpHeaders headers, HttpServletResponse response) throws WebApplicationException /* throws NotFoundException, ServiceUnavailableException, PermissionDeniedException, AuthorizationRequiredException*/ { final long zipDownloadSizeLimit = systemConfig.getZipDownloadLimit(); logger.fine("setting zip download size limit to " + zipDownloadSizeLimit + " bytes."); @@ -810,11 +788,7 @@ private Response downloadDatafiles(String rawFileIds, boolean donotwriteGBRespon String customZipServiceUrl = settingsService.getValueForKey(SettingsServiceBean.Key.CustomZipDownloadServiceUrl); boolean useCustomZipService = customZipServiceUrl != null; - String apiToken = (apiTokenParam == null || apiTokenParam.equals("")) - ? 
headers.getHeaderString(API_KEY_HEADER) - : apiTokenParam; - - User apiTokenUser = findAPITokenUser(apiToken); //for use in adding gb records if necessary + User apiTokenUser = findAPITokenUser(); //for use in adding gb records if necessary Boolean getOrig = false; for (String key : uriInfo.getQueryParameters().keySet()) { @@ -827,7 +801,7 @@ private Response downloadDatafiles(String rawFileIds, boolean donotwriteGBRespon if (useCustomZipService) { URI redirect_uri = null; try { - redirect_uri = handleCustomZipDownload(customZipServiceUrl, fileIds, apiToken, apiTokenUser, uriInfo, headers, donotwriteGBResponse, true); + redirect_uri = handleCustomZipDownload(customZipServiceUrl, fileIds, apiTokenUser, uriInfo, headers, donotwriteGBResponse, true); } catch (WebApplicationException wae) { throw wae; } @@ -859,7 +833,7 @@ public void write(OutputStream os) throws IOException, logger.fine("token: " + fileIdParams[i]); Long fileId = null; try { - fileId = new Long(fileIdParams[i]); + fileId = Long.parseLong(fileIdParams[i]); } catch (NumberFormatException nfe) { fileId = null; } @@ -867,7 +841,7 @@ public void write(OutputStream os) throws IOException, logger.fine("attempting to look up file id " + fileId); DataFile file = dataFileService.find(fileId); if (file != null) { - if (isAccessAuthorized(file, apiToken)) { + if (isAccessAuthorized(file)) { logger.fine("adding datafile (id=" + file.getId() + ") to the download list of the ZippedDownloadInstance."); //downloadInstance.addDataFile(file); @@ -1436,8 +1410,8 @@ public Response requestFileAccess(@PathParam("id") String fileToRequestAccessId, List args = Arrays.asList(wr.getLocalizedMessage()); return error(BAD_REQUEST, BundleUtil.getStringFromBundle("access.api.fileAccess.failure.noUser", args)); } - - if (isAccessAuthorized(dataFile, getRequestApiKey())) { + //Already have access + if (isAccessAuthorized(dataFile)) { return error(BAD_REQUEST, BundleUtil.getStringFromBundle("access.api.requestAccess.failure.invalidRequest")); } @@ -1708,15 +1682,15 @@ public Response rejectFileAccess(@PathParam("id") String fileToRequestAccessId, // checkAuthorization is a convenience method; it calls the boolean method // isAccessAuthorized(), the actual workhorse, tand throws a 403 exception if not. - private void checkAuthorization(DataFile df, String apiToken) throws WebApplicationException { + private void checkAuthorization(DataFile df) throws WebApplicationException { - if (!isAccessAuthorized(df, apiToken)) { + if (!isAccessAuthorized(df)) { throw new ForbiddenException(); } } - private boolean isAccessAuthorized(DataFile df, String apiToken) { + private boolean isAccessAuthorized(DataFile df) { // First, check if the file belongs to a released Dataset version: boolean published = false; @@ -1787,37 +1761,41 @@ private boolean isAccessAuthorized(DataFile df, String apiToken) { } } - if (!restricted && !embargoed) { - // And if they are not published, they can still be downloaded, if the user + + + //The one case where we don't need to check permissions + if (!restricted && !embargoed && published) { + // If they are not published, they can still be downloaded, if the user // has the permission to view unpublished versions! 
(this case will // be handled below) - if (published) { - return true; - } + return true; } + //For permissions check decide if we havce a session user, or an API user User user = null; /** * Authentication/authorization: - * - * note that the fragment below - that retrieves the session object - * and tries to find the user associated with the session - is really - * for logging/debugging purposes only; for practical purposes, it - * would be enough to just call "permissionService.on(df).has(Permission.DownloadFile)" - * and the method does just that, tries to authorize for the user in - * the current session (or guest user, if no session user is available): */ - if (session != null) { + User apiTokenUser = null; + //If we get a non-GuestUser from findUserOrDie, use it. Otherwise, check the session + try { + logger.fine("calling apiTokenUser = findUserOrDie()..."); + apiTokenUser = findUserOrDie(); + } catch (WrappedResponse wr) { + logger.log(Level.FINE, "Message from findUserOrDie(): {0}", wr.getMessage()); + } + + if ((apiTokenUser instanceof GuestUser) && session != null) { if (session.getUser() != null) { - if (session.getUser().isAuthenticated()) { - user = session.getUser(); - } else { + user = session.getUser(); + apiTokenUser=null; + //Fine logging + if (!session.getUser().isAuthenticated()) { logger.fine("User associated with the session is not an authenticated user."); if (session.getUser() instanceof PrivateUrlUser) { logger.fine("User associated with the session is a PrivateUrlUser user."); - user = session.getUser(); } if (session.getUser() instanceof GuestUser) { logger.fine("User associated with the session is indeed a guest user."); @@ -1829,154 +1807,41 @@ private boolean isAccessAuthorized(DataFile df, String apiToken) { } else { logger.fine("Session is null."); } - - User apiTokenUser = null; - - if ((apiToken != null)&&(apiToken.length()!=64)) { - // We'll also try to obtain the user information from the API token, - // if supplied: - - try { - logger.fine("calling apiTokenUser = findUserOrDie()..."); - apiTokenUser = findUserOrDie(); - } catch (WrappedResponse wr) { - logger.log(Level.FINE, "Message from findUserOrDie(): {0}", wr.getMessage()); - } - - if (apiTokenUser == null) { - logger.warning("API token-based auth: Unable to find a user with the API token provided."); - } + //If we don't have a user, nothing more to do. 
(Note session could have returned GuestUser) + if (user == null && apiTokenUser == null) { + logger.warning("Unable to find a user via session or with a token."); + return false; } - - // OK, let's revisit the case of non-restricted files, this time in - // an unpublished version: + + // OK, let's revisit the case of non-restricted files, this time in + // an unpublished version: // (if (published) was already addressed above) - - if (!restricted && !embargoed) { + + DataverseRequest dvr = null; + if (apiTokenUser != null) { + dvr = createDataverseRequest(apiTokenUser); + } else { + // used in JSF context, user may be Guest + dvr = dvRequestService.getDataverseRequest(); + } + if (!published) { // and restricted or embargoed (implied by earlier processing) // If the file is not published, they can still download the file, if the user // has the permission to view unpublished versions: - - if ( user != null ) { - // used in JSF context - if (permissionService.requestOn(dvRequestService.getDataverseRequest(), df.getOwner()).has(Permission.ViewUnpublishedDataset)) { - // it's not unthinkable, that a null user (i.e., guest user) could be given - // the ViewUnpublished permission! - logger.log(Level.FINE, "Session-based auth: user {0} has access rights on the non-restricted, unpublished datafile.", user.getIdentifier()); - return true; - } - } - - if (apiTokenUser != null) { - // used in an API context - if (permissionService.requestOn( createDataverseRequest(apiTokenUser), df.getOwner()).has(Permission.ViewUnpublishedDataset)) { - logger.log(Level.FINE, "Token-based auth: user {0} has access rights on the non-restricted, unpublished datafile.", apiTokenUser.getIdentifier()); - return true; - } - } - // last option - guest user in either contexts - // Guset user is impled by the code above. - if ( permissionService.requestOn(dvRequestService.getDataverseRequest(), df.getOwner()).has(Permission.ViewUnpublishedDataset) ) { + if (permissionService.requestOn(dvr, df.getOwner()).has(Permission.ViewUnpublishedDataset)) { + // it's not unthinkable, that a GuestUser could be given + // the ViewUnpublished permission! + logger.log(Level.FINE, + "Session-based auth: user {0} has access rights on the non-restricted, unpublished datafile.", + dvr.getUser().getIdentifier()); return true; } - - } else { - - // OK, this is a restricted and/or embargoed file. - - boolean hasAccessToRestrictedBySession = false; - boolean hasAccessToRestrictedByToken = false; - - if (permissionService.on(df).has(Permission.DownloadFile)) { - // Note: PermissionServiceBean.on(Datafile df) will obtain the - // User from the Session object, just like in the code fragment - // above. That's why it's not passed along as an argument. 
- hasAccessToRestrictedBySession = true; - } else if (apiTokenUser != null && permissionService.requestOn(createDataverseRequest(apiTokenUser), df).has(Permission.DownloadFile)) { - hasAccessToRestrictedByToken = true; - } - - if (hasAccessToRestrictedBySession || hasAccessToRestrictedByToken) { - if (published) { - if (hasAccessToRestrictedBySession) { - if (user != null) { - logger.log(Level.FINE, "Session-based auth: user {0} is granted access to the restricted, published datafile.", user.getIdentifier()); - } else { - logger.fine("Session-based auth: guest user is granted access to the restricted, published datafile."); - } - } else { - logger.log(Level.FINE, "Token-based auth: user {0} is granted access to the restricted, published datafile.", apiTokenUser.getIdentifier()); - } - return true; - } else { - // if the file is NOT published, we will let them download the - // file ONLY if they also have the permission to view - // unpublished versions: - // Note that the code below does not allow a case where it is the - // session user that has the permission on the file, and the API token - // user with the ViewUnpublished permission, or vice versa! - if (hasAccessToRestrictedBySession) { - if (permissionService.on(df.getOwner()).has(Permission.ViewUnpublishedDataset)) { - if (user != null) { - logger.log(Level.FINE, "Session-based auth: user {0} is granted access to the restricted, unpublished datafile.", user.getIdentifier()); - } else { - logger.fine("Session-based auth: guest user is granted access to the restricted, unpublished datafile."); - } - return true; - } - } else { - if (apiTokenUser != null && permissionService.requestOn(createDataverseRequest(apiTokenUser), df.getOwner()).has(Permission.ViewUnpublishedDataset)) { - logger.log(Level.FINE, "Token-based auth: user {0} is granted access to the restricted, unpublished datafile.", apiTokenUser.getIdentifier()); - return true; - } - } - } - } - } + } else { // published and restricted and/or embargoed - - if ((apiToken != null)) { - // Will try to obtain the user information from the API token, - // if supplied: - - try { - logger.fine("calling user = findUserOrDie()..."); - user = findUserOrDie(); - } catch (WrappedResponse wr) { - logger.log(Level.FINE, "Message from findUserOrDie(): {0}", wr.getMessage()); - } - - if (user == null) { - logger.warning("API token-based auth: Unable to find a user with the API token provided."); - return false; - } - - - //Doesn't this ~duplicate logic above - if so, if there's a way to get here, I think it still works for embargoed files (you only get access if you have download permissions, and, if not published, also view unpublished) - if (permissionService.requestOn(createDataverseRequest(user), df).has(Permission.DownloadFile)) { - if (published) { - logger.log(Level.FINE, "API token-based auth: User {0} has rights to access the datafile.", user.getIdentifier()); - //Same case as line 1809 (and part of 1708 though when published you don't need the DownloadFile permission) - return true; - } else { - // if the file is NOT published, we will let them download the - // file ONLY if they also have the permission to view - // unpublished versions: - if (permissionService.requestOn(createDataverseRequest(user), df.getOwner()).has(Permission.ViewUnpublishedDataset)) { - logger.log(Level.FINE, "API token-based auth: User {0} has rights to access the (unpublished) datafile.", user.getIdentifier()); - //Same case as line 1843? 
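// Editor's note: the branching removed here collapses, in the refactored
// isAccessAuthorized(), into a much smaller decision table. The sketch below is a
// condensed reading of that table using plain booleans in place of the real
// permission/service calls; treat it as an illustration, not the authoritative rules.

class AccessDecisionSketch {

    static boolean isAccessAuthorized(boolean published, boolean restricted, boolean embargoed,
                                      boolean requesterResolved,
                                      boolean canViewUnpublished, boolean canDownloadFile) {
        if (published && !restricted && !embargoed) {
            return true;                  // released, unrestricted file: always downloadable
        }
        if (!requesterResolved) {
            return false;                 // nobody (session or API token) to check permissions for
        }
        if (!published) {
            return canViewUnpublished;    // draft versions require ViewUnpublishedDataset
        }
        return canDownloadFile;           // released but restricted/embargoed requires DownloadFile
    }

    public static void main(String[] args) {
        System.out.println(isAccessAuthorized(true,  false, false, false, false, false)); // true
        System.out.println(isAccessAuthorized(false, false, false, true,  false, false)); // false
        System.out.println(isAccessAuthorized(true,  true,  false, true,  false, true));  // true
    }
}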
- return true; - } else { - logger.log(Level.FINE, "API token-based auth: User {0} is not authorized to access the (unpublished) datafile.", user.getIdentifier()); - } - } - } else { - logger.log(Level.FINE, "API token-based auth: User {0} is not authorized to access the datafile.", user.getIdentifier()); + if (permissionService.requestOn(dvr, df).has(Permission.DownloadFile)) { + return true; } - - return false; - } - + } if (user != null) { logger.log(Level.FINE, "Session-based auth: user {0} has NO access rights on the requested datafile.", user.getIdentifier()); } @@ -1984,37 +1849,30 @@ private boolean isAccessAuthorized(DataFile df, String apiToken) { if (apiTokenUser != null) { logger.log(Level.FINE, "Token-based auth: user {0} has NO access rights on the requested datafile.", apiTokenUser.getIdentifier()); } - - if (user == null && apiTokenUser == null) { - logger.fine("Unauthenticated access: No guest access to the datafile."); - } - return false; } - private User findAPITokenUser(String apiToken) { + private User findAPITokenUser() { User apiTokenUser = null; - - if ((apiToken != null) && (apiToken.length() != 64)) { - // We'll also try to obtain the user information from the API token, - // if supplied: - - try { - logger.fine("calling apiTokenUser = findUserOrDie()..."); - apiTokenUser = findUserOrDie(); - return apiTokenUser; - } catch (WrappedResponse wr) { - logger.log(Level.FINE, "Message from findUserOrDie(): {0}", wr.getMessage()); - return null; + try { + logger.fine("calling apiTokenUser = findUserOrDie()..."); + apiTokenUser = findUserOrDie(); + if(apiTokenUser instanceof GuestUser) { + if(session!=null && session.getUser()!=null) { + //The apiTokenUser, if set, will override the sessionUser in permissions calcs, so set it to null if we have a session user + apiTokenUser=null; + } } - + return apiTokenUser; + } catch (WrappedResponse wr) { + logger.log(Level.FINE, "Message from findUserOrDie(): {0}", wr.getMessage()); + return null; } - return apiTokenUser; } - private URI handleCustomZipDownload(String customZipServiceUrl, String fileIds, String apiToken, User apiTokenUser, UriInfo uriInfo, HttpHeaders headers, boolean donotwriteGBResponse, boolean orig) throws WebApplicationException { + private URI handleCustomZipDownload(String customZipServiceUrl, String fileIds, User apiTokenUser, UriInfo uriInfo, HttpHeaders headers, boolean donotwriteGBResponse, boolean orig) throws WebApplicationException { String zipServiceKey = null; Timestamp timestamp = null; @@ -2031,7 +1889,7 @@ private URI handleCustomZipDownload(String customZipServiceUrl, String fileIds, for (int i = 0; i < fileIdParams.length; i++) { Long fileId = null; try { - fileId = new Long(fileIdParams[i]); + fileId = Long.parseLong(fileIdParams[i]); validIdCount++; } catch (NumberFormatException nfe) { fileId = null; @@ -2040,7 +1898,7 @@ private URI handleCustomZipDownload(String customZipServiceUrl, String fileIds, DataFile file = dataFileService.find(fileId); if (file != null) { validFileCount++; - if (isAccessAuthorized(file, apiToken)) { + if (isAccessAuthorized(file)) { logger.fine("adding datafile (id=" + file.getId() + ") to the download list of the ZippedDownloadInstance."); if (donotwriteGBResponse != true && file.isReleased()) { GuestbookResponse gbr = guestbookResponseService.initAPIGuestbookResponse(file.getOwner(), file, session, apiTokenUser); From 05345ba39688291d028af40497b1ada4368a1418 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 4 Nov 2022 17:03:51 +0100 Subject: [PATCH 088/173] 
feat(ct-base): make buildx/BuildKit use a shared state for builds Should speed up recurring builds a bit. --- modules/container-base/pom.xml | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/container-base/pom.xml b/modules/container-base/pom.xml index 67e2c2f9911..f8b59dcecaa 100644 --- a/modules/container-base/pom.xml +++ b/modules/container-base/pom.xml @@ -97,6 +97,7 @@ linux/arm64 linux/amd64 + ${project.build.directory}/buildx-state Dockerfile From e261e3701b1af286d5901e1a82f84fff525dcd74 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 4 Nov 2022 17:09:27 +0100 Subject: [PATCH 089/173] feat(ct-base): switch /docroot to /dv and add volumes #8932 - Instead of a /docroot, add a more generic /dv which is owned by payara:payara and can be used to either store data in a single volume using subfolders or use subfolders with different backing volumes. Anyway, data is not written to overlay FS this way. (As long as an app image points to this location) - Also define /secrets and /dumps as volumes, so data flowing into these locations is again not added to the overlay FS (which might cause severe damage in case of heap dumps!) - Document the different locations in the base image guide. - Remove the /docroot workaround for uploaded files. This will be solved at application level (either by moving the workaround there) or https://github.com/IQSS/dataverse/pull/8983 --- .../source/container/base-image.rst | 48 +++++++++++++++---- .../container-base/src/main/docker/Dockerfile | 19 +++----- 2 files changed, 45 insertions(+), 22 deletions(-) diff --git a/doc/sphinx-guides/source/container/base-image.rst b/doc/sphinx-guides/source/container/base-image.rst index 197f4175538..8cf6af1f904 100644 --- a/doc/sphinx-guides/source/container/base-image.rst +++ b/doc/sphinx-guides/source/container/base-image.rst @@ -218,7 +218,16 @@ Locations +++++++++ This environment variables represent certain locations and might be reused in your scripts etc. -These variables aren't meant to be reconfigurable and reflect state in the filesystem layout! +All of these variables aren't meant to be reconfigurable and reflect state in the filesystem layout! + +**Writeable at build time:** + +The overlay filesystem of Docker and other container technologies is not meant to be used for any performance IO. +You should avoid *writing* data anywhere in the file tree at runtime, except for well known locations with mounted +volumes backing them (see below). + +The locations below are meant to be written to when you build a container image, either this base or anything +building upon it. You can also use these for references in scripts, etc. .. list-table:: :align: left @@ -245,10 +254,35 @@ These variables aren't meant to be reconfigurable and reflect state in the files * - ``DEPLOY_DIR`` - ``${HOME_DIR}/deployments`` - Any EAR or WAR file, exploded WAR directory etc are autodeployed on start - * - ``DOCROOT_DIR`` - - ``/docroot`` - - Mount a volume here to store i18n language bundle files, sitemaps, images for Dataverse collections, logos, - custom themes and stylesheets, etc here. You might need to replicate this data or place on shared file storage. + * - ``DOMAIN_DIR`` + - ``${PAYARA_DIR}/glassfish`` ``/domains/${DOMAIN_NAME}`` + - Path to root of the Payara domain applications will be deployed into. Usually ``${DOMAIN_NAME}`` will be ``domain1``. + + +**Writeable at runtime:** + +The locations below are defined as `Docker volumes `_ by the base image. 
+They will by default get backed by an "anonymous volume", but you can (and should) bind-mount a host directory or +named Docker volume in these places to avoid data loss, gain performance and/or use a network file system. + +**Notes:** +1. On Kubernetes you still need to provide volume definitions for these places in your deployment objects! +2. You should not write data into these locations at build time - it will be shadowed by the mounted volumes! + +.. list-table:: + :align: left + :width: 100 + :widths: 10 10 50 + :header-rows: 1 + + * - Env. variable + - Value + - Description + * - ``STORAGE_DIR`` + - ``/dv`` + - This place is writeable by the Payara user, making it usable as a place to store research data, customizations + or other. Images inheriting the base image should create distinct folders here, backed by different + mounted volumes. * - ``SECRETS_DIR`` - ``/secrets`` - Mount secrets or other here, being picked up automatically by @@ -258,10 +292,6 @@ These variables aren't meant to be reconfigurable and reflect state in the files - ``/dumps`` - Default location where heap dumps will be stored (see above). You should mount some storage here (disk or ephemeral). - * - ``DOMAIN_DIR`` - - ``${PAYARA_DIR}/glassfish`` ``/domains/${DOMAIN_NAME}`` - - Path to root of the Payara domain applications will be deployed into. Usually ``${DOMAIN_NAME}`` will be ``domain1``. - Exposed Ports diff --git a/modules/container-base/src/main/docker/Dockerfile b/modules/container-base/src/main/docker/Dockerfile index cafeb2ffb59..07968e92359 100644 --- a/modules/container-base/src/main/docker/Dockerfile +++ b/modules/container-base/src/main/docker/Dockerfile @@ -1,4 +1,4 @@ -# Copyright 2019 Forschungszentrum Jülich GmbH +# Copyright 2022 Forschungszentrum Jülich GmbH # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -38,7 +38,7 @@ ENV PAYARA_DIR="${HOME_DIR}/appserver" \ SCRIPT_DIR="${HOME_DIR}/scripts" \ CONFIG_DIR="${HOME_DIR}/config" \ DEPLOY_DIR="${HOME_DIR}/deployments" \ - DOCROOT_DIR="/docroot" \ + STORAGE_DIR="/dv" \ SECRETS_DIR="/secrets" \ DUMPS_DIR="/dumps" \ PASSWORD_FILE="${HOME_DIR}/passwordFile" \ @@ -73,17 +73,19 @@ ARG GID=1000 USER root WORKDIR / SHELL ["/bin/bash", "-euo", "pipefail", "-c"] +# Mark these directories as mutuable data containers to avoid cluttering the images overlayfs at runtime. +VOLUME ${STORAGE_DIR} ${SECRETS_DIR} ${DUMPS_DIR} RUN < Date: Fri, 4 Nov 2022 17:10:47 +0100 Subject: [PATCH 090/173] ci(ct-base): switch some steps to run on push or schedule #8932 Instead of only running the steps to push images to Docker Hub on a Git push event, also make it possible to run them an anything not being a pull_request event. 
(Like a schedule) --- .github/workflows/container_base_push.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml index 82c7a376ae0..2520a7e9257 100644 --- a/.github/workflows/container_base_push.yml +++ b/.github/workflows/container_base_push.yml @@ -53,18 +53,18 @@ jobs: - name: Build base container image with local architecture run: mvn -f modules/container-base -Pct package - - if: ${{ github.event_name == 'push' }} # run only if this is a push - PRs have no access to secrets + - if: ${{ github.event_name != 'pull_request' }} # run only if this is not a pull request - PRs have no access to secrets name: Log in to the Container registry uses: docker/login-action@v1 with: username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_TOKEN }} - - if: ${{ github.event_name == 'push' }} # run only if this is a push - multi-arch makes no sense with PR + - if: ${{ github.event_name != 'pull_request' }} # run only if this is not a pull request - multi-arch makes no sense with PR name: Set up QEMU for multi-arch builds uses: docker/setup-qemu-action@v2 - name: Re-set image tag based on branch if: ${{ github.ref == 'master' }} run: echo "IMAGE_TAG=release" - - if: ${{ github.event_name == 'push' }} # run only if this is a push - tag push will only succeed in upstream + - if: ${{ github.event_name != 'pull_request' }} # run only if this is not a pull request - tag push will only succeed in upstream name: Deploy multi-arch base container image to Docker Hub run: mvn -f modules/container-base -Pct deploy -Dbase.image.tag=${{ env.IMAGE_TAG }} -Ddocker.registry=${{ env.REGISTRY }} From fbfcaa4c5fec93dc2e8ea434497a700e5a047463 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 8 Nov 2022 16:30:39 +0100 Subject: [PATCH 091/173] docs,ci(ct-base): add and push README description to Docker Hub #8932 When pushing to Docker Hub from development, we now also push a short description with disclaimers, links to docs and license hints. --- .github/workflows/container_base_push.yml | 21 +++++++-- modules/container-base/README.md | 56 +++++++++++++++++++++++ 2 files changed, 74 insertions(+), 3 deletions(-) create mode 100644 modules/container-base/README.md diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml index 2520a7e9257..1ef8ba94e78 100644 --- a/.github/workflows/container_base_push.yml +++ b/.github/workflows/container_base_push.yml @@ -9,6 +9,7 @@ on: paths: - 'modules/container-base/**' - 'modules/dataverse-parent/pom.xml' + - '.github/workflows/container_base_push.yml' pull_request: branches: - 'develop' @@ -16,6 +17,7 @@ on: paths: - 'modules/container-base/**' - 'modules/dataverse-parent/pom.xml' + - '.github/workflows/container_base_push.yml' env: IMAGE_TAG: develop @@ -53,18 +55,31 @@ jobs: - name: Build base container image with local architecture run: mvn -f modules/container-base -Pct package - - if: ${{ github.event_name != 'pull_request' }} # run only if this is not a pull request - PRs have no access to secrets + # Run anything below only if this is not a pull request. + # Accessing, pushing tags etc. to DockerHub will only succeed in upstream because secrets. 
+ + - if: ${{ github.event_name != 'pull_request' && github.ref == 'develop' }} + name: Push description to DockerHub + uses: peter-evans/dockerhub-description@v3 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + repository: gdcc/base + short-description: "Dataverse Base Container image providing Payara application server and optimized configuration" + readme-filepath: ./modules/container-base/README.md + + - if: ${{ github.event_name != 'pull_request' }} name: Log in to the Container registry uses: docker/login-action@v1 with: username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_TOKEN }} - - if: ${{ github.event_name != 'pull_request' }} # run only if this is not a pull request - multi-arch makes no sense with PR + - if: ${{ github.event_name != 'pull_request' }} name: Set up QEMU for multi-arch builds uses: docker/setup-qemu-action@v2 - name: Re-set image tag based on branch if: ${{ github.ref == 'master' }} run: echo "IMAGE_TAG=release" - - if: ${{ github.event_name != 'pull_request' }} # run only if this is not a pull request - tag push will only succeed in upstream + - if: ${{ github.event_name != 'pull_request' }} name: Deploy multi-arch base container image to Docker Hub run: mvn -f modules/container-base -Pct deploy -Dbase.image.tag=${{ env.IMAGE_TAG }} -Ddocker.registry=${{ env.REGISTRY }} diff --git a/modules/container-base/README.md b/modules/container-base/README.md new file mode 100644 index 00000000000..d6f93b14da7 --- /dev/null +++ b/modules/container-base/README.md @@ -0,0 +1,56 @@ +# Dataverse Base Container Image + +A "base image" offers you a pre-installed and pre-tuned application server to deploy Dataverse software to. +Adding basic functionality like executing scripts at container boot, monitoring, memory tweaks etc is all done +at this layer, to make the application image focus on the app itself. + +## Quick Reference + +**Maintained by:** + +This image is created, maintained and supported by the Dataverse community on a best-effort basis. + +**Where to find documentation:** + +The [Dataverse Container Guide - Base Image](https://guides.dataverse.org/en/latest/container/base-image.html) +provides in-depth information about content, building, tuning and so on for this image. + +**Where to get help and ask questions:** + +IQSS will not offer you support how to deploy or run it, please reach out to the community for help on using it. +You can join the Community Chat on Matrix at https://chat.dataverse.org or the Community Slack at +https://dataversecommunity.slack.com to ask for help and guidance. + +## Supported Image Tags + +This image is sourced within the main upstream code [repository of the Dataverse software](https://github.com/IQSS/dataverse). +Development and maintenance happens there (again, by the community). Community supported image tags are based on the two +most important branches: + +- `develop` representing the unstable state of affairs in Dataverse's development branch + ([`Dockerfile`](https://github.com/IQSS/dataverse/tree/develop/modules/container-base/src/main/docker/Dockerfile)) +- `release` representing the latest stable release in Dataverse's main branch + ([`Dockerfile`](https://github.com/IQSS/dataverse/tree/master/modules/container-base/src/main/docker/Dockerfile)) + +Within the main repository, you may find the base image's files at `/modules/container-base`. +This Maven module uses the `Maven Docker Plugin `_ to build and ship the image. 
+You may use, extend, or alter this image to your liking and/or host in some different registry if you want to. + +**Supported architectures:** This image is created as a "multi-arch image", supporting the most common architectures +Dataverse usually runs on: AMD64 (Windows/Linux/...) and ARM64 (Apple M1/M2). + +## License + +Image content created by the community is licensed under [Apache License, Version 2.0](https://www.apache.org/licenses/LICENSE-2.0), +like the [main Dataverse project](https://github.com/IQSS/dataverse/blob/develop/LICENSE.md). + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and limitations under the License. + +As with all Docker images, all images likely also contain other software which may be under other licenses (such as +[Payara Server](https://github.com/payara/Payara/blob/master/LICENSE.txt), Bash, etc from the base +distribution, along with any direct or indirect (Java) dependencies contained). + +As for any pre-built image usage, it is the image user's responsibility to ensure that any use of this image complies +with any relevant licenses for all software contained within. From 1241591eb171609542df9e218388f6bb71e7ae71 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 8 Nov 2022 16:31:33 +0100 Subject: [PATCH 092/173] docs(ct-base): add short intro to base image docs page #8932 Explain a bit (short!) what this image is and what to expect. --- doc/sphinx-guides/source/container/base-image.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/doc/sphinx-guides/source/container/base-image.rst b/doc/sphinx-guides/source/container/base-image.rst index 8cf6af1f904..8016ce95f27 100644 --- a/doc/sphinx-guides/source/container/base-image.rst +++ b/doc/sphinx-guides/source/container/base-image.rst @@ -4,8 +4,13 @@ Application Base Image .. contents:: |toctitle| :local: +A "base image" offers you a pre-installed and pre-tuned application server to deploy Dataverse software to. +Adding basic functionality like executing scripts at container boot, monitoring, memory tweaks etc is all done +at this layer, to make the application image focus on the app itself. + Within the main repository, you may find the base image's files at ``/modules/container-base``. This Maven module uses the `Maven Docker Plugin `_ to build and ship the image. +You may use, extend, or alter this image to your liking and/or host in some different registry if you want to. **NOTE: This image is created, maintained and supported by the Dataverse community on a best-effort basis.** IQSS will not offer you support how to deploy or run it, please reach out to the community for help on using it. 
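For readers unfamiliar with the plugin referenced above, the "Maven Docker Plugin" is wired up in the module's pom.xml roughly as sketched below. This is an illustration only: the io.fabric8 docker-maven-plugin coordinates and the build configuration are assumptions, not copied from the module; the gdcc/base image name and the base.image.tag property are the ones used by the workflow steps in these patches.

    <plugin>
        <groupId>io.fabric8</groupId>
        <artifactId>docker-maven-plugin</artifactId>
        <configuration>
            <images>
                <image>
                    <!-- pushed as gdcc/base:develop or gdcc/base:release -->
                    <name>gdcc/base:${base.image.tag}</name>
                    <build>
                        <!-- Dockerfile shipped with the module, see src/main/docker -->
                        <dockerFile>Dockerfile</dockerFile>
                    </build>
                </image>
            </images>
        </configuration>
    </plugin>

The "mvn -f modules/container-base -Pct package" and "deploy" invocations in the workflow then build and push the image through this plugin, with the registry overridable via -Ddocker.registry as the deploy step above shows.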
From 22eb801f0a1dacaea2f34ea1a2864cf5d54f5365 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 8 Nov 2022 23:36:39 +0100 Subject: [PATCH 093/173] ci(ct-base): update action versions #8932 --- .github/workflows/container_base_push.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml index 1ef8ba94e78..519e135f944 100644 --- a/.github/workflows/container_base_push.yml +++ b/.github/workflows/container_base_push.yml @@ -38,15 +38,15 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Set up JDK ${{ matrix.jdk }} - uses: actions/setup-java@v2 + uses: actions/setup-java@v3 with: java-version: ${{ matrix.jdk }} distribution: 'adopt' - name: Cache Maven packages - uses: actions/cache@v2 + uses: actions/cache@v3 with: path: ~/.m2 key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }} @@ -70,7 +70,7 @@ jobs: - if: ${{ github.event_name != 'pull_request' }} name: Log in to the Container registry - uses: docker/login-action@v1 + uses: docker/login-action@v2 with: username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_TOKEN }} From 7d4388ed5022e64a1db721160169d93a2c565007 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 8 Nov 2022 23:42:20 +0100 Subject: [PATCH 094/173] ci(ct-base): fix step if-conditions for branch names #8932 Github context offers ".ref" but we need ".ref_name" to match *just* the branch name. --- .github/workflows/container_base_push.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml index 519e135f944..5a7280ce3b1 100644 --- a/.github/workflows/container_base_push.yml +++ b/.github/workflows/container_base_push.yml @@ -58,7 +58,7 @@ jobs: # Run anything below only if this is not a pull request. # Accessing, pushing tags etc. to DockerHub will only succeed in upstream because secrets. 
- - if: ${{ github.event_name != 'pull_request' && github.ref == 'develop' }} + - if: ${{ github.event_name == 'push' && github.ref_name == 'develop' }} name: Push description to DockerHub uses: peter-evans/dockerhub-description@v3 with: @@ -78,7 +78,7 @@ jobs: name: Set up QEMU for multi-arch builds uses: docker/setup-qemu-action@v2 - name: Re-set image tag based on branch - if: ${{ github.ref == 'master' }} + if: ${{ github.ref_name == 'master' }} run: echo "IMAGE_TAG=release" - if: ${{ github.event_name != 'pull_request' }} name: Deploy multi-arch base container image to Docker Hub From 3d790aacc7ffd4f44e8fb9a4880400960b52b48d Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 8 Nov 2022 23:50:31 +0100 Subject: [PATCH 095/173] ci(ct-base): fix failing image pushes #8932 The login to the registry needs to be explicit otherwise pushes will fail to acquire the correct token and pushes are rejected with "insufficient_scope: authorization failed" --- .github/workflows/container_base_push.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml index 5a7280ce3b1..fc0a3564e50 100644 --- a/.github/workflows/container_base_push.yml +++ b/.github/workflows/container_base_push.yml @@ -72,6 +72,7 @@ jobs: name: Log in to the Container registry uses: docker/login-action@v2 with: + registry: ${{ env.REGISTRY }} username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_TOKEN }} - if: ${{ github.event_name != 'pull_request' }} From 609688092192e674686243096fcc45a9e4086826 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Wed, 9 Nov 2022 15:18:48 +0100 Subject: [PATCH 096/173] docs(ct-base): rephrase slightly to match wording in main index Co-authored-by: Benjamin Peuch --- doc/sphinx-guides/source/index.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/index.rst b/doc/sphinx-guides/source/index.rst index be32e94d80f..0cd01b8a5a7 100755 --- a/doc/sphinx-guides/source/index.rst +++ b/doc/sphinx-guides/source/index.rst @@ -31,7 +31,7 @@ The User Guide is further divided into primary activities: finding & using data, adding Datasets, administering dataverses or Datasets, and Dataset exploration/visualizations. Details on all of the above tasks can be found in the Users Guide. The Installation Guide is for people or organizations who want to host their -own Dataverse. The Container Guide adds to this information on container-based installations. +own Dataverse. The Container Guide gives information on how to deploy Dataverse with containers. The Developer Guide contains instructions for people who want to contribute to the Open Source Dataverse project or who want to modify the code to suit their own needs. 
Finally, the API Guide is for From 4a79dcbddde84251c4a975e3b858d00171ffef66 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Wed, 9 Nov 2022 15:25:33 +0100 Subject: [PATCH 097/173] docs(ct-base): apply some language tweaks to docs pages Co-authored-by: Benjamin Peuch --- doc/sphinx-guides/source/container/index.rst | 2 +- modules/container-base/README.md | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/doc/sphinx-guides/source/container/index.rst b/doc/sphinx-guides/source/container/index.rst index f6c99bfc19e..6d22318ad03 100644 --- a/doc/sphinx-guides/source/container/index.rst +++ b/doc/sphinx-guides/source/container/index.rst @@ -9,7 +9,7 @@ Container Guide Running Dataverse software in containers is quite different than in a :doc:`classic installation <../installation/prep>`. -Both approaches have pros and cons. These days (2022) containers are very often used for development and testing, +Both approaches have pros and cons. These days, containers are very often used for development and testing, but there is an ever rising move for running applications in the cloud using container technology. **NOTE:** diff --git a/modules/container-base/README.md b/modules/container-base/README.md index d6f93b14da7..ce48eae8a65 100644 --- a/modules/container-base/README.md +++ b/modules/container-base/README.md @@ -1,7 +1,7 @@ # Dataverse Base Container Image A "base image" offers you a pre-installed and pre-tuned application server to deploy Dataverse software to. -Adding basic functionality like executing scripts at container boot, monitoring, memory tweaks etc is all done +Adding basic functionality like executing scripts at container boot, monitoring, memory tweaks, etc., is all done at this layer, to make the application image focus on the app itself. ## Quick Reference @@ -17,14 +17,14 @@ provides in-depth information about content, building, tuning and so on for this **Where to get help and ask questions:** -IQSS will not offer you support how to deploy or run it, please reach out to the community for help on using it. +IQSS will not offer you support how to deploy or run it. Please reach out to the community for help on using it. You can join the Community Chat on Matrix at https://chat.dataverse.org or the Community Slack at https://dataversecommunity.slack.com to ask for help and guidance. ## Supported Image Tags This image is sourced within the main upstream code [repository of the Dataverse software](https://github.com/IQSS/dataverse). -Development and maintenance happens there (again, by the community). Community supported image tags are based on the two +Development and maintenance happens there (again, by the community). Community-supported image tags are based on the two most important branches: - `develop` representing the unstable state of affairs in Dataverse's development branch @@ -32,7 +32,7 @@ most important branches: - `release` representing the latest stable release in Dataverse's main branch ([`Dockerfile`](https://github.com/IQSS/dataverse/tree/master/modules/container-base/src/main/docker/Dockerfile)) -Within the main repository, you may find the base image's files at `/modules/container-base`. +Within the main repository, you may find the base image files at `/modules/container-base`. This Maven module uses the `Maven Docker Plugin `_ to build and ship the image. You may use, extend, or alter this image to your liking and/or host in some different registry if you want to. 
@@ -49,7 +49,7 @@ Unless required by applicable law or agreed to in writing, software distributed See the License for the specific language governing permissions and limitations under the License. As with all Docker images, all images likely also contain other software which may be under other licenses (such as -[Payara Server](https://github.com/payara/Payara/blob/master/LICENSE.txt), Bash, etc from the base +[Payara Server](https://github.com/payara/Payara/blob/master/LICENSE.txt), Bash, etc., from the base distribution, along with any direct or indirect (Java) dependencies contained). As for any pre-built image usage, it is the image user's responsibility to ensure that any use of this image complies From c4e5028928302b183530d23159ee5e0f807f08b0 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 10 Nov 2022 11:42:59 +0100 Subject: [PATCH 098/173] refactor(metadata): rename CodeMeta softwareVersion to codeVersion #7844 As the citation block already contains a compound field "software" with both "softwareName" and "softwareVersion", meant to describe software used to create the dataset, this name conflict must be resolved. By renaming to "codeVersion", the semantic is not changed, as this metadata block is about describing software deposits. As the termURI is explicitly set to "schema.org/softwareVersion" it remains compatible with OAI-ORE and other linked data usages. A future exporter for CodeMeta might require special attention for this field. --- scripts/api/data/metadatablocks/codemeta.tsv | 2 +- src/main/java/propertyFiles/codeMeta20.properties | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/api/data/metadatablocks/codemeta.tsv b/scripts/api/data/metadatablocks/codemeta.tsv index 029ca2355ec..3c872426387 100644 --- a/scripts/api/data/metadatablocks/codemeta.tsv +++ b/scripts/api/data/metadatablocks/codemeta.tsv @@ -1,7 +1,7 @@ #metadataBlock name dataverseAlias displayName blockURI codeMeta20 Software Metadata (CodeMeta v2.0) https://codemeta.github.io/terms/ #datasetField name title description watermark fieldType displayOrder displayFormat advancedSearchField allowControlledVocabulary allowmultiples facetable displayoncreate required parent metadatablock_id termURI - softwareVersion Software Version Version of the software instance, usually following some convention like SemVer etc. e.g. 0.2.1 or 1.3 or 2021.1 etc text 0 #VALUE TRUE FALSE FALSE TRUE TRUE FALSE codeMeta20 https://schema.org/softwareVersion + codeVersion Software Version Version of the software instance, usually following some convention like SemVer etc. e.g. 0.2.1 or 1.3 or 2021.1 etc text 0 #VALUE TRUE FALSE FALSE TRUE TRUE FALSE codeMeta20 https://schema.org/softwareVersion developmentStatus Development Status Description of development status, e.g. work in progress (wip), active, etc. See repostatus.org for more information. text 1 #VALUE TRUE TRUE FALSE TRUE FALSE FALSE codeMeta20 https://www.repostatus.org codeRepository Code Repository Link to the repository where the un-compiled, human readable code and related code is located (SVN, GitHub, CodePlex, institutional GitLab instance, Gitea, etc.). e.g. https://github.com/user/project url 2 #VALUE TRUE FALSE TRUE FALSE TRUE FALSE codeMeta20 https://schema.org/codeRepository applicationCategory Application Category Type of software application, e.g. Simulation, Analysis, Visualisation. 
text 3 #VALUE TRUE FALSE TRUE TRUE TRUE FALSE codeMeta20 https://schema.org/applicationCategory diff --git a/src/main/java/propertyFiles/codeMeta20.properties b/src/main/java/propertyFiles/codeMeta20.properties index e203c1e46e9..5f788df4e83 100644 --- a/src/main/java/propertyFiles/codeMeta20.properties +++ b/src/main/java/propertyFiles/codeMeta20.properties @@ -1,8 +1,8 @@ metadatablock.name=codeMeta20 metadatablock.displayName=Software Metadata (CodeMeta 2.0) -datasetfieldtype.softwareVersion.title=Software Version -datasetfieldtype.softwareVersion.description=Version of the software instance, usually following some convention like SemVer etc. -datasetfieldtype.softwareVersion.watermark=e.g. 0.2.1 or 1.3 or 2021.1 etc +datasetfieldtype.codeVersion.title=Software Version +datasetfieldtype.codeVersion.description=Version of the software instance, usually following some convention like SemVer etc. +datasetfieldtype.codeVersion.watermark=e.g. 0.2.1 or 1.3 or 2021.1 etc datasetfieldtype.developmentStatus.title=Development Status datasetfieldtype.developmentStatus.description=Description of development status, e.g. work in progress (wip), active, etc. See repostatus.org for more information. datasetfieldtype.developmentStatus.watermark= Development Status From d79b4aa3ad1f99ab61d0330462c41c36f478514c Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 10 Nov 2022 11:44:57 +0100 Subject: [PATCH 099/173] style(metadata): rephrase CodeMeta storage and memory requirements descriptions #7844 A slight rephrasing should make it easier to understand what is expected as content for these metadata fields. --- scripts/api/data/metadatablocks/codemeta.tsv | 4 ++-- src/main/java/propertyFiles/codeMeta20.properties | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/api/data/metadatablocks/codemeta.tsv b/scripts/api/data/metadatablocks/codemeta.tsv index 3c872426387..b65cf56b1af 100644 --- a/scripts/api/data/metadatablocks/codemeta.tsv +++ b/scripts/api/data/metadatablocks/codemeta.tsv @@ -18,8 +18,8 @@ softwareSuggestions Name & Version Name and version of the optional software/library dependency e.g. Sphinx 5.0.2 text 0 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE softwareSuggestionsItem codeMeta20 https://codemeta.github.io/terms/softwareSuggestions softwareSuggestionsInfoUrl Info URL Link to optional software/library homepage or documentation (ideally also versioned) e.g. https://www.sphinx-doc.org url 1 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE softwareSuggestionsItem codeMeta20 https://dataverse.org/schema/codeMeta20/softwareSuggestionsInfoUrl memoryRequirements Memory Requirements Minimum memory requirements. text 12 #VALUE TRUE FALSE FALSE FALSE FALSE FALSE codeMeta20 https://schema.org/memoryRequirements - processorRequirements Processor Requirements Processor architecture required to run the application (e.g. IA64). text 13 #VALUE TRUE FALSE TRUE FALSE FALSE FALSE codeMeta20 https://schema.org/processorRequirements - storageRequirements Storage Requirements Storage requirements (e.g. free space required). text 14 #VALUE TRUE FALSE FALSE FALSE FALSE FALSE codeMeta20 https://schema.org/storageRequirements + processorRequirements Processor Requirements Processor architecture or other CPU requirements to run the application (e.g. IA64). text 13 #VALUE TRUE FALSE TRUE FALSE FALSE FALSE codeMeta20 https://schema.org/processorRequirements + storageRequirements Storage Requirements Minimum storage requirements (e.g. free space required). 
text 14 #VALUE TRUE FALSE FALSE FALSE FALSE FALSE codeMeta20 https://schema.org/storageRequirements permissions Permissions Permission(s) required to run the code (for example, a mobile app may require full internet access or may run only on wifi). text 15 #VALUE TRUE FALSE TRUE FALSE FALSE FALSE codeMeta20 https://schema.org/permissions softwareHelp Software Help/Documentation Link to help texts or documentation e.g. https://user.github.io/project/docs url 16 #VALUE FALSE FALSE TRUE FALSE TRUE FALSE codeMeta20 https://schema.org/softwareHelp readme Readme Link to the README of the project e.g. https://github.com/user/project/blob/main/README.md url 17 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE codeMeta20 https://codemeta.github.io/terms/readme diff --git a/src/main/java/propertyFiles/codeMeta20.properties b/src/main/java/propertyFiles/codeMeta20.properties index 5f788df4e83..92153ccb10a 100644 --- a/src/main/java/propertyFiles/codeMeta20.properties +++ b/src/main/java/propertyFiles/codeMeta20.properties @@ -52,10 +52,10 @@ datasetfieldtype.memoryRequirements.title=Memory Requirements datasetfieldtype.memoryRequirements.description=Minimum memory requirements. datasetfieldtype.memoryRequirements.watermark= datasetfieldtype.processorRequirements.title=Processor Requirements -datasetfieldtype.processorRequirements.description=Processor architecture required to run the application (e.g. IA64). +datasetfieldtype.processorRequirements.description=Processor architecture or other CPU requirements to run the application (e.g. IA64). datasetfieldtype.processorRequirements.watermark= datasetfieldtype.storageRequirements.title=Storage Requirements -datasetfieldtype.storageRequirements.description=Storage requirements (e.g. free space required). +datasetfieldtype.storageRequirements.description=Minimum storage requirements (e.g. free space required). datasetfieldtype.storageRequirements.watermark= datasetfieldtype.permissions.title=Permissions datasetfieldtype.permissions.description=Permission(s) required to run the code (for example, a mobile app may require full internet access or may run only on wifi). From 8d5edf23a13631e878c413e55c320cb704a579b5 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 10 Nov 2022 12:35:50 +0100 Subject: [PATCH 100/173] feat(metadata): add CodeMeta fields to Solr schema #7844 Adding the fields of the CodeMeta block to the Solr schema to enable quick usage of the fields (despite being flagged experimental in the guides). 
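Each field of the new block needs a matching declaration in the Solr schema so it can be indexed and searched. The additions follow the same pattern used for the existing dataset fields: a field entry per datasetField name from the TSV, plus a copyField into the catch-all full-text field. A minimal sketch for two of the CodeMeta fields (field types and attribute values here are illustrative, not copied from the diff below):

    <field name="codeVersion" type="text_en" multiValued="false" stored="true" indexed="true"/>
    <field name="developmentStatus" type="text_en" multiValued="false" stored="true" indexed="true"/>

    <copyField source="codeVersion" dest="_text_" maxChars="3000"/>
    <copyField source="developmentStatus" dest="_text_" maxChars="3000"/>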
--- conf/solr/8.11.1/schema.xml | 48 ++++++++++++++++++++++++++++++++++++- 1 file changed, 47 insertions(+), 1 deletion(-) diff --git a/conf/solr/8.11.1/schema.xml b/conf/solr/8.11.1/schema.xml index 63312ab5d40..2656abf0dc5 100644 --- a/conf/solr/8.11.1/schema.xml +++ b/conf/solr/8.11.1/schema.xml @@ -405,9 +405,31 @@ + + + + + + + + + + + + + + + + + + + + + + + - @@ -645,6 +667,30 @@ + + + + + + + + + + + + + + + + + + + + + + + + - 5.0.0-RC1 + 5.0.0-RC2 1.15.0 From 3d1e98c5a9f5f755d8d78b6151b659fe2377f3ed Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Fri, 2 Dec 2022 13:27:40 -0500 Subject: [PATCH 107/173] this method was renamed in RC2 (#8843) --- .../harvest/server/xoai/DataverseXoaiItemRepository.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/server/xoai/DataverseXoaiItemRepository.java b/src/main/java/edu/harvard/iq/dataverse/harvest/server/xoai/DataverseXoaiItemRepository.java index faf3cf9ddc4..147d42648fa 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/server/xoai/DataverseXoaiItemRepository.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/server/xoai/DataverseXoaiItemRepository.java @@ -49,7 +49,7 @@ public DataverseXoaiItemRepository (OAIRecordServiceBean recordService, DatasetS } @Override - public ItemIdentifier getItem(String identifier) throws IdDoesNotExistException { + public ItemIdentifier getItemIdentifier(String identifier) throws IdDoesNotExistException { // This method is called when ListMetadataFormats request specifies // the identifier, requesting the formats available for this specific record. // In our case, under the current implementation, we need to simply look From aeffa3b6fc13a029b70630d856b5f0373a333903 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Mon, 5 Dec 2022 20:41:24 -0500 Subject: [PATCH 108/173] a few extra oai tests (#8843) --- .../iq/dataverse/api/HarvestingServerIT.java | 222 +++++++++++++----- .../edu/harvard/iq/dataverse/api/UtilIT.java | 10 + 2 files changed, 176 insertions(+), 56 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java index fdd034ab12e..5355b57490d 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java @@ -10,7 +10,12 @@ import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import com.jayway.restassured.response.Response; import com.jayway.restassured.path.json.JsonPath; +import com.jayway.restassured.path.xml.XmlPath; +import com.jayway.restassured.path.xml.element.Node; import static edu.harvard.iq.dataverse.api.UtilIT.API_TOKEN_HTTP_HEADER; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; import javax.json.Json; import javax.json.JsonArray; import static javax.ws.rs.core.Response.Status.FORBIDDEN; @@ -24,18 +29,32 @@ import static org.junit.Assert.assertTrue; /** - * extremely minimal API tests for creating OAI sets. + * Tests for the Harvesting Server functionality + * Note that these test BOTH the proprietary Dataverse rest APIs for creating + * and managing sets, AND the OAI-PMH functionality itself. 
*/ public class HarvestingServerIT { private static final Logger logger = Logger.getLogger(HarvestingServerIT.class.getCanonicalName()); + private static String normalUserAPIKey; + private static String adminUserAPIKey; + private static String singleSetDatasetIdentifier; + private static String singleSetDatasetPersistentId; + @BeforeClass public static void setUpClass() { RestAssured.baseURI = UtilIT.getRestAssuredBaseUri(); // enable harvesting server // Gave some thought to storing the original response, and resetting afterwards - but that appears to be more complexity than it's worth Response enableHarvestingServerResponse = UtilIT.setSetting(SettingsServiceBean.Key.OAIServerEnabled,"true"); + + // Create users: + setupUsers(); + + // Create and publish some datasets: + setupDatasets(); + } @AfterClass @@ -44,7 +63,7 @@ public static void afterClass() { Response enableHarvestingServerResponse = UtilIT.setSetting(SettingsServiceBean.Key.OAIServerEnabled,"false"); } - private void setupUsers() { + private static void setupUsers() { Response cu0 = UtilIT.createRandomUser(); normalUserAPIKey = UtilIT.getApiTokenFromResponse(cu0); Response cu1 = UtilIT.createRandomUser(); @@ -52,6 +71,40 @@ private void setupUsers() { Response u1a = UtilIT.makeSuperUser(un1); adminUserAPIKey = UtilIT.getApiTokenFromResponse(cu1); } + + private static void setupDatasets() { + // create dataverse: + Response createDataverseResponse = UtilIT.createRandomDataverse(adminUserAPIKey); + createDataverseResponse.prettyPrint(); + String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse); + + // publish dataverse: + Response publishDataverse = UtilIT.publishDataverseViaNativeApi(dataverseAlias, adminUserAPIKey); + assertEquals(OK.getStatusCode(), publishDataverse.getStatusCode()); + + // create dataset: + Response createDatasetResponse = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, adminUserAPIKey); + createDatasetResponse.prettyPrint(); + Integer datasetId = UtilIT.getDatasetIdFromResponse(createDatasetResponse); + + // retrieve the global id: + singleSetDatasetPersistentId = UtilIT.getDatasetPersistentIdFromResponse(createDatasetResponse); + + // publish dataset: + Response publishDataset = UtilIT.publishDatasetViaNativeApi(singleSetDatasetPersistentId, "major", adminUserAPIKey); + assertEquals(200, publishDataset.getStatusCode()); + + singleSetDatasetIdentifier = singleSetDatasetPersistentId.substring(singleSetDatasetPersistentId.lastIndexOf('/') + 1); + + logger.info("identifier: " + singleSetDatasetIdentifier); + + // Publish command is executed asynchronously, i.e. it may + // still be running after we received the OK from the publish API. + // The oaiExport step also requires the metadata exports to be done and this + // takes longer than just publish/reindex. + // So wait for all of this to finish. 
+ UtilIT.sleepForReexport(singleSetDatasetPersistentId, adminUserAPIKey, 10); + } private String jsonForTestSpec(String name, String def) { String r = String.format("{\"name\":\"%s\",\"definition\":\"%s\"}", name, def);//description is optional @@ -63,20 +116,84 @@ private String jsonForEditSpec(String name, String def, String desc) { return r; } - private String normalUserAPIKey; - private String adminUserAPIKey; + private XmlPath validateOaiVerbResponse(Response oaiResponse, String verb) { + // confirm that the response is in fact XML: + XmlPath responseXmlPath = oaiResponse.getBody().xmlPath(); + assertNotNull(responseXmlPath); + + String dateString = responseXmlPath.getString("OAI-PMH.responseDate"); + assertNotNull(dateString); // TODO: validate that it's well-formatted! + logger.info("date string from the OAI output:"+dateString); + assertEquals("http://localhost:8080/oai", responseXmlPath.getString("OAI-PMH.request")); + assertEquals(verb, responseXmlPath.getString("OAI-PMH.request.@verb")); + return responseXmlPath; + } + + @Test + public void testOaiIdentify() { + // Run Identify: + Response identifyResponse = UtilIT.getOaiIdentify(); + assertEquals(OK.getStatusCode(), identifyResponse.getStatusCode()); + //logger.info("Identify response: "+identifyResponse.prettyPrint()); + + // Validate the response: + + XmlPath responseXmlPath = validateOaiVerbResponse(identifyResponse, "Identify"); + assertEquals("http://localhost:8080/oai", responseXmlPath.getString("OAI-PMH.Identify.baseURL")); + // Confirm that the server is reporting the correct parameters that + // our server implementation should be using: + assertEquals("2.0", responseXmlPath.getString("OAI-PMH.Identify.protocolVersion")); + assertEquals("transient", responseXmlPath.getString("OAI-PMH.Identify.deletedRecord")); + assertEquals("YYYY-MM-DDThh:mm:ssZ", responseXmlPath.getString("OAI-PMH.Identify.granularity")); + } + + @Test + public void testOaiListMetadataFormats() { + // Run ListMeatadataFormats: + Response listFormatsResponse = UtilIT.getOaiListMetadataFormats(); + assertEquals(OK.getStatusCode(), listFormatsResponse.getStatusCode()); + //logger.info("ListMetadataFormats response: "+listFormatsResponse.prettyPrint()); + + // Validate the response: + + XmlPath responseXmlPath = validateOaiVerbResponse(listFormatsResponse, "ListMetadataFormats"); + + // Check the payload of the response atgainst the list of metadata formats + // we are currently offering under OAI; will need to be explicitly + // modified if/when we add more harvestable formats. + + List listFormats = responseXmlPath.getList("OAI-PMH.ListMetadataFormats.metadataFormat"); + + assertNotNull(listFormats); + assertEquals(5, listFormats.size()); + + // The metadata formats are reported in an unpredictable ordder. 
We + // want to sort the prefix names for comparison purposes, and for that + // they need to be saved in a modifiable list: + List metadataPrefixes = new ArrayList<>(); + + for (int i = 0; i < listFormats.size(); i++) { + metadataPrefixes.add(responseXmlPath.getString("OAI-PMH.ListMetadataFormats.metadataFormat["+i+"].metadataPrefix")); + } + Collections.sort(metadataPrefixes); + + assertEquals("[Datacite, dataverse_json, oai_datacite, oai_dc, oai_ddi]", metadataPrefixes.toString()); + + } + + @Test - public void testSetCreation() { - setupUsers(); + public void testSetCreateAPIandOAIlistIdentifiers() { + // Create the set with Dataverse /api/harvest/server API: String setName = UtilIT.getRandomString(6); String def = "*"; // make sure the set does not exist - String u0 = String.format("/api/harvest/server/oaisets/%s", setName); + String setPath = String.format("/api/harvest/server/oaisets/%s", setName); String createPath ="/api/harvest/server/oaisets/add"; Response r0 = given() - .get(u0); + .get(setPath); assertEquals(404, r0.getStatusCode()); // try to create set as normal user, should fail @@ -94,7 +211,7 @@ public void testSetCreation() { assertEquals(201, r2.getStatusCode()); Response getSet = given() - .get(u0); + .get(setPath); logger.info("getSet.getStatusCode(): " + getSet.getStatusCode()); logger.info("getSet printresponse: " + getSet.prettyPrint()); @@ -118,17 +235,19 @@ public void testSetCreation() { Response r4 = UtilIT.exportOaiSet(setName); assertEquals(200, r4.getStatusCode()); - // try to delete as normal user should fail + + + // try to delete as normal user, should fail Response r5 = given() .header(UtilIT.API_TOKEN_HTTP_HEADER, normalUserAPIKey) - .delete(u0); + .delete(setPath); logger.info("r5.getStatusCode(): " + r5.getStatusCode()); assertEquals(400, r5.getStatusCode()); - // try to delete as admin user should work + // try to delete as admin user, should work Response r6 = given() .header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey) - .delete(u0); + .delete(setPath); logger.info("r6.getStatusCode(): " + r6.getStatusCode()); assertEquals(200, r6.getStatusCode()); @@ -136,7 +255,7 @@ public void testSetCreation() { @Test public void testSetEdit() { - setupUsers(); + //setupUsers(); String setName = UtilIT.getRandomString(6); String def = "*"; @@ -195,46 +314,17 @@ public void testSetEdit() { // OAI set with that one dataset, and attempt to retrieve the OAI record // with GetRecord. 
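For orientation, the GetRecord response that the test below navigates with XmlPath has roughly the following shape. The envelope is the standard OAI-PMH 2.0 one; the identifier, datestamp and setSpec values are placeholders, not values taken from this patch:

    <OAI-PMH xmlns="http://www.openarchives.org/OAI/2.0/">
      <responseDate>2022-11-08T12:00:00Z</responseDate>
      <request verb="GetRecord" metadataPrefix="oai_dc">http://localhost:8080/oai</request>
      <GetRecord>
        <record>
          <header>
            <identifier>doi:10.5072/FK2/EXAMPLE</identifier>
            <datestamp>2022-11-08T12:00:00Z</datestamp>
            <setSpec>EXAMPLE</setSpec>
          </header>
          <metadata>
            <oai_dc:dc xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/"
                       xmlns:dc="http://purl.org/dc/elements/1.1/">
              <dc:title>...</dc:title>
            </oai_dc:dc>
          </metadata>
        </record>
      </GetRecord>
    </OAI-PMH>

The header identifier carries the dataset persistent ID, which is what the assertion on "OAI-PMH.GetRecord.record.header.identifier" compares against.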
@Test - public void testOaiFunctionality() throws InterruptedException { + public void testSingleRecordOaiSet() throws InterruptedException { - setupUsers(); - - // create dataverse: - Response createDataverseResponse = UtilIT.createRandomDataverse(adminUserAPIKey); - createDataverseResponse.prettyPrint(); - String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse); + //setupUsers(); - // publish dataverse: - Response publishDataverse = UtilIT.publishDataverseViaNativeApi(dataverseAlias, adminUserAPIKey); - assertEquals(OK.getStatusCode(), publishDataverse.getStatusCode()); - - // create dataset: - Response createDatasetResponse = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, adminUserAPIKey); - createDatasetResponse.prettyPrint(); - Integer datasetId = UtilIT.getDatasetIdFromResponse(createDatasetResponse); - - // retrieve the global id: - String datasetPersistentId = UtilIT.getDatasetPersistentIdFromResponse(createDatasetResponse); - - // publish dataset: - Response publishDataset = UtilIT.publishDatasetViaNativeApi(datasetPersistentId, "major", adminUserAPIKey); - assertEquals(200, publishDataset.getStatusCode()); - - String identifier = datasetPersistentId.substring(datasetPersistentId.lastIndexOf('/') + 1); - - logger.info("identifier: " + identifier); + - // Let's try and create an OAI set with the dataset we have just - // created and published: - // - however, publish command is executed asynchronously, i.e. it may - // still be running after we received the OK from the publish API. - // The oaiExport step also requires the metadata exports to be done and this - // takes longer than just publish/reindex. - // So wait for all of this to finish. - UtilIT.sleepForReexport(datasetPersistentId, adminUserAPIKey, 10); + // Let's try and create an OAI set with the "single set dataset" that + // was created as part of the initial setup: - String setName = identifier; - String setQuery = "dsPersistentId:" + identifier; + String setName = singleSetDatasetIdentifier; + String setQuery = "dsPersistentId:" + singleSetDatasetIdentifier; String apiPath = String.format("/api/harvest/server/oaisets/%s", setName); String createPath ="/api/harvest/server/oaisets/add"; Response createSetResponse = given() @@ -277,12 +367,18 @@ public void testOaiFunctionality() throws InterruptedException { // There should be 1 and only 1 record in the response: assertEquals(1, ret.size()); // And the record should be the dataset we have just created: - assertEquals(datasetPersistentId, listIdentifiersResponse.getBody().xmlPath() + assertEquals(singleSetDatasetPersistentId, listIdentifiersResponse.getBody().xmlPath() .getString("OAI-PMH.ListIdentifiers.header.identifier")); break; } Thread.sleep(1000L); - } while (i")); // And now run GetRecord on the OAI record for the dataset: - Response getRecordResponse = UtilIT.getOaiRecord(datasetPersistentId, "oai_dc"); - - assertEquals(datasetPersistentId, getRecordResponse.getBody().xmlPath().getString("OAI-PMH.GetRecord.record.header.identifier")); + Response getRecordResponse = UtilIT.getOaiRecord(singleSetDatasetPersistentId, "oai_dc"); + + System.out.println("GetRecord response in its entirety: "+getRecordResponse.getBody().prettyPrint()); + System.out.println("one more time:"); + getRecordResponse.prettyPrint(); + + assertEquals(singleSetDatasetPersistentId, getRecordResponse.getBody().xmlPath().getString("OAI-PMH.GetRecord.record.header.identifier")); // TODO: // check the actual metadata payload of the OAI record more carefully? 
} + + // This test will attempt to create a set with multiple records (enough + // to trigger a paged response with a continuation token) and test its + // performance. + + + @Test + public void testMultiRecordOaiSet() throws InterruptedException { + + } } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index 550d4ed1264..9fa47db167b 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -2620,6 +2620,16 @@ static Response exportOaiSet(String setName) { return given().put(apiPath); } + static Response getOaiIdentify() { + String oaiVerbPath = "/oai?verb=Identify"; + return given().get(oaiVerbPath); + } + + static Response getOaiListMetadataFormats() { + String oaiVerbPath = "/oai?verb=ListMetadataFormats"; + return given().get(oaiVerbPath); + } + static Response getOaiRecord(String datasetPersistentId, String metadataFormat) { String apiPath = String.format("/oai?verb=GetRecord&identifier=%s&metadataPrefix=%s", datasetPersistentId, metadataFormat); return given().get(apiPath); From 4b60983e360b3ee4b5a50535b769852fc9ea67ef Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 6 Dec 2022 09:25:42 +0100 Subject: [PATCH 109/173] refactor(settings): remove unused Config var in SystemConfig #7000 --- src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java index fe95f53d293..fc7fd7beb06 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java @@ -10,8 +10,6 @@ import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.validation.PasswordValidatorUtil; -import org.eclipse.microprofile.config.Config; -import org.eclipse.microprofile.config.ConfigProvider; import org.passay.CharacterRule; import javax.ejb.EJB; @@ -46,7 +44,6 @@ public class SystemConfig { private static final Logger logger = Logger.getLogger(SystemConfig.class.getCanonicalName()); - private static final Config config = ConfigProvider.getConfig(); @EJB SettingsServiceBean settingsService; @@ -133,7 +130,6 @@ public String getVersion(boolean withBuildNumber) { // It will default to read from microprofile-config.properties source, // which contains in the source a Maven property reference to ${project.version}. // When packaging the app to deploy it, Maven will replace this, rendering it a static entry. - // NOTE: MicroProfile Config will cache the entry for us in internal maps. 
String appVersion = JvmSettings.VERSION.lookup(); if (withBuildNumber) { From 711dc6362dc629269d7db5840eb13821fc978682 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Fri, 9 Dec 2022 10:39:44 -0500 Subject: [PATCH 110/173] extra metadata from NetCDF and HDF5 files in NcML format #9153 --- doc/release-notes/9153-extract-metadata.md | 1 + .../source/user/dataset-management.rst | 7 ++ .../edu/harvard/iq/dataverse/DatasetPage.java | 1 + .../iq/dataverse/EditDatafilesPage.java | 1 + .../datadeposit/MediaResourceManagerImpl.java | 1 + .../datasetutility/AddReplaceFileHelper.java | 2 + .../dataverse/ingest/IngestServiceBean.java | 64 ++++++++++++++++++- .../harvard/iq/dataverse/api/NetcdfIT.java | 57 +++++++++++++++++ 8 files changed, 133 insertions(+), 1 deletion(-) create mode 100644 doc/release-notes/9153-extract-metadata.md create mode 100644 src/test/java/edu/harvard/iq/dataverse/api/NetcdfIT.java diff --git a/doc/release-notes/9153-extract-metadata.md b/doc/release-notes/9153-extract-metadata.md new file mode 100644 index 00000000000..ce4cc714805 --- /dev/null +++ b/doc/release-notes/9153-extract-metadata.md @@ -0,0 +1 @@ +For NetCDF and HDF5 files, an attempt will be made to extract metadata in NcML (XML) format and save it as an auxiliary file. diff --git a/doc/sphinx-guides/source/user/dataset-management.rst b/doc/sphinx-guides/source/user/dataset-management.rst index ec3bb392ce5..e891ca72880 100755 --- a/doc/sphinx-guides/source/user/dataset-management.rst +++ b/doc/sphinx-guides/source/user/dataset-management.rst @@ -299,6 +299,13 @@ Astronomy (FITS) Metadata found in the header section of `Flexible Image Transport System (FITS) files `_ are automatically extracted by the Dataverse Software, aggregated and displayed in the Astronomy Domain-Specific Metadata of the Dataset that the file belongs to. This FITS file metadata, is therefore searchable and browsable (facets) at the Dataset-level. +NetCDF and HDF5 +--------------- + +For NetCDF and HDF5 files, an attempt will be made to extract metadata in NcML_ (XML) format and save it as an auxiliary file. (See also :doc:`/developers/aux-file-support` in the Developer Guide.) + +.. 
_NcML: https://docs.unidata.ucar.edu/netcdf-java/current/userguide/ncml_overview.html + Compressed Files ---------------- diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 6e71f6c5042..b538aaca2c6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -3733,6 +3733,7 @@ public String save() { // Call Ingest Service one more time, to // queue the data ingest jobs for asynchronous execution: ingestService.startIngestJobsForDataset(dataset, (AuthenticatedUser) session.getUser()); + ingestService.extractMetadata(dataset, (AuthenticatedUser) session.getUser()); //After dataset saved, then persist prov json data if(systemConfig.isProvCollectionEnabled()) { diff --git a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java index fc8df8681af..d045126a3aa 100644 --- a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java @@ -1225,6 +1225,7 @@ public String save() { // queue the data ingest jobs for asynchronous execution: if (mode == FileEditMode.UPLOAD) { ingestService.startIngestJobsForDataset(dataset, (AuthenticatedUser) session.getUser()); + ingestService.extractMetadata(dataset, (AuthenticatedUser) session.getUser()); } if (FileEditMode.EDIT == mode && Referrer.FILE == referrer && fileMetadatas.size() > 0) { diff --git a/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/MediaResourceManagerImpl.java b/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/MediaResourceManagerImpl.java index 5491024c73c..e8d25bb4148 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/MediaResourceManagerImpl.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/MediaResourceManagerImpl.java @@ -373,6 +373,7 @@ DepositReceipt replaceOrAddFiles(String uri, Deposit deposit, AuthCredentials au } ingestService.startIngestJobsForDataset(dataset, user); + ingestService.extractMetadata(dataset, user); ReceiptGenerator receiptGenerator = new ReceiptGenerator(); String baseUrl = urlManager.getHostnamePlusBaseUrlPath(uri); diff --git a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java index febbb249a91..5277d014430 100644 --- a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java +++ b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java @@ -1932,6 +1932,7 @@ private boolean step_100_startIngestJobs(){ // start the ingest! 
ingestService.startIngestJobsForDataset(dataset, dvRequest.getAuthenticatedUser()); msg("post ingest start"); + ingestService.extractMetadata(dataset, dvRequest.getAuthenticatedUser()); } return true; } @@ -2145,6 +2146,7 @@ public Response addFiles(String jsonData, Dataset dataset, User authUser) { } //ingest job ingestService.startIngestJobsForDataset(dataset, (AuthenticatedUser) authUser); + ingestService.extractMetadata(dataset, (AuthenticatedUser) authUser); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java index b03bae618a4..e261efce642 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java @@ -20,6 +20,8 @@ package edu.harvard.iq.dataverse.ingest; +import edu.harvard.iq.dataverse.AuxiliaryFile; +import edu.harvard.iq.dataverse.AuxiliaryFileServiceBean; import edu.harvard.iq.dataverse.ControlledVocabularyValue; import edu.harvard.iq.dataverse.datavariable.VariableCategory; import edu.harvard.iq.dataverse.datavariable.VariableServiceBean; @@ -72,6 +74,7 @@ //import edu.harvard.iq.dvn.unf.*; import org.dataverse.unf.*; import java.io.BufferedInputStream; +import java.io.ByteArrayInputStream; import java.io.File; import java.io.FileInputStream; import java.io.IOException; @@ -81,6 +84,7 @@ import java.nio.channels.FileChannel; import java.nio.channels.ReadableByteChannel; import java.nio.channels.WritableByteChannel; +import java.nio.charset.StandardCharsets; import java.nio.file.DirectoryStream; import java.nio.file.Files; import java.nio.file.Path; @@ -113,6 +117,9 @@ import javax.jms.QueueSession; import javax.jms.Message; import javax.faces.application.FacesMessage; +import javax.ws.rs.core.MediaType; +import ucar.nc2.NetcdfFile; +import ucar.nc2.NetcdfFiles; /** * @@ -134,6 +141,8 @@ public class IngestServiceBean { @EJB DataFileServiceBean fileService; @EJB + AuxiliaryFileServiceBean auxiliaryFileService; + @EJB SystemConfig systemConfig; @Resource(lookup = "java:app/jms/queue/ingest") @@ -343,6 +352,7 @@ public List saveAndAddFilesToDataset(DatasetVersion version, try { // FITS is the only type supported for metadata // extraction, as of now. -- L.A. 4.0 + // Consider adding other formats such as NetCDF/HDF5. dataFile.setContentType("application/fits"); metadataExtracted = extractMetadata(tempFileLocation, dataFile, version); } catch (IOException mex) { @@ -565,7 +575,58 @@ public int compare(DataFile d1, DataFile d2) { return sb.toString(); } - + // Note: There is another method called extractMetadata for FITS files. + public void extractMetadata(Dataset dataset, AuthenticatedUser user) { + for (DataFile dataFile : dataset.getFiles()) { + Path pathToLocalDataFile = null; + try { + pathToLocalDataFile = dataFile.getStorageIO().getFileSystemPath(); + } catch (IOException ex) { + logger.info("Exception calling dataAccess.getFileSystemPath: " + ex); + } + InputStream inputStream = null; + if (pathToLocalDataFile != null) { + try ( NetcdfFile netcdfFile = NetcdfFiles.open(pathToLocalDataFile.toString())) { + if (netcdfFile != null) { + // TODO: What should we pass as a URL to toNcml()? 
+ String ncml = netcdfFile.toNcml("FIXME_URL"); + inputStream = new ByteArrayInputStream(ncml.getBytes(StandardCharsets.UTF_8)); + } else { + logger.info("NetcdfFiles.open() could open file id " + dataFile.getId() + " (null returned)."); + } + } catch (IOException ex) { + logger.info("NetcdfFiles.open() could open file id " + dataFile.getId() + ". Exception caught: " + ex); + } + } else { + logger.info("pathToLocalDataFile is null! Are you on S3? Metadata extraction from NetCDF/HDF5 is not yet available."); + // As a tabular file, we'll probably need to download the NetCDF/HDF5 files from S3 and then try to extra the metadata, + // unless we can get some sort of S3 interface working: + // https://docs.unidata.ucar.edu/netcdf-java/current/userguide/dataset_urls.html#object-stores + // If we need to download the file and extract only some of the bytes (hopefully the first bytes) here's the spec for NetCDF: + // https://docs.unidata.ucar.edu/netcdf-c/current/file_format_specifications.html + } + if (inputStream != null) { + // TODO: What should the tag be? + String formatTag = "ncml"; + // TODO: What should the version be? + String formatVersion = "0.1"; + // TODO: What should the origin be? + String origin = "myOrigin"; + boolean isPublic = true; + // TODO: What should the type be? + String type = "myType"; + // TODO: Does NcML have its own content type? (MIME type) + MediaType mediaType = new MediaType("text", "xml"); + try { + AuxiliaryFile auxFile = auxiliaryFileService.processAuxiliaryFile(inputStream, dataFile, formatTag, formatVersion, origin, isPublic, type, mediaType); + logger.info("Aux file extracted from NetCDF/HDF5 file saved: " + auxFile); + } catch (Exception ex) { + logger.info("exception throw calling processAuxiliaryFile: " + ex); + } + } + } + } + public void produceSummaryStatistics(DataFile dataFile, File generatedTabularFile) throws IOException { /* logger.info("Skipping summary statistics and UNF."); @@ -1159,6 +1220,7 @@ public boolean fileMetadataExtractable(DataFile dataFile) { * extractMetadata: * framework for extracting metadata from uploaded files. The results will * be used to populate the metadata of the Dataset to which the file belongs. + * Note that another method called extractMetadata creates aux files from data files. 
*/ public boolean extractMetadata(String tempFileLocation, DataFile dataFile, DatasetVersion editVersion) throws IOException { boolean ingestSuccessful = false; diff --git a/src/test/java/edu/harvard/iq/dataverse/api/NetcdfIT.java b/src/test/java/edu/harvard/iq/dataverse/api/NetcdfIT.java new file mode 100644 index 00000000000..a83af514935 --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/api/NetcdfIT.java @@ -0,0 +1,57 @@ +package edu.harvard.iq.dataverse.api; + +import com.jayway.restassured.RestAssured; +import com.jayway.restassured.path.json.JsonPath; +import com.jayway.restassured.response.Response; +import java.io.IOException; +import static javax.ws.rs.core.Response.Status.CREATED; +import static javax.ws.rs.core.Response.Status.OK; +import org.junit.BeforeClass; +import org.junit.Test; + +public class NetcdfIT { + + @BeforeClass + public static void setUp() { + RestAssured.baseURI = UtilIT.getRestAssuredBaseUri(); + } + + @Test + public void testNmclFromNetcdf() throws IOException { + Response createUser = UtilIT.createRandomUser(); + createUser.then().assertThat().statusCode(OK.getStatusCode()); + String apiToken = UtilIT.getApiTokenFromResponse(createUser); + String username = UtilIT.getUsernameFromResponse(createUser); + + Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken); + createDataverseResponse.prettyPrint(); + createDataverseResponse.then().assertThat() + .statusCode(CREATED.getStatusCode()); + + String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse); + + Response createDataset = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, apiToken); + createDataset.prettyPrint(); + createDataset.then().assertThat() + .statusCode(CREATED.getStatusCode()); + + Integer datasetId = UtilIT.getDatasetIdFromResponse(createDataset); + String datasetPid = UtilIT.getDatasetPersistentIdFromResponse(createDataset); + + String pathToFile = "src/test/resources/netcdf/madis-raob"; + + Response uploadFile = UtilIT.uploadFileViaNative(datasetId.toString(), pathToFile, apiToken); + uploadFile.prettyPrint(); + uploadFile.then().assertThat().statusCode(OK.getStatusCode()); + + long fileId = JsonPath.from(uploadFile.body().asString()).getLong("data.files[0].dataFile.id"); + String tag = "ncml"; + String version = "0.1"; + + Response downloadNcml = UtilIT.downloadAuxFile(fileId, tag, version, apiToken); + //downloadNcml.prettyPrint(); // long output + downloadNcml.then().assertThat() + .statusCode(OK.getStatusCode()) + .contentType("text/xml; name=\"madis-raob.ncml_0.1.xml\";charset=UTF-8"); + } +} From c4f07f91446eedeee611a75537b3b90872817d0b Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Fri, 9 Dec 2022 17:57:29 -0500 Subject: [PATCH 111/173] more tests for the OAI server functionality (#8843) --- .../iq/dataverse/api/HarvestingServerIT.java | 349 ++++++++++++------ .../edu/harvard/iq/dataverse/api/UtilIT.java | 5 + 2 files changed, 243 insertions(+), 111 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java index 5355b57490d..d25ffd225d9 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java @@ -9,24 +9,18 @@ import org.junit.Test; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import com.jayway.restassured.response.Response; -import com.jayway.restassured.path.json.JsonPath; import 
com.jayway.restassured.path.xml.XmlPath; import com.jayway.restassured.path.xml.element.Node; -import static edu.harvard.iq.dataverse.api.UtilIT.API_TOKEN_HTTP_HEADER; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collections; -import javax.json.Json; -import javax.json.JsonArray; -import static javax.ws.rs.core.Response.Status.FORBIDDEN; import static javax.ws.rs.core.Response.Status.OK; import static org.hamcrest.CoreMatchers.equalTo; -import org.junit.Ignore; import java.util.List; -import static junit.framework.Assert.assertEquals; +//import static junit.framework.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; +import static org.junit.Assert.assertEquals; /** * Tests for the Harvesting Server functionality @@ -184,142 +178,204 @@ public void testOaiListMetadataFormats() { @Test - public void testSetCreateAPIandOAIlistIdentifiers() { - // Create the set with Dataverse /api/harvest/server API: + public void testNativeSetAPI() { String setName = UtilIT.getRandomString(6); String def = "*"; - - // make sure the set does not exist + + // This test focuses on the Create/List/Edit functionality of the + // Dataverse OAI Sets API (/api/harvest/server): + + // API Test 1. Make sure the set does not exist yet String setPath = String.format("/api/harvest/server/oaisets/%s", setName); String createPath ="/api/harvest/server/oaisets/add"; - Response r0 = given() + Response getSetResponse = given() .get(setPath); - assertEquals(404, r0.getStatusCode()); + assertEquals(404, getSetResponse.getStatusCode()); - // try to create set as normal user, should fail - Response r1 = given() + // API Test 2. Try to create set as normal user, should fail + Response createSetResponse = given() .header(UtilIT.API_TOKEN_HTTP_HEADER, normalUserAPIKey) .body(jsonForTestSpec(setName, def)) .post(createPath); - assertEquals(400, r1.getStatusCode()); + assertEquals(400, createSetResponse.getStatusCode()); - // try to create set as admin user, should succeed - Response r2 = given() + // API Test 3. Try to create set as admin user, should succeed + createSetResponse = given() .header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey) .body(jsonForTestSpec(setName, def)) .post(createPath); - assertEquals(201, r2.getStatusCode()); + assertEquals(201, createSetResponse.getStatusCode()); - Response getSet = given() - .get(setPath); + // API Test 4. Retrieve the set we've just created, validate the response + getSetResponse = given().get(setPath); - logger.info("getSet.getStatusCode(): " + getSet.getStatusCode()); - logger.info("getSet printresponse: " + getSet.prettyPrint()); - assertEquals(200, getSet.getStatusCode()); + System.out.println("getSetResponse.getStatusCode(): " + getSetResponse.getStatusCode()); + System.out.println("getSetResponse, full: " + getSetResponse.prettyPrint()); + assertEquals(200, getSetResponse.getStatusCode()); + + getSetResponse.then().assertThat() + .body("status", equalTo(AbstractApiBean.STATUS_OK)) + .body("data.definition", equalTo("*")) + .body("data.description", equalTo("")) + .body("data.name", equalTo(setName)); + + // API Test 5. 
Retrieve all sets, check that our new set is listed Response responseAll = given() .get("/api/harvest/server/oaisets"); - logger.info("responseAll.getStatusCode(): " + responseAll.getStatusCode()); - logger.info("responseAll printresponse: " + responseAll.prettyPrint()); + System.out.println("responseAll.getStatusCode(): " + responseAll.getStatusCode()); + System.out.println("responseAll full: " + responseAll.prettyPrint()); assertEquals(200, responseAll.getStatusCode()); - - // try to create set with same name as admin user, should fail - Response r3 = given() + assertTrue(responseAll.body().jsonPath().getList("data.oaisets").size() > 0); + assertTrue(responseAll.body().jsonPath().getList("data.oaisets.name").toString().contains(setName)); // todo: simplify + + // API Test 6. Try to create a set with the same name, should fail + createSetResponse = given() .header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey) .body(jsonForTestSpec(setName, def)) .post(createPath); - assertEquals(400, r3.getStatusCode()); + assertEquals(400, createSetResponse.getStatusCode()); - // try to export set as admin user, should succeed (under admin API, not checking that normal user will fail) + // API Test 7. Try to export set as admin user, should succeed. Set export + // is under /api/admin, no need to try to access it as a non-admin user Response r4 = UtilIT.exportOaiSet(setName); assertEquals(200, r4.getStatusCode()); - - - - // try to delete as normal user, should fail - Response r5 = given() + + // API TEST 8. Try to delete the set as normal user, should fail + Response deleteResponse = given() .header(UtilIT.API_TOKEN_HTTP_HEADER, normalUserAPIKey) .delete(setPath); - logger.info("r5.getStatusCode(): " + r5.getStatusCode()); - assertEquals(400, r5.getStatusCode()); + logger.info("deleteResponse.getStatusCode(): " + deleteResponse.getStatusCode()); + assertEquals(400, deleteResponse.getStatusCode()); - // try to delete as admin user, should work - Response r6 = given() + // API TEST 9. Delete as admin user, should work + deleteResponse = given() .header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey) .delete(setPath); - logger.info("r6.getStatusCode(): " + r6.getStatusCode()); - assertEquals(200, r6.getStatusCode()); + logger.info("deleteResponse.getStatusCode(): " + deleteResponse.getStatusCode()); + assertEquals(200, deleteResponse.getStatusCode()); } @Test - public void testSetEdit() { - //setupUsers(); + public void testSetEditAPIandOAIlistSets() { + // This test focuses on testing the Edit functionality of the Dataverse + // OAI Set API and the ListSets method of the Dataverse OAI server. + + // Initial setup: crete a test set. + // Since the Create and List (POST and GET) functionality of the API + // is tested extensively in the previous test, we will not be paying + // as much attention to these methods, aside from confirming the + // expected HTTP result codes. 
+ String setName = UtilIT.getRandomString(6); - String def = "*"; + String setDef = "*"; - // make sure the set does not exist - String u0 = String.format("/api/harvest/server/oaisets/%s", setName); + // Make sure the set does not exist + String setPath = String.format("/api/harvest/server/oaisets/%s", setName); String createPath ="/api/harvest/server/oaisets/add"; - Response r0 = given() - .get(u0); - assertEquals(404, r0.getStatusCode()); + Response getSetResponse = given() + .get(setPath); + assertEquals(404, getSetResponse.getStatusCode()); - // try to create set as admin user, should succeed - Response r1 = given() + // Create the set as admin user + Response createSetResponse = given() .header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey) - .body(jsonForTestSpec(setName, def)) + .body(jsonForTestSpec(setName, setDef)) .post(createPath); - assertEquals(201, r1.getStatusCode()); + assertEquals(201, createSetResponse.getStatusCode()); + // I. Test the Modify/Edit (POST method) functionality of the + // Dataverse OAI Sets API - // try to edit as normal user should fail - Response r2 = given() + String newDefinition = "title:New"; + String newDescription = "updated"; + + // API Test 1. Try to modify the set as normal user, should fail + Response editSetResponse = given() .header(UtilIT.API_TOKEN_HTTP_HEADER, normalUserAPIKey) - .body(jsonForEditSpec(setName, def,"")) - .put(u0); - logger.info("r2.getStatusCode(): " + r2.getStatusCode()); - assertEquals(400, r2.getStatusCode()); + .body(jsonForEditSpec(setName, setDef, "")) + .put(setPath); + logger.info("non-admin user editSetResponse.getStatusCode(): " + editSetResponse.getStatusCode()); + assertEquals(400, editSetResponse.getStatusCode()); - // try to edit as with blanks should fail - Response r3 = given() + // API Test 2. Try to modify as admin, but with invalid (empty) values, + // should fail + editSetResponse = given() .header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey) .body(jsonForEditSpec(setName, "","")) - .put(u0); - logger.info("r3.getStatusCode(): " + r3.getStatusCode()); - assertEquals(400, r3.getStatusCode()); + .put(setPath); + logger.info("invalid values editSetResponse.getStatusCode(): " + editSetResponse.getStatusCode()); + assertEquals(400, editSetResponse.getStatusCode()); - // try to edit as with something should pass - Response r4 = given() + // API Test 3. Try to modify as admin, with sensible values + editSetResponse = given() .header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey) - .body(jsonForEditSpec(setName, "newDef","newDesc")) - .put(u0); - logger.info("r4 Status code: " + r4.getStatusCode()); - logger.info("r4.prettyPrint(): " + r4.prettyPrint()); - assertEquals(OK.getStatusCode(), r4.getStatusCode()); - - logger.info("u0: " + u0); - // now delete it... - Response r6 = given() + .body(jsonForEditSpec(setName, newDefinition, newDescription)) + .put(setPath); + logger.info("admin user editSetResponse status code: " + editSetResponse.getStatusCode()); + logger.info("admin user editSetResponse.prettyPrint(): " + editSetResponse.prettyPrint()); + assertEquals(OK.getStatusCode(), editSetResponse.getStatusCode()); + + // API Test 4. 
List the set, confirm that the new values are shown + getSetResponse = given().get(setPath); + + System.out.println("getSetResponse.getStatusCode(): " + getSetResponse.getStatusCode()); + System.out.println("getSetResponse, full: " + getSetResponse.prettyPrint()); + assertEquals(200, getSetResponse.getStatusCode()); + + getSetResponse.then().assertThat() + .body("status", equalTo(AbstractApiBean.STATUS_OK)) + .body("data.definition", equalTo(newDefinition)) + .body("data.description", equalTo(newDescription)) + .body("data.name", equalTo(setName)); + + // II. Test the ListSets functionality of the OAI server + + Response listSetsResponse = UtilIT.getOaiListSets(); + + // 1. Validate the service section of the OAI response: + + XmlPath responseXmlPath = validateOaiVerbResponse(listSetsResponse, "ListSets"); + + // 2. Validate the payload of the response, by confirming that the set + // we created and modified, above, is being listed by the OAI server + // and its xml record is properly formatted + + List listSets = responseXmlPath.getList("OAI-PMH.ListSets.set.list()"); // TODO - maybe try it with findAll()? + assertNotNull(listSets); + assertTrue(listSets.size() > 0); + + Node foundSetNode = null; + for (Node setNode : listSets) { + + if (setName.equals(setNode.get("setName").toString())) { + foundSetNode = setNode; + break; + } + } + + assertNotNull("Newly-created set is not listed by the OAI server", foundSetNode); + assertEquals("Incorrect description in the ListSets entry", newDescription, foundSetNode.getPath("setDescription.metadata.element.field", String.class)); + + // ok, the xml record looks good! + + // Cleanup. Delete the set with the DELETE API + Response deleteSetResponse = given() .header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey) - .delete(u0); - logger.info("r6.getStatusCode(): " + r6.getStatusCode()); - assertEquals(200, r6.getStatusCode()); + .delete(setPath); + assertEquals(200, deleteSetResponse.getStatusCode()); } - // A more elaborate test - we'll create and publish a dataset, then create an - // OAI set with that one dataset, and attempt to retrieve the OAI record - // with GetRecord. + // A more elaborate test - we will create and export an + // OAI set with a single dataset, and attempt to retrieve + // it and validate the OAI server responses of the corresponding + // ListIdentifiers, ListRecords and GetRecord methods. @Test public void testSingleRecordOaiSet() throws InterruptedException { - - //setupUsers(); - - - // Let's try and create an OAI set with the "single set dataset" that // was created as part of the initial setup: @@ -333,12 +389,18 @@ public void testSingleRecordOaiSet() throws InterruptedException { .post(createPath); assertEquals(201, createSetResponse.getStatusCode()); - // TODO: a) look up the set via native harvest/server api; - // b) look up the set via the OAI ListSets; - // export set: - // (this is asynchronous - so we should probably wait a little) + // The GET method of the oai set API, as well as the OAI ListSets + // method are tested extensively in another method in this class, so + // we'll skip checking those here. + + // Let's export the set. This is asynchronous - so we will try to + // wait a little - but in practice, everything potentially time-consuming + // must have been done when the dataset was exported, in the setup method. 
+ Response exportSetResponse = UtilIT.exportOaiSet(setName); assertEquals(200, exportSetResponse.getStatusCode()); + Thread.sleep(1000L); + Response getSet = given() .get(apiPath); @@ -350,25 +412,38 @@ public void testSingleRecordOaiSet() throws InterruptedException { do { - // Run ListIdentifiers on this newly-created set: + // OAI Test 1. Run ListIdentifiers on this newly-created set: Response listIdentifiersResponse = UtilIT.getOaiListIdentifiers(setName, "oai_dc"); - List ret = listIdentifiersResponse.getBody().xmlPath().getList("OAI-PMH.ListIdentifiers.header"); - assertEquals(OK.getStatusCode(), listIdentifiersResponse.getStatusCode()); + + // Validate the service section of the OAI response: + XmlPath responseXmlPath = validateOaiVerbResponse(listIdentifiersResponse, "ListIdentifiers"); + + List ret = responseXmlPath.getList("OAI-PMH.ListIdentifiers.header"); assertNotNull(ret); - logger.info("setName: " + setName); + if (logger.isLoggable(Level.FINE)) { logger.info("listIdentifiersResponse.prettyPrint:..... "); listIdentifiersResponse.prettyPrint(); } - if (ret.size() != 1) { + if (ret.isEmpty()) { + // OK, we'll sleep for another second - provided it's been less + // than 10 sec. total. i++; } else { - // There should be 1 and only 1 record in the response: + // Validate the payload of the ListRecords response: + // a) There should be 1 and only 1 record in the response: assertEquals(1, ret.size()); - // And the record should be the dataset we have just created: - assertEquals(singleSetDatasetPersistentId, listIdentifiersResponse.getBody().xmlPath() + // b) The one record in it should be the dataset we have just created: + assertEquals(singleSetDatasetPersistentId, responseXmlPath .getString("OAI-PMH.ListIdentifiers.header.identifier")); + assertEquals(setName, responseXmlPath + .getString("OAI-PMH.ListIdentifiers.header.setSpec")); + assertNotNull(responseXmlPath.getString("OAI-PMH.ListIdentifiers.header.dateStamp")); + // TODO: validate the formatting of the date string in the record + // header, above! + + // ok, ListIdentifiers response looks valid. break; } Thread.sleep(1000L); @@ -379,34 +454,86 @@ public void testSingleRecordOaiSet() throws InterruptedException { // already happened during its publishing (we made sure to wait there). // Exporting the set should not take any time - but I'll keep that code // in place since it's not going to hurt. - L.A. + System.out.println("Waited " + i + " seconds for OIA export."); //Fail if we didn't find the exported record before the timeout assertTrue(i < maxWait); + + + // OAI Test 2. 
Run ListRecords, request oai_dc: Response listRecordsResponse = UtilIT.getOaiListRecords(setName, "oai_dc"); assertEquals(OK.getStatusCode(), listRecordsResponse.getStatusCode()); - List listRecords = listRecordsResponse.getBody().xmlPath().getList("OAI-PMH.ListRecords.record"); + + // Validate the service section of the OAI response: + + XmlPath responseXmlPath = validateOaiVerbResponse(listRecordsResponse, "ListRecords"); + + // Validate the payload of the response: + // (the header portion must be identical to that of ListIdentifiers above, + // plus the response must contain a metadata section with a valid oai_dc + // record) + + List listRecords = responseXmlPath.getList("OAI-PMH.ListRecords.record"); + // Same deal, there must be 1 record only in the set: assertNotNull(listRecords); assertEquals(1, listRecords.size()); - assertEquals(singleSetDatasetPersistentId, listRecordsResponse.getBody().xmlPath().getString("OAI-PMH.ListRecords.record[0].header.identifier")); - - // assert that Datacite format does not contain the XML prolog + // a) header section: + assertEquals(singleSetDatasetPersistentId, responseXmlPath.getString("OAI-PMH.ListRecords.record.header.identifier")); + assertEquals(setName, responseXmlPath + .getString("OAI-PMH.ListRecords.record.header.setSpec")); + assertNotNull(responseXmlPath.getString("OAI-PMH.ListRecords.record.header.dateStamp")); + // b) metadata section: + // in the metadata section we are showing the resolver url form of the doi: + String persistentIdUrl = singleSetDatasetPersistentId.replace("doi:", "https://doi.org/"); + assertEquals(persistentIdUrl, responseXmlPath.getString("OAI-PMH.ListRecords.record.metadata.dc.identifier")); + assertEquals("Darwin's Finches", responseXmlPath.getString("OAI-PMH.ListRecords.record.metadata.dc.title")); + assertEquals("Finch, Fiona", responseXmlPath.getString("OAI-PMH.ListRecords.record.metadata.dc.creator")); + assertEquals("Darwin's finches (also known as the Galápagos finches) are a group of about fifteen species of passerine birds.", + responseXmlPath.getString("OAI-PMH.ListRecords.record.metadata.dc.description")); + assertEquals("Medicine, Health and Life Sciences", + responseXmlPath.getString("OAI-PMH.ListRecords.record.metadata.dc.subject")); + // ok, looks legit! + + // OAI Test 3. + // Assert that Datacite format does not contain the XML prolog + // (this is a reference to a resolved issue; generally, harvestable XML + // exports must NOT contain the "")); - // And now run GetRecord on the OAI record for the dataset: - Response getRecordResponse = UtilIT.getOaiRecord(singleSetDatasetPersistentId, "oai_dc"); + // OAI Test 4. run and validate GetRecord response + Response getRecordResponse = UtilIT.getOaiRecord(singleSetDatasetPersistentId, "oai_dc"); System.out.println("GetRecord response in its entirety: "+getRecordResponse.getBody().prettyPrint()); - System.out.println("one more time:"); - getRecordResponse.prettyPrint(); + + // Validate the service section of the OAI response: + responseXmlPath = validateOaiVerbResponse(getRecordResponse, "GetRecord"); + + // Validate the payload of the response: + + // Note that for a set with a single record the output of ListRecrods is + // essentially identical to that of GetRecord! 
+ // (we'll test a multi-record set in a different method) + // a) header section: + assertEquals(singleSetDatasetPersistentId, responseXmlPath.getString("OAI-PMH.GetRecord.record.header.identifier")); + assertEquals(setName, responseXmlPath + .getString("OAI-PMH.GetRecord.record.header.setSpec")); + assertNotNull(responseXmlPath.getString("OAI-PMH.GetRecord.record.header.dateStamp")); + // b) metadata section: + assertEquals(persistentIdUrl, responseXmlPath.getString("OAI-PMH.GetRecord.record.metadata.dc.identifier")); + assertEquals("Darwin's Finches", responseXmlPath.getString("OAI-PMH.GetRecord.record.metadata.dc.title")); + assertEquals("Finch, Fiona", responseXmlPath.getString("OAI-PMH.GetRecord.record.metadata.dc.creator")); + assertEquals("Darwin's finches (also known as the Galápagos finches) are a group of about fifteen species of passerine birds.", + responseXmlPath.getString("OAI-PMH.GetRecord.record.metadata.dc.description")); + assertEquals("Medicine, Health and Life Sciences", responseXmlPath.getString("OAI-PMH.GetRecord.record.metadata.dc.subject")); - assertEquals(singleSetDatasetPersistentId, getRecordResponse.getBody().xmlPath().getString("OAI-PMH.GetRecord.record.header.identifier")); + // ok, looks legit! - // TODO: - // check the actual metadata payload of the OAI record more carefully? } // This test will attempt to create a set with multiple records (enough diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index 9fa47db167b..ac767279bd4 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -2630,6 +2630,11 @@ static Response getOaiListMetadataFormats() { return given().get(oaiVerbPath); } + static Response getOaiListSets() { + String oaiVerbPath = "/oai?verb=ListSets"; + return given().get(oaiVerbPath); + } + static Response getOaiRecord(String datasetPersistentId, String metadataFormat) { String apiPath = String.format("/oai?verb=GetRecord&identifier=%s&metadataPrefix=%s", datasetPersistentId, metadataFormat); return given().get(apiPath); From 9cbfa31d4489ed4ce6df6e37a0fecf92f3a77d18 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Mon, 12 Dec 2022 13:51:58 -0500 Subject: [PATCH 112/173] extra (extra tedious) server tests validating paging (resumptionToken) functionality of ListIdentifiers and ListRecords (#8843) --- .../iq/dataverse/api/HarvestingServerIT.java | 340 +++++++++++++++++- .../edu/harvard/iq/dataverse/api/UtilIT.java | 18 +- 2 files changed, 351 insertions(+), 7 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java index d25ffd225d9..3497c71e169 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java @@ -16,6 +16,8 @@ import static javax.ws.rs.core.Response.Status.OK; import static org.hamcrest.CoreMatchers.equalTo; import java.util.List; +import java.util.Set; +import java.util.HashSet; //import static junit.framework.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; @@ -35,6 +37,7 @@ public class HarvestingServerIT { private static String adminUserAPIKey; private static String singleSetDatasetIdentifier; private static String singleSetDatasetPersistentId; + private static List extraDatasetsIdentifiers = new ArrayList<>(); @BeforeClass 
public static void setUpClass() { @@ -98,6 +101,28 @@ private static void setupDatasets() { // takes longer than just publish/reindex. // So wait for all of this to finish. UtilIT.sleepForReexport(singleSetDatasetPersistentId, adminUserAPIKey, 10); + + // ... And let's create 4 more datasets for a multi-dataset experiment: + + for (int i = 0; i < 4; i++) { + // create dataset: + createDatasetResponse = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, adminUserAPIKey); + createDatasetResponse.prettyPrint(); + datasetId = UtilIT.getDatasetIdFromResponse(createDatasetResponse); + + // retrieve the global id: + String thisDatasetPersistentId = UtilIT.getDatasetPersistentIdFromResponse(createDatasetResponse); + + // publish dataset: + publishDataset = UtilIT.publishDatasetViaNativeApi(thisDatasetPersistentId, "major", adminUserAPIKey); + assertEquals(200, publishDataset.getStatusCode()); + + UtilIT.sleepForReexport(thisDatasetPersistentId, adminUserAPIKey, 10); + + extraDatasetsIdentifiers.add(thisDatasetPersistentId.substring(thisDatasetPersistentId.lastIndexOf('/') + 1)); + } + + } private String jsonForTestSpec(String name, String def) { @@ -423,16 +448,16 @@ public void testSingleRecordOaiSet() throws InterruptedException { assertNotNull(ret); if (logger.isLoggable(Level.FINE)) { - logger.info("listIdentifiersResponse.prettyPrint:..... "); - listIdentifiersResponse.prettyPrint(); + logger.info("listIdentifiersResponse.prettyPrint: " + + listIdentifiersResponse.prettyPrint()); } if (ret.isEmpty()) { // OK, we'll sleep for another second - provided it's been less // than 10 sec. total. i++; } else { - // Validate the payload of the ListRecords response: - // a) There should be 1 and only 1 record in the response: + // Validate the payload of the ListIdentifiers response: + // a) There should be 1 and only 1 item listed: assertEquals(1, ret.size()); + // b) The one record in it should be the dataset we have just created: assertEquals(singleSetDatasetPersistentId, responseXmlPath @@ -537,12 +562,315 @@ public void testSingleRecordOaiSet() throws InterruptedException { } // This test will attempt to create a set with multiple records (enough - // to trigger a paged response with a continuation token) and test its - // performance. + // to trigger a paged response) and test the resumption token functionality. + // Note that this test requires the OAI service to be configured with some + // non-default settings (the paging limits for ListIdentifiers and ListRecords + // must be set to something low, like 2).
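+    // For orientation, a rough sketch of the paged-response element that the
+    // assertions below rely on (values shown are illustrative only; the token
+    // itself is an opaque string chosen by the server, per the OAI-PMH 2.0 spec):
+    //   <resumptionToken completeListSize="5" cursor="0">some-opaque-token</resumptionToken>
+    // The final page of results is expected to carry an empty <resumptionToken/> element.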
@Test public void testMultiRecordOaiSet() throws InterruptedException { + // Setup: Let's create a control OAI set with the 5 datasets created + // in the class init: + + String setName = UtilIT.getRandomString(6); + String setQuery = "(dsPersistentId:" + singleSetDatasetIdentifier; + for (String persistentId : extraDatasetsIdentifiers) { + setQuery = setQuery.concat(" OR dsPersistentId:" + persistentId); + } + setQuery = setQuery.concat(")"); + + String createPath = "/api/harvest/server/oaisets/add"; + + Response createSetResponse = given() + .header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey) + .body(jsonForTestSpec(setName, setQuery)) + .post(createPath); + assertEquals(201, createSetResponse.getStatusCode()); + + // Dataverse OAI Sets API is tested extensively in other methods here, + // so no need to test in any more details than confirming the OK result + // above + Response exportSetResponse = UtilIT.exportOaiSet(setName); + assertEquals(200, exportSetResponse.getStatusCode()); + Thread.sleep(1000L); + + // OAI Test 1. Run ListIdentifiers on the set we've just created: + Response listIdentifiersResponse = UtilIT.getOaiListIdentifiers(setName, "oai_dc"); + assertEquals(OK.getStatusCode(), listIdentifiersResponse.getStatusCode()); + + // Validate the service section of the OAI response: + XmlPath responseXmlPath = validateOaiVerbResponse(listIdentifiersResponse, "ListIdentifiers"); + + List ret = responseXmlPath.getList("OAI-PMH.ListIdentifiers.header.identifier"); + assertNotNull(ret); + + if (logger.isLoggable(Level.FINE)) { + logger.info("listIdentifiersResponse.prettyPrint: "+listIdentifiersResponse.prettyPrint()); + } + + // Validate the payload of the ListIdentifiers response: + // 1a) There should be 2 items listed: + assertEquals("Wrong number of items on the first ListIdentifiers page", + 2, ret.size()); + + // 1b) The response contains a resumptionToken for the next page of items: + String resumptionToken = responseXmlPath.getString("OAI-PMH.ListIdentifiers.resumptionToken"); + assertNotNull("No resumption token in the ListIdentifiers response", resumptionToken); + + // 1c) The total number of items in the set (5) is listed correctly: + assertEquals(5, responseXmlPath.getInt("OAI-PMH.ListIdentifiers.resumptionToken.@completeListSize")); + + // 1d) ... and the offset (cursor) is at the right position (0): + assertEquals(0, responseXmlPath.getInt("OAI-PMH.ListIdentifiers.resumptionToken.@cursor")); + + // The formatting of individual item records in the ListIdentifiers response + // is tested extensively in the previous test method, so we are not + // looking at them in such detail here; but we should record the + // identifiers listed, so that we can confirm that all the set is + // served as expected. + + Set persistentIdsInListIdentifiers = new HashSet<>(); + + for (String persistentId : ret) { + persistentIdsInListIdentifiers.add(persistentId.substring(persistentId.lastIndexOf('/') + 1)); + } + + // ok, let's move on to the next ListIdentifiers page: + // (we repeat the exact same checks as the above; minus the different + // expected offset) + + // OAI Test 2. 
Run ListIdentifiers with the resumptionToken obtained + // in the previous step: + + listIdentifiersResponse = UtilIT.getOaiListIdentifiersWithResumptionToken(resumptionToken); + assertEquals(OK.getStatusCode(), listIdentifiersResponse.getStatusCode()); + + // Validate the service section of the OAI response: + responseXmlPath = validateOaiVerbResponse(listIdentifiersResponse, "ListIdentifiers"); + + ret = responseXmlPath.getList("OAI-PMH.ListIdentifiers.header.identifier"); + assertNotNull(ret); + + if (logger.isLoggable(Level.FINE)) { + logger.info("listIdentifiersResponse.prettyPrint: "+listIdentifiersResponse.prettyPrint()); + } + + // Validate the payload of the ListIdentifiers response: + // 2a) There should still be 2 items listed: + assertEquals("Wrong number of items on the second ListIdentifiers page", + 2, ret.size()); + + // 2b) The response should contain a resumptionToken for the next page of items: + resumptionToken = responseXmlPath.getString("OAI-PMH.ListIdentifiers.resumptionToken"); + assertNotNull("No resumption token in the ListIdentifiers response", resumptionToken); + + // 2c) The total number of items in the set (5) is listed correctly: + assertEquals(5, responseXmlPath.getInt("OAI-PMH.ListIdentifiers.resumptionToken.@completeListSize")); + + // 2d) ... and the offset (cursor) is at the right position (2): + assertEquals(2, responseXmlPath.getInt("OAI-PMH.ListIdentifiers.resumptionToken.@cursor")); + + // Record the identifiers listed on this results page: + + for (String persistentId : ret) { + persistentIdsInListIdentifiers.add(persistentId.substring(persistentId.lastIndexOf('/') + 1)); + } + + // And now the next and the final ListIdentifiers page. + // This time around we should get an *empty* resumptionToken (indicating + // that there are no more results): + + // OAI Test 3. Run ListIdentifiers with the final resumptionToken + + listIdentifiersResponse = UtilIT.getOaiListIdentifiersWithResumptionToken(resumptionToken); + assertEquals(OK.getStatusCode(), listIdentifiersResponse.getStatusCode()); + + // Validate the service section of the OAI response: + responseXmlPath = validateOaiVerbResponse(listIdentifiersResponse, "ListIdentifiers"); + + ret = responseXmlPath.getList("OAI-PMH.ListIdentifiers.header.identifier"); + assertNotNull(ret); + + if (logger.isLoggable(Level.FINE)) { + logger.info("listIdentifiersResponse.prettyPrint: "+listIdentifiersResponse.prettyPrint()); + } + + // Validate the payload of the ListIdentifiers response: + // 3a) There should be only 1 item listed: + assertEquals("Wrong number of items on the final ListIdentifiers page", + 1, ret.size()); + + // 3b) The response contains a resumptionToken for the next page of items: + resumptionToken = responseXmlPath.getString("OAI-PMH.ListIdentifiers.resumptionToken"); + assertNotNull("No resumption token in the final ListIdentifiers response", resumptionToken); + assertTrue("Non-empty resumption token in the final ListIdentifiers response", "".equals(resumptionToken)); + + // 3c) The total number of items in the set (5) is still listed correctly: + assertEquals(5, responseXmlPath.getInt("OAI-PMH.ListIdentifiers.resumptionToken.@completeListSize")); + + // 3d) ... 
and the offset (cursor) is at the right position (4): + assertEquals(4, responseXmlPath.getInt("OAI-PMH.ListIdentifiers.resumptionToken.@cursor")); + // Record the last identifier listed on this final page: + persistentIdsInListIdentifiers.add(ret.get(0).substring(ret.get(0).lastIndexOf('/') + 1)); + + // Finally, let's confirm that the expected 5 datasets have been listed + // as part of this Set: + + boolean allDatasetsListed = true; + + allDatasetsListed = persistentIdsInListIdentifiers.contains(singleSetDatasetIdentifier); + for (String persistentId : extraDatasetsIdentifiers) { + allDatasetsListed = persistentIdsInListIdentifiers.contains(persistentId); + } + + assertTrue("Control datasets not properly listed in the paged ListIdentifiers response", + allDatasetsListed); + + // OK, it is safe to assume ListIdentifiers works as it should in page mode. + + // We will now repeat the exact same tests for ListRecords (again, no + // need to pay close attention to the formatting of the individual records, + // since that's tested in the previous test method, since our focus is + // testing the paging/resumptionToken functionality) + + // OAI Test 4. Run ListRecords on the set we've just created: + Response listRecordsResponse = UtilIT.getOaiListRecords(setName, "oai_dc"); + assertEquals(OK.getStatusCode(), listRecordsResponse.getStatusCode()); + + // Validate the service section of the OAI response: + responseXmlPath = validateOaiVerbResponse(listRecordsResponse, "ListRecords"); + + ret = responseXmlPath.getList("OAI-PMH.ListRecords.record.header.identifier"); + assertNotNull(ret); + + if (logger.isLoggable(Level.FINE)) { + logger.info("listRecordsResponse.prettyPrint: "+listRecordsResponse.prettyPrint()); + } + + // Validate the payload of the ListRecords response: + // 4a) There should be 2 items listed: + assertEquals("Wrong number of items on the first ListRecords page", + 2, ret.size()); + + // 4b) The response contains a resumptionToken for the next page of items: + resumptionToken = responseXmlPath.getString("OAI-PMH.ListRecords.resumptionToken"); + assertNotNull("No resumption token in the ListRecords response", resumptionToken); + + // 4c) The total number of items in the set (5) is listed correctly: + assertEquals(5, responseXmlPath.getInt("OAI-PMH.ListRecords.resumptionToken.@completeListSize")); + + // 4d) ... and the offset (cursor) is at the right position (0): + assertEquals(0, responseXmlPath.getInt("OAI-PMH.ListRecords.resumptionToken.@cursor")); + + Set persistentIdsInListRecords = new HashSet<>(); + + for (String persistentId : ret) { + persistentIdsInListRecords.add(persistentId.substring(persistentId.lastIndexOf('/') + 1)); + } + + // ok, let's move on to the next ListRecords page: + // (we repeat the exact same checks as the above; minus the different + // expected offset) + + // OAI Test 5. 
Run ListRecords with the resumptionToken obtained + // in the previous step: + + listRecordsResponse = UtilIT.getOaiListRecordsWithResumptionToken(resumptionToken); + assertEquals(OK.getStatusCode(), listRecordsResponse.getStatusCode()); + + // Validate the service section of the OAI response: + responseXmlPath = validateOaiVerbResponse(listRecordsResponse, "ListRecords"); + + ret = responseXmlPath.getList("OAI-PMH.ListRecords.record.header.identifier"); + assertNotNull(ret); + + if (logger.isLoggable(Level.FINE)) { + logger.info("listRecordsResponse.prettyPrint: "+listRecordsResponse.prettyPrint()); + } + + // Validate the payload of the ListRecords response: + // 4a) There should still be 2 items listed: + assertEquals("Wrong number of items on the second ListRecords page", + 2, ret.size()); + + // 4b) The response should contain a resumptionToken for the next page of items: + resumptionToken = responseXmlPath.getString("OAI-PMH.ListRecords.resumptionToken"); + assertNotNull("No resumption token in the ListRecords response", resumptionToken); + + // 4c) The total number of items in the set (5) is listed correctly: + assertEquals(5, responseXmlPath.getInt("OAI-PMH.ListRecords.resumptionToken.@completeListSize")); + + // 4d) ... and the offset (cursor) is at the right position (2): + assertEquals(2, responseXmlPath.getInt("OAI-PMH.ListRecords.resumptionToken.@cursor")); + + // Record the identifiers listed on this results page: + + for (String persistentId : ret) { + persistentIdsInListRecords.add(persistentId.substring(persistentId.lastIndexOf('/') + 1)); + } + + // And now the next and the final ListRecords page. + // This time around we should get an *empty* resumptionToken (indicating + // that there are no more results): + + // OAI Test 6. Run ListRecords with the final resumptionToken + + listRecordsResponse = UtilIT.getOaiListRecordsWithResumptionToken(resumptionToken); + assertEquals(OK.getStatusCode(), listRecordsResponse.getStatusCode()); + + // Validate the service section of the OAI response: + responseXmlPath = validateOaiVerbResponse(listRecordsResponse, "ListRecords"); + + ret = responseXmlPath.getList("OAI-PMH.ListRecords.record.header.identifier"); + assertNotNull(ret); + + if (logger.isLoggable(Level.FINE)) { + logger.info("listRecordsResponse.prettyPrint: "+listRecordsResponse.prettyPrint()); + } + + // Validate the payload of the ListRecords response: + // 6a) There should be only 1 item listed: + assertEquals("Wrong number of items on the final ListRecords page", + 1, ret.size()); + + // 6b) The response contains a resumptionToken for the next page of items: + resumptionToken = responseXmlPath.getString("OAI-PMH.ListRecords.resumptionToken"); + assertNotNull("No resumption token in the final ListRecords response", resumptionToken); + assertTrue("Non-empty resumption token in the final ListRecords response", "".equals(resumptionToken)); + + // 6c) The total number of items in the set (5) is still listed correctly: + assertEquals(5, responseXmlPath.getInt("OAI-PMH.ListRecords.resumptionToken.@completeListSize")); + + // 6d) ... 
and the offset (cursor) is at the right position (4): + assertEquals(4, responseXmlPath.getInt("OAI-PMH.ListRecords.resumptionToken.@cursor")); + + // Record the last identifier listed on this final page: + persistentIdsInListRecords.add(ret.get(0).substring(ret.get(0).lastIndexOf('/') + 1)); + + // Finally, let's confirm that the expected 5 datasets have been listed + // as part of this Set: + + allDatasetsListed = true; + + allDatasetsListed = persistentIdsInListRecords.contains(singleSetDatasetIdentifier); + for (String persistentId : extraDatasetsIdentifiers) { + allDatasetsListed = persistentIdsInListRecords.contains(persistentId); + } + + assertTrue("Control datasets not properly listed in the paged ListRecords response", + allDatasetsListed); + + // OK, it is safe to assume ListRecords works as it should in page mode + // as well. + + // And finally, let's delete the set + String setPath = String.format("/api/harvest/server/oaisets/%s", setName); + Response deleteResponse = given() + .header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey) + .delete(setPath); + logger.info("deleteResponse.getStatusCode(): " + deleteResponse.getStatusCode()); + assertEquals("Failed to delete the control multi-record set", 200, deleteResponse.getStatusCode()); } } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index ac767279bd4..e669a268010 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -2641,7 +2641,18 @@ static Response getOaiRecord(String datasetPersistentId, String metadataFormat) } static Response getOaiListIdentifiers(String setName, String metadataFormat) { - String apiPath = String.format("/oai?verb=ListIdentifiers&set=%s&metadataPrefix=%s", setName, metadataFormat); + + String apiPath; + if (StringUtil.nonEmpty(setName)) { + apiPath = String.format("/oai?verb=ListIdentifiers&set=%s&metadataPrefix=%s", setName, metadataFormat); + } else { + apiPath = String.format("/oai?verb=ListIdentifiers&metadataPrefix=%s", metadataFormat); + } + return given().get(apiPath); + } + + static Response getOaiListIdentifiersWithResumptionToken(String resumptionToken) { + String apiPath = String.format("/oai?verb=ListIdentifiers&resumptionToken=%s", resumptionToken); return given().get(apiPath); } @@ -2649,6 +2660,11 @@ static Response getOaiListRecords(String setName, String metadataFormat) { String apiPath = String.format("/oai?verb=ListRecords&set=%s&metadataPrefix=%s", setName, metadataFormat); return given().get(apiPath); } + + static Response getOaiListRecordsWithResumptionToken(String resumptionToken) { + String apiPath = String.format("/oai?verb=ListRecords&resumptionToken=%s", resumptionToken); + return given().get(apiPath); + } static Response changeAuthenticatedUserIdentifier(String oldIdentifier, String newIdentifier, String apiToken) { Response response; From 395d605a8e156dd2ee295a8aa2a0892cad898617 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Mon, 12 Dec 2022 17:04:44 -0500 Subject: [PATCH 113/173] An automated test of an actual harvest (#8843) --- .../iq/dataverse/api/HarvestingClients.java | 31 +--- .../iq/dataverse/api/HarvestingClientsIT.java | 169 ++++++++++++++++-- .../iq/dataverse/api/HarvestingServerIT.java | 8 + 3 files changed, 164 insertions(+), 44 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/HarvestingClients.java b/src/main/java/edu/harvard/iq/dataverse/api/HarvestingClients.java index 
42534514b68..b75cb687c62 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/HarvestingClients.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/HarvestingClients.java @@ -373,13 +373,13 @@ public Response startHarvestingJob(@PathParam("nickName") String clientNickname, } if (authenticatedUser == null || !authenticatedUser.isSuperuser()) { - return error(Response.Status.FORBIDDEN, "Only the Dataverse Admin user can run harvesting jobs"); + return error(Response.Status.FORBIDDEN, "Only admin users can run harvesting jobs"); } HarvestingClient harvestingClient = harvestingClientService.findByNickname(clientNickname); if (harvestingClient == null) { - return error(Response.Status.NOT_FOUND, "No such dataverse: "+clientNickname); + return error(Response.Status.NOT_FOUND, "No such client: "+clientNickname); } DataverseRequest dataverseRequest = createDataverseRequest(authenticatedUser); @@ -391,35 +391,8 @@ public Response startHarvestingJob(@PathParam("nickName") String clientNickname, return this.accepted(); } - // This GET shows the status of the harvesting run in progress for this - // client, if present: - // @GET - // @Path("{nickName}/run") - // TODO: - - // This DELETE kills the harvesting run in progress for this client, - // if present: - // @DELETE - // @Path("{nickName}/run") - // TODO: - - - - - /* Auxiliary, helper methods: */ - /* - @Deprecated - public static JsonArrayBuilder harvestingConfigsAsJsonArray(List harvestingDataverses) { - JsonArrayBuilder hdArr = Json.createArrayBuilder(); - - for (Dataverse hd : harvestingDataverses) { - hdArr.add(harvestingConfigAsJson(hd.getHarvestingClientConfig())); - } - return hdArr; - }*/ - public static JsonObjectBuilder harvestingConfigAsJson(HarvestingClient harvestingConfig) { if (harvestingConfig == null) { return null; diff --git a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingClientsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingClientsIT.java index 9eac3545e54..8fef360c68b 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingClientsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingClientsIT.java @@ -1,34 +1,58 @@ package edu.harvard.iq.dataverse.api; import java.util.logging.Logger; +import java.util.logging.Level; import com.jayway.restassured.RestAssured; import static com.jayway.restassured.RestAssured.given; import org.junit.Test; import com.jayway.restassured.response.Response; +import static javax.ws.rs.core.Response.Status.CREATED; +import static javax.ws.rs.core.Response.Status.UNAUTHORIZED; +import static javax.ws.rs.core.Response.Status.ACCEPTED; +import static javax.ws.rs.core.Response.Status.OK; import static org.hamcrest.CoreMatchers.equalTo; -import static junit.framework.Assert.assertEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; import org.junit.BeforeClass; /** - * extremely minimal (for now) API tests for creating OAI clients. + * This class tests Harvesting Client functionality. + * Note that these methods test BOTH the proprietary Dataverse rest API for + * creating and managing harvesting clients, AND the underlining OAI-PMH + * harvesting functionality itself. I.e., we will use the Dataverse + * /api/harvest/clients/ api to run an actual harvest of a control set and + * then validate the resulting harvested content. 
*/ public class HarvestingClientsIT { private static final Logger logger = Logger.getLogger(HarvestingClientsIT.class.getCanonicalName()); private static final String harvestClientsApi = "/api/harvest/clients/"; - private static final String harvestCollection = "root"; + private static final String rootCollection = "root"; private static final String harvestUrl = "https://demo.dataverse.org/oai"; private static final String archiveUrl = "https://demo.dataverse.org"; private static final String harvestMetadataFormat = "oai_dc"; private static final String archiveDescription = "RestAssured harvesting client test"; + private static final String controlOaiSet = "controlTestSet"; + private static final int datasetsInControlSet = 7; + private static String normalUserAPIKey; + private static String adminUserAPIKey; + private static String harvestCollectionAlias; @BeforeClass public static void setUpClass() { RestAssured.baseURI = UtilIT.getRestAssuredBaseUri(); + + // Create the users, an admin and a non-admin: + setupUsers(); + + // Create a collection that we will use to harvest remote content into: + setupCollection(); + } - private void setupUsers() { + private static void setupUsers() { Response cu0 = UtilIT.createRandomUser(); normalUserAPIKey = UtilIT.getApiTokenFromResponse(cu0); Response cu1 = UtilIT.createRandomUser(); @@ -36,13 +60,22 @@ private void setupUsers() { Response u1a = UtilIT.makeSuperUser(un1); adminUserAPIKey = UtilIT.getApiTokenFromResponse(cu1); } + + private static void setupCollection() { + Response createDataverseResponse = UtilIT.createRandomDataverse(adminUserAPIKey); + createDataverseResponse.prettyPrint(); + assertEquals(CREATED.getStatusCode(), createDataverseResponse.getStatusCode()); + + harvestCollectionAlias = UtilIT.getAliasFromResponse(createDataverseResponse); - private String normalUserAPIKey; - private String adminUserAPIKey; + // publish dataverse: + Response publishDataverse = UtilIT.publishDataverseViaNativeApi(harvestCollectionAlias, adminUserAPIKey); + assertEquals(OK.getStatusCode(), publishDataverse.getStatusCode()); + } @Test public void testCreateEditDeleteClient() { - setupUsers(); + //setupUsers(); String nickName = UtilIT.getRandomString(6); @@ -52,7 +85,7 @@ public void testCreateEditDeleteClient() { + "\"harvestUrl\":\"%s\"," + "\"archiveUrl\":\"%s\"," + "\"metadataFormat\":\"%s\"}", - harvestCollection, harvestUrl, archiveUrl, harvestMetadataFormat); + rootCollection, harvestUrl, archiveUrl, harvestMetadataFormat); // Try to create a client as normal user, should fail: @@ -61,7 +94,7 @@ public void testCreateEditDeleteClient() { .header(UtilIT.API_TOKEN_HTTP_HEADER, normalUserAPIKey) .body(clientJson) .post(clientApiPath); - assertEquals(401, rCreate.getStatusCode()); + assertEquals(UNAUTHORIZED.getStatusCode(), rCreate.getStatusCode()); // Try to create the same as admin user, should succeed: @@ -70,7 +103,7 @@ public void testCreateEditDeleteClient() { .header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey) .body(clientJson) .post(clientApiPath); - assertEquals(201, rCreate.getStatusCode()); + assertEquals(CREATED.getStatusCode(), rCreate.getStatusCode()); // Try to update the client we have just created: @@ -80,7 +113,7 @@ public void testCreateEditDeleteClient() { .header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey) .body(updateJson) .put(clientApiPath); - assertEquals(200, rUpdate.getStatusCode()); + assertEquals(OK.getStatusCode(), rUpdate.getStatusCode()); // Now let's retrieve the client we've just created and edited: @@ -89,7 
+122,7 @@ public void testCreateEditDeleteClient() { logger.info("getClient.getStatusCode(): " + getClientResponse.getStatusCode()); logger.info("getClient printresponse: " + getClientResponse.prettyPrint()); - assertEquals(200, getClientResponse.getStatusCode()); + assertEquals(OK.getStatusCode(), getClientResponse.getStatusCode()); // ... and validate the values: @@ -98,7 +131,7 @@ public void testCreateEditDeleteClient() { .body("data.type", equalTo("oai")) .body("data.nickName", equalTo(nickName)) .body("data.archiveDescription", equalTo(archiveDescription)) - .body("data.dataverseAlias", equalTo(harvestCollection)) + .body("data.dataverseAlias", equalTo(rootCollection)) .body("data.harvestUrl", equalTo(harvestUrl)) .body("data.archiveUrl", equalTo(archiveUrl)) .body("data.metadataFormat", equalTo(harvestMetadataFormat)); @@ -109,7 +142,7 @@ public void testCreateEditDeleteClient() { .header(UtilIT.API_TOKEN_HTTP_HEADER, normalUserAPIKey) .delete(clientApiPath); logger.info("rDelete.getStatusCode(): " + rDelete.getStatusCode()); - assertEquals(401, rDelete.getStatusCode()); + assertEquals(UNAUTHORIZED.getStatusCode(), rDelete.getStatusCode()); // Try to delete as admin user should work: @@ -117,6 +150,112 @@ public void testCreateEditDeleteClient() { .header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey) .delete(clientApiPath); logger.info("rDelete.getStatusCode(): " + rDelete.getStatusCode()); - assertEquals(200, rDelete.getStatusCode()); + assertEquals(OK.getStatusCode(), rDelete.getStatusCode()); + } + + @Test + public void testHarvestingClientRun() throws InterruptedException { + // This test will create a client and attempt to perform an actual + // harvest and validate the resulting harvested content. + + // Setup: create the client via the API + // since this API is tested somewhat extensively in the previous + // method, we don't need to pay too much attention to this method, aside + // from confirming the expected HTTP status code. + + String nickName = UtilIT.getRandomString(6); + + String clientApiPath = String.format(harvestClientsApi+"%s", nickName); + String clientJson = String.format("{\"dataverseAlias\":\"%s\"," + + "\"type\":\"oai\"," + + "\"harvestUrl\":\"%s\"," + + "\"archiveUrl\":\"%s\"," + + "\"set\":\"%s\"," + + "\"metadataFormat\":\"%s\"}", + harvestCollectionAlias, harvestUrl, archiveUrl, controlOaiSet, harvestMetadataFormat); + + Response createResponse = given() + .header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey) + .body(clientJson) + .post(clientApiPath); + assertEquals(CREATED.getStatusCode(), createResponse.getStatusCode()); + + // API TEST 1. Run the harvest using the configuration (client) we have + // just created + + String runHarvestApiPath = String.format(harvestClientsApi+"%s/run", nickName); + + // TODO? - verify that a non-admin user cannot perform this operation (401) + + Response runResponse = given() + .header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey) + .post(runHarvestApiPath); + assertEquals(ACCEPTED.getStatusCode(), runResponse.getStatusCode()); + + // API TEST 2. As indicated by the ACCEPTED status code above, harvesting + // is an asynchronous operation that will be performed in the background. + // Verify that this "in progress" status is properly reported while it's + // running, and that it completes in some reasonable amount of time. 
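+        // For orientation, the client status JSON polled below is expected to look
+        // roughly like this (only the fields asserted in this test are shown; the
+        // values are purely illustrative):
+        //   { "status": "OK",
+        //     "data": { "status": "inProgress",   // flips to "inActive" once the job completes
+        //               "lastResult": "SUCCESS",
+        //               "lastHarvest": "<timestamp>",
+        //               "lastSuccessful": "<timestamp>",
+        //               "lastNonEmpty": "<timestamp>",
+        //               "lastDatasetsHarvested": 7 } }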
+ + int i = 0; + int maxWait=20; // a very conservative interval; this harvest has no business taking this long + do { + // keep checking the status of the client with the GET api: + Response getClientResponse = given() + .get(clientApiPath); + + assertEquals(OK.getStatusCode(), getClientResponse.getStatusCode()); + assertEquals(AbstractApiBean.STATUS_OK, getClientResponse.body().jsonPath().getString("status")); + + if (logger.isLoggable(Level.FINE)) { + logger.info("listIdentifiersResponse.prettyPrint: " + + getClientResponse.prettyPrint()); + } + + String clientStatus = getClientResponse.body().jsonPath().getString("data.status"); + assertNotNull(clientStatus); + + if ("inProgress".equals(clientStatus)) { + // we'll sleep for another second + i++; + } else { + // Check the values in the response: + // a) Confirm that the harvest has completed: + assertEquals("Unexpected client status: "+clientStatus, "inActive", clientStatus); + + // b) Confirm that it has actually succeeded: + assertEquals("Last harvest not reported a success", "SUCCESS", getClientResponse.body().jsonPath().getString("data.lastResult")); + String harvestTimeStamp = getClientResponse.body().jsonPath().getString("data.lastHarvest"); + assertNotNull(harvestTimeStamp); + + // c) Confirm that the other timestamps match: + assertEquals(harvestTimeStamp, getClientResponse.body().jsonPath().getString("data.lastSuccessful")); + assertEquals(harvestTimeStamp, getClientResponse.body().jsonPath().getString("data.lastNonEmpty")); + + // d) Confirm that the correct number of datasets have been harvested: + assertEquals(datasetsInControlSet, getClientResponse.body().jsonPath().getInt("data.lastDatasetsHarvested")); + + // ok, it looks like the harvest has completed successfully. + break; + } + Thread.sleep(1000L); + } while (i Date: Mon, 12 Dec 2022 17:10:35 -0500 Subject: [PATCH 114/173] comments (#8843) --- .../iq/dataverse/api/HarvestingClientsIT.java | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingClientsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingClientsIT.java index 8fef360c68b..448faa20b0b 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingClientsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingClientsIT.java @@ -75,7 +75,9 @@ private static void setupCollection() { @Test public void testCreateEditDeleteClient() { - //setupUsers(); + // This method focuses on testing the native Dataverse harvesting client + // API. + String nickName = UtilIT.getRandomString(6); @@ -158,7 +160,7 @@ public void testHarvestingClientRun() throws InterruptedException { // This test will create a client and attempt to perform an actual // harvest and validate the resulting harvested content. - // Setup: create the client via the API + // Setup: create the client via native API // since this API is tested somewhat extensively in the previous // method, we don't need to pay too much attention to this method, aside // from confirming the expected HTTP status code. @@ -246,8 +248,11 @@ public void testHarvestingClientRun() throws InterruptedException { // Fail if it hasn't completed in maxWait seconds assertTrue(i < maxWait); - // TODO: use the native Dataverses/Datasets apis to verify that the expected - // datasets have been harvested. + // TODO(?) use the native Dataverses/Datasets apis to verify that the expected + // datasets have been harvested. 
This may or may not be necessary, seeing + // how we have already confirmed the number of successfully harvested + // datasets from the control set; somewhat hard to imagine a practical + // situation where that would not be enough (?). // Cleanup: delete the client From 9dcbfa05de4316cc3c5560e5350a1f46ebf30d4a Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 13 Dec 2022 17:57:03 +0100 Subject: [PATCH 115/173] revert(metadata): remove CodeMeta fields from Solr schema #7844 This reverts commit 8d5edf23a13631e878c413e55c320cb704a579b5. @IQSS decided we will not include fields from experimental blocks in the schema. --- conf/solr/8.11.1/schema.xml | 48 +------------------------------------ 1 file changed, 1 insertion(+), 47 deletions(-) diff --git a/conf/solr/8.11.1/schema.xml b/conf/solr/8.11.1/schema.xml index 2656abf0dc5..63312ab5d40 100644 --- a/conf/solr/8.11.1/schema.xml +++ b/conf/solr/8.11.1/schema.xml @@ -405,31 +405,9 @@ - - - - - - - - - - - - - - - - - - - - - - - + @@ -667,30 +645,6 @@ - - - - - - - - - - - - - - - - - - - - - - - - - + SAML2 SAML1 From 230298902fbb7296c9623a355e66e72302f83174 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Thu, 19 Jan 2023 10:00:33 -0500 Subject: [PATCH 173/173] rename sql scripts #9153 "Use a version like '4.11.0.1' in the example above where the previously released version was 4.11" -- dev guide That is, these scripts should have been 5.12.1.whatever since the last release was 5.12.1. Fixing. (They were 5.13.whatever.) --- ...-sorting_licenses.sql => V5.12.1.1__8671-sorting_licenses.sql} | 0 ...ls-for-tools.sql => V5.12.1.2__7715-signed-urls-for-tools.sql} | 0 ...imates.sql => V5.12.1.3__8840-improve-guestbook-estimates.sql} | 0 ...-extract-metadata.sql => V5.12.1.4__9153-extract-metadata.sql} | 0 4 files changed, 0 insertions(+), 0 deletions(-) rename src/main/resources/db/migration/{V5.13.0.1__8671-sorting_licenses.sql => V5.12.1.1__8671-sorting_licenses.sql} (100%) rename src/main/resources/db/migration/{V5.13.0.2__7715-signed-urls-for-tools.sql => V5.12.1.2__7715-signed-urls-for-tools.sql} (100%) rename src/main/resources/db/migration/{V5.13.0.3__8840-improve-guestbook-estimates.sql => V5.12.1.3__8840-improve-guestbook-estimates.sql} (100%) rename src/main/resources/db/migration/{V5.13.0.3__9153-extract-metadata.sql => V5.12.1.4__9153-extract-metadata.sql} (100%) diff --git a/src/main/resources/db/migration/V5.13.0.1__8671-sorting_licenses.sql b/src/main/resources/db/migration/V5.12.1.1__8671-sorting_licenses.sql similarity index 100% rename from src/main/resources/db/migration/V5.13.0.1__8671-sorting_licenses.sql rename to src/main/resources/db/migration/V5.12.1.1__8671-sorting_licenses.sql diff --git a/src/main/resources/db/migration/V5.13.0.2__7715-signed-urls-for-tools.sql b/src/main/resources/db/migration/V5.12.1.2__7715-signed-urls-for-tools.sql similarity index 100% rename from src/main/resources/db/migration/V5.13.0.2__7715-signed-urls-for-tools.sql rename to src/main/resources/db/migration/V5.12.1.2__7715-signed-urls-for-tools.sql diff --git a/src/main/resources/db/migration/V5.13.0.3__8840-improve-guestbook-estimates.sql b/src/main/resources/db/migration/V5.12.1.3__8840-improve-guestbook-estimates.sql similarity index 100% rename from src/main/resources/db/migration/V5.13.0.3__8840-improve-guestbook-estimates.sql rename to src/main/resources/db/migration/V5.12.1.3__8840-improve-guestbook-estimates.sql diff --git a/src/main/resources/db/migration/V5.13.0.3__9153-extract-metadata.sql 
b/src/main/resources/db/migration/V5.12.1.4__9153-extract-metadata.sql similarity index 100% rename from src/main/resources/db/migration/V5.13.0.3__9153-extract-metadata.sql rename to src/main/resources/db/migration/V5.12.1.4__9153-extract-metadata.sql