From 98f21eada0a8dbbffae6cf133dc038b0e55e1b74 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Mon, 17 May 2021 15:20:34 +0200 Subject: [PATCH 001/173] feat(metadata): add metadata block for CodeMeta #7844 --- scripts/api/data/metadatablocks/codemeta.tsv | 40 ++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 scripts/api/data/metadatablocks/codemeta.tsv diff --git a/scripts/api/data/metadatablocks/codemeta.tsv b/scripts/api/data/metadatablocks/codemeta.tsv new file mode 100644 index 00000000000..9f32cf20299 --- /dev/null +++ b/scripts/api/data/metadatablocks/codemeta.tsv @@ -0,0 +1,40 @@ +#metadataBlock name dataverseAlias displayName blockURI + codeMeta20 Software Metadata (v2.0) https://codemeta.github.io/terms/ +#datasetField name title description watermark fieldType displayOrder displayFormat advancedSearchField allowControlledVocabulary allowmultiples facetable displayoncreate required parent metadatablock_id termURI + softwareVersion Software version Version of the software instance. major.minor, e.g. 1.3 text 0 v#VALUE TRUE FALSE FALSE TRUE TRUE FALSE codeMeta20 https://schema.org/softwareVersion + developmentStatus Development Status Description of development status, e.g. work in progress (wip), active, inactive, suspended. See repostatus.org for more information. text 1 #VALUE TRUE TRUE FALSE TRUE FALSE FALSE codeMeta20 + codeRepository Code Repository Link to the repository where the un-compiled, human readable code and related code is located (SVN, GitHub, CodePlex, institutional GitLab instance, etc.). https://... url 2 #VALUE TRUE FALSE TRUE FALSE TRUE FALSE codeMeta20 https://schema.org/codeRepository + programmingLanguage Programming Language The programming language(s) used to implement the software (e.g. Python, C++, Matlab, Fortran, Java, Julia,...) text 3 #VALUE TRUE FALSE TRUE TRUE TRUE FALSE codeMeta20 https://schema.org/programmingLanguage + operatingSystem Operating Systems Operating systems supported (e.g. Windows 10, OSX 11.3, Android 11). 4 FALSE FALSE TRUE FALSE TRUE FALSE codeMeta20 https://schema.org/operatingSystem + operatingSystemName Name The supported operating systems name Windows, Mac OS X, Linux, Android, ... text 5 #VALUE TRUE FALSE FALSE TRUE TRUE FALSE operatingSystem codeMeta20 + operatingSystemVersion Version The supported operating systems version text 6 #VALUE TRUE FALSE FALSE TRUE TRUE FALSE operatingSystem codeMeta20 + applicationCategory Application Category Type of software application, e.g. Simulation, Analysis, Visualisation. text #VALUE TRUE FALSE TRUE TRUE TRUE FALSE codeMeta20 https://schema.org/applicationCategory + applicationSubCategory Application Subcategory Subcategory of the application, e.g. Arcade Game. text #VALUE TRUE FALSE TRUE TRUE FALSE FALSE codeMeta20 https://schema.org/applicationSubCategory + softwareHelp Software Help/Documentation Link to help texts or documentation https://... url #VALUE FALSE FALSE TRUE FALSE TRUE FALSE codeMeta20 https://schema.org/softwareHelp + buildInstructions Build instructions Link to installation instructions/documentation https://... url #VALUE FALSE FALSE FALSE TRUE FALSE FALSE codeMeta20 + runtimePlatform Runtime Platform Runtime platform or script interpreter dependencies (Example - Java v1, Python2.3, .Net Framework 3.0). Supersedes runtime. text #VALUE TRUE FALSE TRUE TRUE FALSE FALSE codeMeta20 https://schema.org/runtimePlatform + targetProduct Target Product Target Operating System / Product to which the code applies. 
If applies to several versions, just the product name can be used. text #VALUE FALSE FALSE TRUE TRUE FALSE FALSE codeMeta20 https://schema.org/targetProduct + softwareRequirements Software Requirements Required software dependencies none FALSE FALSE TRUE FALSE FALSE FALSE codeMeta20 https://schema.org/softwareRequirements + softwareRequirementsName Name Name or title of the required software/library text #VALUE FALSE FALSE FALSE FALSE FALSE softwareRequirements codeMeta20 + softwareRequirementsVersion Version Version of the required software/library text #VALUE FALSE FALSE FALSE FALSE FALSE softwareRequirements codeMeta20 + softwareRequirementsUrl URL Link to required software/library https://... url #VALUE FALSE FALSE FALSE FALSE FALSE softwareRequirements codeMeta20 + softwareSuggestions Software Suggestions Optional dependencies, e.g. for optional features, code development, etc. none FALSE FALSE TRUE FALSE FALSE FALSE codeMeta20 + softwareSuggestionsName Name Name or title of the optional software/library text #VALUE FALSE FALSE FALSE FALSE FALSE softwareSuggestions codeMeta20 + softwareSuggestionsVersion Version Version of the optional software/library text #VALUE FALSE FALSE FALSE FALSE FALSE softwareSuggestions codeMeta20 + softwareSuggestionsUrl URL Link to optional software/library https://... url #VALUE FALSE FALSE FALSE FALSE FALSE softwareSuggestions codeMeta20 + permissions Permissions Permission(s) required to run the code (for example, a mobile app may require full internet access or may run only on wifi). text #VALUE FALSE FALSE TRUE FALSE FALSE FALSE codeMeta20 https://schema.org/permissions + memoryRequirements Memory Requirements Minimum memory requirements. text #VALUE FALSE FALSE TRUE FALSE FALSE FALSE codeMeta20 https://schema.org/memoryRequirements + processorRequirements Processor Requirements Processor architecture required to run the application (e.g. IA64). text #VALUE FALSE FALSE TRUE FALSE FALSE FALSE codeMeta20 https://schema.org/processorRequirements + storageRequirements Storage Requirements Storage requirements (e.g. free space required). text #VALUE FALSE FALSE FALSE FALSE FALSE FALSE codeMeta20 https://schema.org/storageRequirements + releaseNotes Release Notes Link to release notes https://... url #VALUE FALSE FALSE FALSE FALSE FALSE FALSE codeMeta20 https://schema.org/releaseNotes + contIntegration Continous integration Link to continuous integration service https://... url #VALUE FALSE FALSE TRUE FALSE FALSE FALSE codeMeta20 + issueTracker Issue Tracker Link to software bug reporting or issue tracking system https://... 
url #VALUE FALSE FALSE FALSE TRUE FALSE FALSE codeMeta20 +#controlledVocabulary DatasetField Value identifier displayOrder + developmentStatus Concept concept 0 + developmentStatus WIP wip 1 + developmentStatus Active active 2 + developmentStatus Inactive inactive 3 + developmentStatus Unsupported unsupported 4 + developmentStatus Moved moved 5 + developmentStatus Suspended suspended 6 + developmentStatus Abandoned abandoned 7 \ No newline at end of file From f9f9cbda095f0c72ce54ad020933d005c1d9d1ee Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Mon, 17 May 2021 15:34:46 +0200 Subject: [PATCH 002/173] docs(metadata): add CodeMeta reference to user guide --- doc/sphinx-guides/source/user/appendix.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/sphinx-guides/source/user/appendix.rst b/doc/sphinx-guides/source/user/appendix.rst index 003f02cdd61..e0fa83ad2ca 100755 --- a/doc/sphinx-guides/source/user/appendix.rst +++ b/doc/sphinx-guides/source/user/appendix.rst @@ -26,5 +26,6 @@ Detailed below are what metadata schemas we support for Citation and Domain Spec `Virtual Observatory (VO) Discovery and Provenance Metadata `__ (`see .tsv version `__). - `Life Sciences Metadata `__: based on `ISA-Tab Specification `__, along with controlled vocabulary from subsets of the `OBI Ontology `__ and the `NCBI Taxonomy for Organisms `__ (`see .tsv version `__). - `Journal Metadata `__: based on the `Journal Archiving and Interchange Tag Set, version 1.2 `__ (`see .tsv version `__). +- `CodeMeta Software Metadata `__: based on the `CodeMeta Software Metadata Schema, version 2.0 `__ (`see .tsv version `__) See also the `Dataverse Software 4.0 Metadata Crosswalk: DDI, DataCite, DC, DCTerms, VO, ISA-Tab `__ document and the :doc:`/admin/metadatacustomization` section of the Admin Guide. From ed485df14d3761fb3fca4e0ea0bfa2d20ed2f332 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Mon, 17 May 2021 15:35:25 +0200 Subject: [PATCH 003/173] feat(metadata): load CodeMeta by default in new installations. 
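
New installations pick the block up automatically through the setup script change below. On an existing installation the same admin API call can be issued by hand; the example below assumes the default localhost:8080 endpoint and is run from scripts/api so that the relative TSV path resolves (adjust both as needed):

    curl http://localhost:8080/api/admin/datasetfield/load -X POST \
         --data-binary @data/metadatablocks/codemeta.tsv \
         -H "Content-type: text/tab-separated-values"
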
--- scripts/api/setup-datasetfields.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/api/setup-datasetfields.sh b/scripts/api/setup-datasetfields.sh index 0d2d60b9538..741a439e542 100755 --- a/scripts/api/setup-datasetfields.sh +++ b/scripts/api/setup-datasetfields.sh @@ -7,3 +7,4 @@ curl http://localhost:8080/api/admin/datasetfield/load -X POST --data-binary @da curl http://localhost:8080/api/admin/datasetfield/load -X POST --data-binary @data/metadatablocks/astrophysics.tsv -H "Content-type: text/tab-separated-values" curl http://localhost:8080/api/admin/datasetfield/load -X POST --data-binary @data/metadatablocks/biomedical.tsv -H "Content-type: text/tab-separated-values" curl http://localhost:8080/api/admin/datasetfield/load -X POST --data-binary @data/metadatablocks/journals.tsv -H "Content-type: text/tab-separated-values" +curl http://localhost:8080/api/admin/datasetfield/load -X POST --data-binary @data/metadatablocks/codemeta.tsv -H "Content-type: text/tab-separated-values" From 3c497a1475e864103c0583bea861e5e9376e23d1 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 18 May 2021 13:08:58 +0200 Subject: [PATCH 004/173] fix(metadata): fix wrong tab in CodeMeta and rephrase softwareVersion watermark helptext #7844 --- scripts/api/data/metadatablocks/codemeta.tsv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/api/data/metadatablocks/codemeta.tsv b/scripts/api/data/metadatablocks/codemeta.tsv index 9f32cf20299..c2711bca3ed 100644 --- a/scripts/api/data/metadatablocks/codemeta.tsv +++ b/scripts/api/data/metadatablocks/codemeta.tsv @@ -1,14 +1,14 @@ #metadataBlock name dataverseAlias displayName blockURI codeMeta20 Software Metadata (v2.0) https://codemeta.github.io/terms/ #datasetField name title description watermark fieldType displayOrder displayFormat advancedSearchField allowControlledVocabulary allowmultiples facetable displayoncreate required parent metadatablock_id termURI - softwareVersion Software version Version of the software instance. major.minor, e.g. 1.3 text 0 v#VALUE TRUE FALSE FALSE TRUE TRUE FALSE codeMeta20 https://schema.org/softwareVersion + softwareVersion Software version Version of the software instance, usually following some convention like SemVer etc. e.g. 0.2.1 or 1.3 or 2021.1 etc text 0 v#VALUE TRUE FALSE FALSE TRUE TRUE FALSE codeMeta20 https://schema.org/softwareVersion developmentStatus Development Status Description of development status, e.g. work in progress (wip), active, inactive, suspended. See repostatus.org for more information. text 1 #VALUE TRUE TRUE FALSE TRUE FALSE FALSE codeMeta20 codeRepository Code Repository Link to the repository where the un-compiled, human readable code and related code is located (SVN, GitHub, CodePlex, institutional GitLab instance, etc.). https://... url 2 #VALUE TRUE FALSE TRUE FALSE TRUE FALSE codeMeta20 https://schema.org/codeRepository programmingLanguage Programming Language The programming language(s) used to implement the software (e.g. Python, C++, Matlab, Fortran, Java, Julia,...) text 3 #VALUE TRUE FALSE TRUE TRUE TRUE FALSE codeMeta20 https://schema.org/programmingLanguage operatingSystem Operating Systems Operating systems supported (e.g. Windows 10, OSX 11.3, Android 11). 4 FALSE FALSE TRUE FALSE TRUE FALSE codeMeta20 https://schema.org/operatingSystem operatingSystemName Name The supported operating systems name Windows, Mac OS X, Linux, Android, ... 
text 5 #VALUE TRUE FALSE FALSE TRUE TRUE FALSE operatingSystem codeMeta20 operatingSystemVersion Version The supported operating systems version text 6 #VALUE TRUE FALSE FALSE TRUE TRUE FALSE operatingSystem codeMeta20 - applicationCategory Application Category Type of software application, e.g. Simulation, Analysis, Visualisation. text #VALUE TRUE FALSE TRUE TRUE TRUE FALSE codeMeta20 https://schema.org/applicationCategory + applicationCategory Application Category Type of software application, e.g. Simulation, Analysis, Visualisation. "" text #VALUE TRUE FALSE TRUE TRUE TRUE FALSE codeMeta20 https://schema.org/applicationCategory applicationSubCategory Application Subcategory Subcategory of the application, e.g. Arcade Game. text #VALUE TRUE FALSE TRUE TRUE FALSE FALSE codeMeta20 https://schema.org/applicationSubCategory softwareHelp Software Help/Documentation Link to help texts or documentation https://... url #VALUE FALSE FALSE TRUE FALSE TRUE FALSE codeMeta20 https://schema.org/softwareHelp buildInstructions Build instructions Link to installation instructions/documentation https://... url #VALUE FALSE FALSE FALSE TRUE FALSE FALSE codeMeta20 From 492491e89ef13f6254511b172641e1669b485e17 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 18 May 2021 13:11:52 +0200 Subject: [PATCH 005/173] fix(metadata): add standard name to Codemeta MDB displayName. #7844 --- scripts/api/data/metadatablocks/codemeta.tsv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/api/data/metadatablocks/codemeta.tsv b/scripts/api/data/metadatablocks/codemeta.tsv index c2711bca3ed..020cdcc11fc 100644 --- a/scripts/api/data/metadatablocks/codemeta.tsv +++ b/scripts/api/data/metadatablocks/codemeta.tsv @@ -1,5 +1,5 @@ #metadataBlock name dataverseAlias displayName blockURI - codeMeta20 Software Metadata (v2.0) https://codemeta.github.io/terms/ + codeMeta20 Software Metadata (CodeMeta v2.0) https://codemeta.github.io/terms/ #datasetField name title description watermark fieldType displayOrder displayFormat advancedSearchField allowControlledVocabulary allowmultiples facetable displayoncreate required parent metadatablock_id termURI softwareVersion Software version Version of the software instance, usually following some convention like SemVer etc. e.g. 0.2.1 or 1.3 or 2021.1 etc text 0 v#VALUE TRUE FALSE FALSE TRUE TRUE FALSE codeMeta20 https://schema.org/softwareVersion developmentStatus Development Status Description of development status, e.g. work in progress (wip), active, inactive, suspended. See repostatus.org for more information. 
text 1 #VALUE TRUE TRUE FALSE TRUE FALSE FALSE codeMeta20 From 382c1e4035752b3917cd9c967e55a8d41601a20d Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 21 Jul 2022 13:49:58 +0200 Subject: [PATCH 006/173] fix(metadata): make CodeMeta TSV usable #7844 - Add missing displayOrder values - Fix missing type for software requirements - Avoid splitting up compound fields too much, otherwise data is not exportable to schema.org or CodeMeta JSON-LD without special handling (#7856) - Tweak order - Tweak descriptions and examples - Fix whitespaces and line endings --- scripts/api/data/metadatablocks/codemeta.tsv | 77 ++++++++++---------- 1 file changed, 37 insertions(+), 40 deletions(-) diff --git a/scripts/api/data/metadatablocks/codemeta.tsv b/scripts/api/data/metadatablocks/codemeta.tsv index 020cdcc11fc..029ca2355ec 100644 --- a/scripts/api/data/metadatablocks/codemeta.tsv +++ b/scripts/api/data/metadatablocks/codemeta.tsv @@ -1,40 +1,37 @@ -#metadataBlock name dataverseAlias displayName blockURI - codeMeta20 Software Metadata (CodeMeta v2.0) https://codemeta.github.io/terms/ -#datasetField name title description watermark fieldType displayOrder displayFormat advancedSearchField allowControlledVocabulary allowmultiples facetable displayoncreate required parent metadatablock_id termURI - softwareVersion Software version Version of the software instance, usually following some convention like SemVer etc. e.g. 0.2.1 or 1.3 or 2021.1 etc text 0 v#VALUE TRUE FALSE FALSE TRUE TRUE FALSE codeMeta20 https://schema.org/softwareVersion - developmentStatus Development Status Description of development status, e.g. work in progress (wip), active, inactive, suspended. See repostatus.org for more information. text 1 #VALUE TRUE TRUE FALSE TRUE FALSE FALSE codeMeta20 - codeRepository Code Repository Link to the repository where the un-compiled, human readable code and related code is located (SVN, GitHub, CodePlex, institutional GitLab instance, etc.). https://... url 2 #VALUE TRUE FALSE TRUE FALSE TRUE FALSE codeMeta20 https://schema.org/codeRepository - programmingLanguage Programming Language The programming language(s) used to implement the software (e.g. Python, C++, Matlab, Fortran, Java, Julia,...) text 3 #VALUE TRUE FALSE TRUE TRUE TRUE FALSE codeMeta20 https://schema.org/programmingLanguage - operatingSystem Operating Systems Operating systems supported (e.g. Windows 10, OSX 11.3, Android 11). 4 FALSE FALSE TRUE FALSE TRUE FALSE codeMeta20 https://schema.org/operatingSystem - operatingSystemName Name The supported operating systems name Windows, Mac OS X, Linux, Android, ... text 5 #VALUE TRUE FALSE FALSE TRUE TRUE FALSE operatingSystem codeMeta20 - operatingSystemVersion Version The supported operating systems version text 6 #VALUE TRUE FALSE FALSE TRUE TRUE FALSE operatingSystem codeMeta20 - applicationCategory Application Category Type of software application, e.g. Simulation, Analysis, Visualisation. "" text #VALUE TRUE FALSE TRUE TRUE TRUE FALSE codeMeta20 https://schema.org/applicationCategory - applicationSubCategory Application Subcategory Subcategory of the application, e.g. Arcade Game. text #VALUE TRUE FALSE TRUE TRUE FALSE FALSE codeMeta20 https://schema.org/applicationSubCategory - softwareHelp Software Help/Documentation Link to help texts or documentation https://... url #VALUE FALSE FALSE TRUE FALSE TRUE FALSE codeMeta20 https://schema.org/softwareHelp - buildInstructions Build instructions Link to installation instructions/documentation https://... 
url #VALUE FALSE FALSE FALSE TRUE FALSE FALSE codeMeta20 - runtimePlatform Runtime Platform Runtime platform or script interpreter dependencies (Example - Java v1, Python2.3, .Net Framework 3.0). Supersedes runtime. text #VALUE TRUE FALSE TRUE TRUE FALSE FALSE codeMeta20 https://schema.org/runtimePlatform - targetProduct Target Product Target Operating System / Product to which the code applies. If applies to several versions, just the product name can be used. text #VALUE FALSE FALSE TRUE TRUE FALSE FALSE codeMeta20 https://schema.org/targetProduct - softwareRequirements Software Requirements Required software dependencies none FALSE FALSE TRUE FALSE FALSE FALSE codeMeta20 https://schema.org/softwareRequirements - softwareRequirementsName Name Name or title of the required software/library text #VALUE FALSE FALSE FALSE FALSE FALSE softwareRequirements codeMeta20 - softwareRequirementsVersion Version Version of the required software/library text #VALUE FALSE FALSE FALSE FALSE FALSE softwareRequirements codeMeta20 - softwareRequirementsUrl URL Link to required software/library https://... url #VALUE FALSE FALSE FALSE FALSE FALSE softwareRequirements codeMeta20 - softwareSuggestions Software Suggestions Optional dependencies, e.g. for optional features, code development, etc. none FALSE FALSE TRUE FALSE FALSE FALSE codeMeta20 - softwareSuggestionsName Name Name or title of the optional software/library text #VALUE FALSE FALSE FALSE FALSE FALSE softwareSuggestions codeMeta20 - softwareSuggestionsVersion Version Version of the optional software/library text #VALUE FALSE FALSE FALSE FALSE FALSE softwareSuggestions codeMeta20 - softwareSuggestionsUrl URL Link to optional software/library https://... url #VALUE FALSE FALSE FALSE FALSE FALSE softwareSuggestions codeMeta20 - permissions Permissions Permission(s) required to run the code (for example, a mobile app may require full internet access or may run only on wifi). text #VALUE FALSE FALSE TRUE FALSE FALSE FALSE codeMeta20 https://schema.org/permissions - memoryRequirements Memory Requirements Minimum memory requirements. text #VALUE FALSE FALSE TRUE FALSE FALSE FALSE codeMeta20 https://schema.org/memoryRequirements - processorRequirements Processor Requirements Processor architecture required to run the application (e.g. IA64). text #VALUE FALSE FALSE TRUE FALSE FALSE FALSE codeMeta20 https://schema.org/processorRequirements - storageRequirements Storage Requirements Storage requirements (e.g. free space required). text #VALUE FALSE FALSE FALSE FALSE FALSE FALSE codeMeta20 https://schema.org/storageRequirements - releaseNotes Release Notes Link to release notes https://... url #VALUE FALSE FALSE FALSE FALSE FALSE FALSE codeMeta20 https://schema.org/releaseNotes - contIntegration Continous integration Link to continuous integration service https://... url #VALUE FALSE FALSE TRUE FALSE FALSE FALSE codeMeta20 - issueTracker Issue Tracker Link to software bug reporting or issue tracking system https://... 
url #VALUE FALSE FALSE FALSE TRUE FALSE FALSE codeMeta20 -#controlledVocabulary DatasetField Value identifier displayOrder - developmentStatus Concept concept 0 - developmentStatus WIP wip 1 - developmentStatus Active active 2 - developmentStatus Inactive inactive 3 - developmentStatus Unsupported unsupported 4 - developmentStatus Moved moved 5 - developmentStatus Suspended suspended 6 - developmentStatus Abandoned abandoned 7 \ No newline at end of file +#metadataBlock name dataverseAlias displayName blockURI + codeMeta20 Software Metadata (CodeMeta v2.0) https://codemeta.github.io/terms/ +#datasetField name title description watermark fieldType displayOrder displayFormat advancedSearchField allowControlledVocabulary allowmultiples facetable displayoncreate required parent metadatablock_id termURI + softwareVersion Software Version Version of the software instance, usually following some convention like SemVer etc. e.g. 0.2.1 or 1.3 or 2021.1 etc text 0 #VALUE TRUE FALSE FALSE TRUE TRUE FALSE codeMeta20 https://schema.org/softwareVersion + developmentStatus Development Status Description of development status, e.g. work in progress (wip), active, etc. See repostatus.org for more information. text 1 #VALUE TRUE TRUE FALSE TRUE FALSE FALSE codeMeta20 https://www.repostatus.org + codeRepository Code Repository Link to the repository where the un-compiled, human readable code and related code is located (SVN, GitHub, CodePlex, institutional GitLab instance, Gitea, etc.). e.g. https://github.com/user/project url 2 #VALUE TRUE FALSE TRUE FALSE TRUE FALSE codeMeta20 https://schema.org/codeRepository + applicationCategory Application Category Type of software application, e.g. Simulation, Analysis, Visualisation. text 3 #VALUE TRUE FALSE TRUE TRUE TRUE FALSE codeMeta20 https://schema.org/applicationCategory + applicationSubCategory Application Subcategory Subcategory of the application, e.g. Arcade Game. text 4 #VALUE TRUE FALSE TRUE TRUE FALSE FALSE codeMeta20 https://schema.org/applicationSubCategory + programmingLanguage Programming Language The programming language(s) used to implement the software (e.g. Python, C++, Matlab, Fortran, Java, Julia,...) text 5 #VALUE TRUE FALSE TRUE TRUE TRUE FALSE codeMeta20 https://schema.org/programmingLanguage + runtimePlatform Runtime Platform Runtime platform or script interpreter dependencies (e.g. Java 11, Python 3.10 or .Net Framework 4.8). e.g. Python 3.10 text 6 #VALUE TRUE FALSE TRUE TRUE FALSE FALSE codeMeta20 https://schema.org/runtimePlatform + operatingSystem Operating Systems Operating systems supported (e.g. Windows 10, OSX 11.3, Android 11). text 7 #VALUE TRUE FALSE TRUE TRUE TRUE FALSE codeMeta20 https://schema.org/operatingSystem + targetProduct Target Product Target Operating System / Product to which the code applies. If applies to several versions, just the product name can be used. text 8 #VALUE TRUE FALSE TRUE TRUE FALSE FALSE codeMeta20 https://schema.org/targetProduct + buildInstructions Build Instructions Link to installation instructions/documentation e.g. https://github.com/user/project/blob/main/BUILD.md url 9 #VALUE FALSE FALSE TRUE FALSE FALSE FALSE codeMeta20 https://codemeta.github.io/terms/buildInstructions + softwareRequirementsItem Software Requirements Required software dependencies none 10 FALSE FALSE TRUE FALSE TRUE FALSE codeMeta20 + softwareRequirements Name & Version Name and version of the required software/library dependency e.g. 
Pandas 1.4.3 text 0 #VALUE TRUE FALSE FALSE FALSE TRUE FALSE softwareRequirementsItem codeMeta20 https://schema.org/softwareRequirements + softwareRequirementsInfoUrl Info URL Link to required software/library homepage or documentation (ideally also versioned) e.g. https://pandas.pydata.org/pandas-docs/version/1.4.3 url 1 #VALUE FALSE FALSE FALSE FALSE TRUE FALSE softwareRequirementsItem codeMeta20 https://dataverse.org/schema/codeMeta20/softwareRequirementsInfoUrl + softwareSuggestionsItem Software Suggestions Optional dependencies, e.g. for optional features, code development, etc. none 11 FALSE FALSE TRUE FALSE FALSE FALSE codeMeta20 + softwareSuggestions Name & Version Name and version of the optional software/library dependency e.g. Sphinx 5.0.2 text 0 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE softwareSuggestionsItem codeMeta20 https://codemeta.github.io/terms/softwareSuggestions + softwareSuggestionsInfoUrl Info URL Link to optional software/library homepage or documentation (ideally also versioned) e.g. https://www.sphinx-doc.org url 1 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE softwareSuggestionsItem codeMeta20 https://dataverse.org/schema/codeMeta20/softwareSuggestionsInfoUrl + memoryRequirements Memory Requirements Minimum memory requirements. text 12 #VALUE TRUE FALSE FALSE FALSE FALSE FALSE codeMeta20 https://schema.org/memoryRequirements + processorRequirements Processor Requirements Processor architecture required to run the application (e.g. IA64). text 13 #VALUE TRUE FALSE TRUE FALSE FALSE FALSE codeMeta20 https://schema.org/processorRequirements + storageRequirements Storage Requirements Storage requirements (e.g. free space required). text 14 #VALUE TRUE FALSE FALSE FALSE FALSE FALSE codeMeta20 https://schema.org/storageRequirements + permissions Permissions Permission(s) required to run the code (for example, a mobile app may require full internet access or may run only on wifi). text 15 #VALUE TRUE FALSE TRUE FALSE FALSE FALSE codeMeta20 https://schema.org/permissions + softwareHelp Software Help/Documentation Link to help texts or documentation e.g. https://user.github.io/project/docs url 16 #VALUE FALSE FALSE TRUE FALSE TRUE FALSE codeMeta20 https://schema.org/softwareHelp + readme Readme Link to the README of the project e.g. https://github.com/user/project/blob/main/README.md url 17 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE codeMeta20 https://codemeta.github.io/terms/readme + releaseNotes Release Notes Link to release notes e.g. https://github.com/user/project/blob/main/docs/release-0.1.md url 18 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE codeMeta20 https://schema.org/releaseNotes + contIntegration Continuous Integration Link to continuous integration service e.g. https://github.com/user/project/actions url 19 #VALUE FALSE FALSE TRUE FALSE FALSE FALSE codeMeta20 https://codemeta.github.io/terms/contIntegration + issueTracker Issue Tracker Link to software bug reporting or issue tracking system e.g. 
https://github.com/user/project/issues url 20 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE codeMeta20 https://codemeta.github.io/terms/issueTracker +#controlledVocabulary DatasetField Value identifier displayOrder + developmentStatus Concept concept 0 + developmentStatus WIP wip 1 + developmentStatus Active active 2 + developmentStatus Inactive inactive 3 + developmentStatus Unsupported unsupported 4 + developmentStatus Moved moved 5 + developmentStatus Suspended suspended 6 + developmentStatus Abandoned abandoned 7 From 1e8567d2ad343547d39c3df3d32e4a1d81229d6e Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 22 Jul 2022 10:01:45 +0200 Subject: [PATCH 007/173] feat(metadata): add i18n properties for CodeMeta #7844 --- .../java/propertyFiles/codeMeta20.properties | 85 +++++++++++++++++++ 1 file changed, 85 insertions(+) create mode 100644 src/main/java/propertyFiles/codeMeta20.properties diff --git a/src/main/java/propertyFiles/codeMeta20.properties b/src/main/java/propertyFiles/codeMeta20.properties new file mode 100644 index 00000000000..e203c1e46e9 --- /dev/null +++ b/src/main/java/propertyFiles/codeMeta20.properties @@ -0,0 +1,85 @@ +metadatablock.name=codeMeta20 +metadatablock.displayName=Software Metadata (CodeMeta 2.0) +datasetfieldtype.softwareVersion.title=Software Version +datasetfieldtype.softwareVersion.description=Version of the software instance, usually following some convention like SemVer etc. +datasetfieldtype.softwareVersion.watermark=e.g. 0.2.1 or 1.3 or 2021.1 etc +datasetfieldtype.developmentStatus.title=Development Status +datasetfieldtype.developmentStatus.description=Description of development status, e.g. work in progress (wip), active, etc. See repostatus.org for more information. +datasetfieldtype.developmentStatus.watermark= Development Status +datasetfieldtype.codeRepository.title=Code Repository +datasetfieldtype.codeRepository.description=Link to the repository where the un-compiled, human readable code and related code is located (SVN, GitHub, CodePlex, institutional GitLab instance, Gitea, etc.). +datasetfieldtype.codeRepository.watermark=e.g. https://github.com/user/project +datasetfieldtype.applicationCategory.title= Application Category +datasetfieldtype.applicationCategory.description=Type of software application, e.g. Simulation, Analysis, Visualisation. +datasetfieldtype.applicationCategory.watermark= +datasetfieldtype.applicationSubCategory.title=Application Subcategory +datasetfieldtype.applicationSubCategory.description=Subcategory of the application, e.g. Arcade Game. +datasetfieldtype.applicationSubCategory.watermark= +datasetfieldtype.programmingLanguage.title=Programming Language +datasetfieldtype.programmingLanguage.description=The programming language(s) used to implement the software (e.g. Python, C++, Matlab, Fortran, Java, Julia,...) +datasetfieldtype.programmingLanguage.watermark= +datasetfieldtype.runtimePlatform.title=Runtime Platform +datasetfieldtype.runtimePlatform.description=Runtime platform or script interpreter dependencies (e.g. Java 11, Python 3.10 or .Net Framework 4.8). +datasetfieldtype.runtimePlatform.watermark=e.g. Python 3.10 +datasetfieldtype.operatingSystem.title=Operating Systems +datasetfieldtype.operatingSystem.description=Operating systems supported (e.g. Windows 10, OSX 11.3, Android 11). +datasetfieldtype.operatingSystem.watermark= +datasetfieldtype.targetProduct.title=Target Product +datasetfieldtype.targetProduct.description=Target Operating System / Product to which the code applies. 
If applies to several versions, just the product name can be used. +datasetfieldtype.targetProduct.watermark= +datasetfieldtype.buildInstructions.title=Build Instructions +datasetfieldtype.buildInstructions.description=Link to installation instructions/documentation +datasetfieldtype.buildInstructions.watermark=e.g. https://github.com/user/project/blob/main/BUILD.md +datasetfieldtype.softwareRequirementsItem.title=Software Requirements +datasetfieldtype.softwareRequirementsItem.description=Required software dependencies +datasetfieldtype.softwareRequirementsItem.watermark= +datasetfieldtype.softwareRequirements.title=Name & Version +datasetfieldtype.softwareRequirements.description=Name and version of the required software/library dependency +datasetfieldtype.softwareRequirements.watermark=e.g. Pandas 1.4.3 +datasetfieldtype.softwareRequirementsInfoUrl.title=Info URL +datasetfieldtype.softwareRequirementsInfoUrl.description=Link to required software/library homepage or documentation (ideally also versioned) +datasetfieldtype.softwareRequirementsInfoUrl.watermark=e.g. https://pandas.pydata.org/pandas-docs/version/1.4.3 +datasetfieldtype.softwareSuggestionsItem.title=Software Suggestions +datasetfieldtype.softwareSuggestionsItem.description=Optional dependencies, e.g. for optional features, code development, etc. +datasetfieldtype.softwareSuggestionsItem.watermark= +datasetfieldtype.softwareSuggestions.title=Name & Version +datasetfieldtype.softwareSuggestions.description=Name and version of the optional software/library dependency +datasetfieldtype.softwareSuggestions.watermark=e.g. Sphinx 5.0.2 +datasetfieldtype.softwareSuggestionsInfoUrl.title=Info URL +datasetfieldtype.softwareSuggestionsInfoUrl.description=Link to optional software/library homepage or documentation (ideally also versioned) +datasetfieldtype.softwareSuggestionsInfoUrl.watermark=e.g. https://www.sphinx-doc.org +datasetfieldtype.memoryRequirements.title=Memory Requirements +datasetfieldtype.memoryRequirements.description=Minimum memory requirements. +datasetfieldtype.memoryRequirements.watermark= +datasetfieldtype.processorRequirements.title=Processor Requirements +datasetfieldtype.processorRequirements.description=Processor architecture required to run the application (e.g. IA64). +datasetfieldtype.processorRequirements.watermark= +datasetfieldtype.storageRequirements.title=Storage Requirements +datasetfieldtype.storageRequirements.description=Storage requirements (e.g. free space required). +datasetfieldtype.storageRequirements.watermark= +datasetfieldtype.permissions.title=Permissions +datasetfieldtype.permissions.description=Permission(s) required to run the code (for example, a mobile app may require full internet access or may run only on wifi). +datasetfieldtype.permissions.watermark= +datasetfieldtype.softwareHelp.title=Software Help/Documentation +datasetfieldtype.softwareHelp.description=Link to help texts or documentation +datasetfieldtype.softwareHelp.watermark=e.g. https://user.github.io/project/docs +datasetfieldtype.readme.title=Readme +datasetfieldtype.readme.description=Link to the README of the project +datasetfieldtype.readme.watermark=e.g. https://github.com/user/project/blob/main/README.md +datasetfieldtype.releaseNotes.title=Release Notes +datasetfieldtype.releaseNotes.description=Link to release notes +datasetfieldtype.releaseNotes.watermark=e.g. 
https://github.com/user/project/blob/main/docs/release-0.1.md +datasetfieldtype.contIntegration.title=Continuous Integration +datasetfieldtype.contIntegration.description=Link to continuous integration service +datasetfieldtype.contIntegration.watermark=e.g. https://github.com/user/project/actions +datasetfieldtype.issueTracker.title=Issue Tracker +datasetfieldtype.issueTracker.description=Link to software bug reporting or issue tracking system +datasetfieldtype.issueTracker.watermark=e.g. https://github.com/user/project/issues +controlledvocabulary.developmentStatus.concept=Concept +controlledvocabulary.developmentStatus.wip=WIP +controlledvocabulary.developmentStatus.active=Active +controlledvocabulary.developmentStatus.inactive=Inactive +controlledvocabulary.developmentStatus.unsupported=Unsupported +controlledvocabulary.developmentStatus.moved=Moved +controlledvocabulary.developmentStatus.suspended=Suspended +controlledvocabulary.developmentStatus.abandoned=Abandoned From c6c669c0ebbb8a3ef161ef48b39391a0ee7064a9 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Mon, 1 Aug 2022 12:38:14 +0200 Subject: [PATCH 008/173] refactor(metadata): move CodeMeta to experimental #7844 With the merge of computational workflow metadata considered experimental, move CodeMeta there, too. --- doc/sphinx-guides/source/user/appendix.rst | 2 +- scripts/api/setup-datasetfields.sh | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/sphinx-guides/source/user/appendix.rst b/doc/sphinx-guides/source/user/appendix.rst index fe8dc580f1e..d6009edc9c9 100755 --- a/doc/sphinx-guides/source/user/appendix.rst +++ b/doc/sphinx-guides/source/user/appendix.rst @@ -30,13 +30,13 @@ Detailed below are what metadata schemas we support for Citation and Domain Spec `Virtual Observatory (VO) Discovery and Provenance Metadata `__ (`see .tsv version `__). - `Life Sciences Metadata `__ (`see .tsv version `__): based on `ISA-Tab Specification `__, along with controlled vocabulary from subsets of the `OBI Ontology `__ and the `NCBI Taxonomy for Organisms `__. - `Journal Metadata `__ (`see .tsv version `__): based on the `Journal Archiving and Interchange Tag Set, version 1.2 `__. -- `CodeMeta Software Metadata `__: based on the `CodeMeta Software Metadata Schema, version 2.0 `__ (`see .tsv version `__) Experimental Metadata ~~~~~~~~~~~~~~~~~~~~~ Unlike supported metadata, experimental metadata is not enabled by default in a new Dataverse installation. Feedback via any `channel `_ is welcome! +- `CodeMeta Software Metadata `__: based on the `CodeMeta Software Metadata Schema, version 2.0 `__ (`see .tsv version `__) - `Computational Workflow Metadata `__ (`see .tsv version `__): adapted from `Bioschemas Computational Workflow Profile, version 1.0 `__ and `Codemeta `__. 
See Also diff --git a/scripts/api/setup-datasetfields.sh b/scripts/api/setup-datasetfields.sh index 741a439e542..0d2d60b9538 100755 --- a/scripts/api/setup-datasetfields.sh +++ b/scripts/api/setup-datasetfields.sh @@ -7,4 +7,3 @@ curl http://localhost:8080/api/admin/datasetfield/load -X POST --data-binary @da curl http://localhost:8080/api/admin/datasetfield/load -X POST --data-binary @data/metadatablocks/astrophysics.tsv -H "Content-type: text/tab-separated-values" curl http://localhost:8080/api/admin/datasetfield/load -X POST --data-binary @data/metadatablocks/biomedical.tsv -H "Content-type: text/tab-separated-values" curl http://localhost:8080/api/admin/datasetfield/load -X POST --data-binary @data/metadatablocks/journals.tsv -H "Content-type: text/tab-separated-values" -curl http://localhost:8080/api/admin/datasetfield/load -X POST --data-binary @data/metadatablocks/codemeta.tsv -H "Content-type: text/tab-separated-values" From 727ef9be9bd948f7c9b39855fa2648655439371b Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 7 Sep 2022 17:36:04 -0400 Subject: [PATCH 009/173] fix progress during hash calc --- src/main/webapp/resources/js/fileupload.js | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/webapp/resources/js/fileupload.js b/src/main/webapp/resources/js/fileupload.js index 564239ee7ef..a478235c09f 100644 --- a/src/main/webapp/resources/js/fileupload.js +++ b/src/main/webapp/resources/js/fileupload.js @@ -144,6 +144,7 @@ var fileUpload = class fileUploadClass { async doUpload() { this.state = UploadState.UPLOADING; var thisFile = curFile-1; + this.id=thisFile; //This appears to be the earliest point when the file table has been populated, and, since we don't know how many table entries have had ids added already, we check var filerows = $('.ui-fileupload-files .ui-fileupload-row'); //Add an id attribute to each entry so we can later match progress and errors with the right entry @@ -318,7 +319,7 @@ var fileUpload = class fileUploadClass { if (directUploadReport) { getMD5(this.file, prog => { var current = 1 + prog; - $('progress').attr({ + $('[upid="' + this.id + '"] progress').attr({ value: current, max: 2 }); From d35e1c3ea8e149854598053f9dd0dd35bc9b132b Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Mon, 12 Sep 2022 09:04:11 -0400 Subject: [PATCH 010/173] Trigger auto-analyze more frequently for guestbook estimates --- .../db/migration/V5.12.0.1__8840-improve-guestbook-estimates.sql | 1 + 1 file changed, 1 insertion(+) create mode 100644 src/main/resources/db/migration/V5.12.0.1__8840-improve-guestbook-estimates.sql diff --git a/src/main/resources/db/migration/V5.12.0.1__8840-improve-guestbook-estimates.sql b/src/main/resources/db/migration/V5.12.0.1__8840-improve-guestbook-estimates.sql new file mode 100644 index 00000000000..91ab5253f9c --- /dev/null +++ b/src/main/resources/db/migration/V5.12.0.1__8840-improve-guestbook-estimates.sql @@ -0,0 +1 @@ +ALTER TABLE guestbookresponse SET (autovacuum_analyze_scale_factor = 0.01); \ No newline at end of file From 11ff8d7a7a75c9566935e6b2581bb88cdad0529e Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Mon, 12 Sep 2022 09:29:19 -0400 Subject: [PATCH 011/173] rel note --- doc/release-notes/8840-improved-download-estimate.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 doc/release-notes/8840-improved-download-estimate.md diff --git a/doc/release-notes/8840-improved-download-estimate.md b/doc/release-notes/8840-improved-download-estimate.md new file mode 100644 index 00000000000..cb264b7e683 --- 
/dev/null +++ b/doc/release-notes/8840-improved-download-estimate.md @@ -0,0 +1 @@ +To improve performance, Dataverse estimates download counts. This release includes an update that makes the estimate more accurate. \ No newline at end of file From a72e88ced9f09e0c8897e5707598252f2d54184c Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Mon, 20 Jun 2022 17:46:47 +0200 Subject: [PATCH 012/173] refactor(settings): simplify SystemConfig.getVersion #7000 Instead of trying to read a built time file from Maven, use MicroProfile Config to retrieve the version and build number. The version is by default set via microprofile-config.properties (or overridden by an env var in a container). The build number is still read from either BuildNumber.properties or, if not present, from MicroProfile Config, defaulting to empty. This also avoids copying extra files into containers to retrieve the version string. --- .../iq/dataverse/util/SystemConfig.java | 175 ++++-------------- .../iq/dataverse/util/SystemConfigTest.java | 36 ++++ 2 files changed, 77 insertions(+), 134 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java index bd27405fae5..25dd3dd6138 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java @@ -1,18 +1,28 @@ package edu.harvard.iq.dataverse.util; import com.ocpsoft.pretty.PrettyContext; - import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.DataverseServiceBean; import edu.harvard.iq.dataverse.DvObjectContainer; import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; import edu.harvard.iq.dataverse.authorization.providers.builtin.BuiltinAuthenticationProvider; import edu.harvard.iq.dataverse.authorization.providers.oauth2.AbstractOAuth2AuthenticationProvider; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.validation.PasswordValidatorUtil; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStream; +import org.eclipse.microprofile.config.Config; +import org.eclipse.microprofile.config.ConfigProvider; +import org.passay.CharacterRule; + +import javax.ejb.EJB; +import javax.ejb.Stateless; +import javax.inject.Named; +import javax.json.Json; +import javax.json.JsonArray; +import javax.json.JsonObject; +import javax.json.JsonReader; +import javax.json.JsonString; +import javax.json.JsonValue; import java.io.StringReader; import java.net.InetAddress; import java.net.UnknownHostException; @@ -23,25 +33,11 @@ import java.util.List; import java.util.Map; import java.util.MissingResourceException; -import java.util.Properties; import java.util.ResourceBundle; import java.util.logging.Logger; import java.util.regex.Matcher; import java.util.regex.Pattern; -import javax.ejb.EJB; -import javax.ejb.Stateless; -import javax.inject.Named; -import javax.json.Json; -import javax.json.JsonArray; -import javax.json.JsonObject; -import javax.json.JsonReader; -import javax.json.JsonString; -import javax.json.JsonValue; - -import org.passay.CharacterRule; -import org.apache.commons.io.IOUtils; - /** * System-wide configuration */ @@ -50,6 +46,7 @@ public class SystemConfig { private static final Logger logger = Logger.getLogger(SystemConfig.class.getCanonicalName()); + private static final Config config = ConfigProvider.getConfig(); @EJB SettingsServiceBean 
settingsService; @@ -109,9 +106,8 @@ public class SystemConfig { public static final long defaultZipDownloadLimit = 104857600L; // 100MB private static final int defaultMultipleUploadFilesLimit = 1000; private static final int defaultLoginSessionTimeout = 480; // = 8 hours - - private static String appVersionString = null; - private static String buildNumberString = null; + + private String buildNumber = null; private static final String JVM_TIMER_SERVER_OPTION = "dataverse.timerServer"; @@ -132,127 +128,38 @@ public String getVersion() { // candidate for being moved into some kind of an application-scoped caching // service... some CachingService @Singleton - ? (L.A. 5.8) public String getVersion(boolean withBuildNumber) { - - if (appVersionString == null) { - - // The Version Number is no longer supplied in a .properties file - so - // we can't just do - // return BundleUtil.getStringFromBundle("version.number", null, ResourceBundle.getBundle("VersionNumber", Locale.US)); - // - // Instead, we'll rely on Maven placing the version number into the - // Manifest, and getting it from there: - // (this is considered a better practice, and will also allow us - // to maintain this number in only one place - the pom.xml file) - // -- L.A. 4.0.2 - - // One would assume, that once the version is in the MANIFEST.MF, - // as Implementation-Version:, it would be possible to obtain - // said version simply as - // appVersionString = getClass().getPackage().getImplementationVersion(); - // alas - that's not working, for whatever reason. (perhaps that's - // only how it works with jar-ed packages; not with .war files). - // People on the interwebs suggest that one should instead - // open the Manifest as a resource, then extract its attributes. - // There were some complications with that too. Plus, relying solely - // on the MANIFEST.MF would NOT work for those of the developers who - // are using "in place deployment" (i.e., where - // Netbeans runs their builds directly from the local target - // directory, bypassing the war file deployment; and the Manifest - // is only available in the .war file). For that reason, I am - // going to rely on the pom.properties file, and use java.util.Properties - // to read it. We have to look for this file in 2 different places - // depending on whether this is a .war file deployment, or a - // developers build. (the app-level META-INF is only populated when - // a .war file is built; the "maven-archiver" directory, on the other - // hand, is only available when it's a local build deployment). - // So, long story short, I'm resorting to the convoluted steps below. - // It may look hacky, but it should actually be pretty solid and - // reliable. - - - // First, find the absolute path url of the application persistence file - // always supplied with the Dataverse app: - java.net.URL fileUrl = Thread.currentThread().getContextClassLoader().getResource("META-INF/persistence.xml"); - String filePath = null; - - - if (fileUrl != null) { - filePath = fileUrl.getFile(); - if (filePath != null) { - InputStream mavenPropertiesInputStream = null; - String mavenPropertiesFilePath; - Properties mavenProperties = new Properties(); - - - filePath = filePath.replaceFirst("/[^/]*$", "/"); - // Using a relative path, find the location of the maven pom.properties file. - // First, try to look for it in the app-level META-INF. 
This will only be - // available if it's a war file deployment: - mavenPropertiesFilePath = filePath.concat("../../../META-INF/maven/edu.harvard.iq/dataverse/pom.properties"); - - try { - mavenPropertiesInputStream = new FileInputStream(mavenPropertiesFilePath); - } catch (IOException ioex) { - // OK, let's hope this is a local dev. build. - // In that case the properties file should be available in - // the maven-archiver directory: - - mavenPropertiesFilePath = filePath.concat("../../../../maven-archiver/pom.properties"); - - // try again: - - try { - mavenPropertiesInputStream = new FileInputStream(mavenPropertiesFilePath); - } catch (IOException ioex2) { - logger.warning("Failed to find and/or open for reading the pom.properties file."); - mavenPropertiesInputStream = null; - } - } - - if (mavenPropertiesInputStream != null) { - try { - mavenProperties.load(mavenPropertiesInputStream); - appVersionString = mavenProperties.getProperty("version"); - } catch (IOException ioex) { - logger.warning("caught IOException trying to read and parse the pom properties file."); - } finally { - IOUtils.closeQuietly(mavenPropertiesInputStream); - } - } - - } else { - logger.warning("Null file path representation of the location of persistence.xml in the webapp root directory!"); - } - } else { - logger.warning("Could not find the location of persistence.xml in the webapp root directory!"); - } - - - if (appVersionString == null) { - // still null? - defaulting to 4.0: - appVersionString = "4.0"; - } - } + // Retrieve the version via MPCONFIG + // NOTE: You may override the version via all methods of MPCONFIG. + // It will default to read from microprofile-config.properties source, + // which contains in the source a Maven property reference to ${project.version}. + // When packaging the app to deploy it, Maven will replace this, rendering it a static entry. + // NOTE: MicroProfile Config will cache the entry for us in internal maps. 
+ String appVersion = JvmSettings.VERSION.lookup(); if (withBuildNumber) { - if (buildNumberString == null) { - // (build number is still in a .properties file in the source tree; it only - // contains a real build number if this war file was built by - // Jenkins) - + if (buildNumber == null) { + // (build number is still in a .properties file in the source tree; it only + // contains a real build number if this war file was built by Jenkins) + // TODO: might be replaced with same trick as for version via Maven property w/ empty default try { - buildNumberString = ResourceBundle.getBundle("BuildNumber").getString("build.number"); + buildNumber = ResourceBundle.getBundle("BuildNumber").getString("build.number"); } catch (MissingResourceException ex) { - buildNumberString = null; + buildNumber = null; + } + + // Also try to read the build number via MicroProfile Config if not already present from the + // properties file (so can be overridden by env var or other source) + if (buildNumber == null || buildNumber.isEmpty()) { + buildNumber = JvmSettings.BUILD.lookupOptional().orElse(""); } } - if (buildNumberString != null && !buildNumberString.equals("")) { - return appVersionString + " build " + buildNumberString; - } - } + if (!buildNumber.equals("")) { + return appVersion + " build " + buildNumber; + } + } - return appVersionString; + return appVersion; } public String getSolrHostColonPort() { diff --git a/src/test/java/edu/harvard/iq/dataverse/util/SystemConfigTest.java b/src/test/java/edu/harvard/iq/dataverse/util/SystemConfigTest.java index 891b029f521..b8ad0a57748 100644 --- a/src/test/java/edu/harvard/iq/dataverse/util/SystemConfigTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/util/SystemConfigTest.java @@ -1,13 +1,49 @@ package edu.harvard.iq.dataverse.util; +import edu.harvard.iq.dataverse.settings.JvmSettings; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.CsvSource; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; class SystemConfigTest { + SystemConfig systemConfig = new SystemConfig(); + + @Test + void testGetVersion() { + // given + String version = "100.100"; + System.setProperty(JvmSettings.VERSION.getScopedKey(), version); + + // when + String result = systemConfig.getVersion(false); + + // then + assertEquals(version, result); + } + + @Test + void testGetVersionWithBuild() { + // given + String version = "100.100"; + String build = "FOOBAR"; + System.setProperty(JvmSettings.VERSION.getScopedKey(), version); + System.setProperty(JvmSettings.BUILD.getScopedKey(), build); + + // when + String result = systemConfig.getVersion(true); + + // then + assertTrue(result.startsWith(version), "'" + result + "' not starting with " + version); + assertTrue(result.contains("build")); + + // Cannot test this here - there might be the bundle file present which is not under test control + //assertTrue(result.endsWith(build), "'" + result + "' not ending with " + build); + } + @Test void testGetLongLimitFromStringOrDefault_withNullInput() { long defaultValue = 5L; From 5f925edf6668893c96df5117157086ef641a5b44 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 30 Jun 2022 21:57:49 +0200 Subject: [PATCH 013/173] docs(dev): add some tips about new options dataverse.build and dataverse.version #7000 --- doc/sphinx-guides/source/developers/tips.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git 
a/doc/sphinx-guides/source/developers/tips.rst b/doc/sphinx-guides/source/developers/tips.rst index 3fff3e76ea8..2b15948bd34 100755 --- a/doc/sphinx-guides/source/developers/tips.rst +++ b/doc/sphinx-guides/source/developers/tips.rst @@ -173,6 +173,13 @@ commit id in your test deployment webpages on the bottom right corner next to th When you prefer manual updates, there is another script, see above: :ref:`custom_build_num_script`. +An alternative to that is using *MicroProfile Config* and set the option ``dataverse.build`` via a system property, +environment variable (``DATAVERSE_BUILD``) or `one of the other config sources +`__. + +You could even override the version itself with the option ``dataverse.version`` in the same way, which is usually +picked up from a build time source. + Sample Data ----------- From 32f7a6f59743dda59fa3ff3c779fc35e6239f1e1 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 21 Jun 2022 20:57:01 +0200 Subject: [PATCH 014/173] refactor(settings): make Solr endpoint configurable via MPCONFIG #7000 By refactoring SystemConfig.getSolrHostColonPort, the Solr endpoint is not just configurable via a database setting, but also by all mechanisms of MicroProfile Config. - The database setting still has priority over the other mechanisms. - It's completely backward compatible, no config change necessary. - Tests have been added to ensure the behaviour - Default ("localhost:8983") for no setting given is now also done via MPCONFIG - Default for container usage ("solr:8983") possible via MPCONFIG profile "ct" --- .../iq/dataverse/settings/JvmSettings.java | 5 ++ .../iq/dataverse/util/SystemConfig.java | 37 +++++++----- .../META-INF/microprofile-config.properties | 6 ++ .../iq/dataverse/util/SystemConfigTest.java | 59 ++++++++++++++++++- 4 files changed, 92 insertions(+), 15 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java index 223e4b86da9..e73453abc16 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java @@ -42,6 +42,11 @@ public enum JvmSettings { VERSION(PREFIX, "version"), BUILD(PREFIX, "build"), + // SOLR INDEX SETTINGS + SCOPE_SOLR(PREFIX, "solr"), + SOLR_HOST(SCOPE_SOLR, "host"), + SOLR_PORT(SCOPE_SOLR, "port"), + ; private static final String SCOPE_SEPARATOR = "."; diff --git a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java index bd27405fae5..acdd112196f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java @@ -8,6 +8,7 @@ import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; import edu.harvard.iq.dataverse.authorization.providers.builtin.BuiltinAuthenticationProvider; import edu.harvard.iq.dataverse.authorization.providers.oauth2.AbstractOAuth2AuthenticationProvider; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.validation.PasswordValidatorUtil; import java.io.FileInputStream; @@ -24,6 +25,7 @@ import java.util.Map; import java.util.MissingResourceException; import java.util.Properties; +import java.util.Optional; import java.util.ResourceBundle; import java.util.logging.Logger; import java.util.regex.Matcher; @@ -95,12 +97,6 @@ public class SystemConfig { */ private static 
final String PASSWORD_RESET_TIMEOUT_IN_MINUTES = "dataverse.auth.password-reset-timeout-in-minutes"; - /** - * A common place to find the String for a sane Solr hostname:port - * combination. - */ - private String saneDefaultForSolrHostColonPort = "localhost:8983"; - /** * The default number of datafiles that we allow to be created through * zip file upload. @@ -254,15 +250,28 @@ public String getVersion(boolean withBuildNumber) { return appVersionString; } - + + /** + * Retrieve the Solr endpoint in "host:port" form, to be used with a Solr client. + * + * This will retrieve the setting from either the database ({@link SettingsServiceBean.Key#SolrHostColonPort}) or + * via Microprofile Config API (properties {@link JvmSettings#SOLR_HOST} and {@link JvmSettings#SOLR_PORT}). + * + * A database setting always takes precedence. If not given via other config sources, a default from + * resources/META-INF/microprofile-config.properties is used. (It's possible to use profiles.) + * + * @return Solr endpoint as string "hostname:port" + */ public String getSolrHostColonPort() { - String SolrHost; - if ( System.getenv("SOLR_SERVICE_HOST") != null && System.getenv("SOLR_SERVICE_HOST") != ""){ - SolrHost = System.getenv("SOLR_SERVICE_HOST"); - } - else SolrHost = saneDefaultForSolrHostColonPort; - String solrHostColonPort = settingsService.getValueForKey(SettingsServiceBean.Key.SolrHostColonPort, SolrHost); - return solrHostColonPort; + // Get from MPCONFIG. Might be configured by a sysadmin or simply return the default shipped with + // resources/META-INF/microprofile-config.properties. + // NOTE: containers should use system property mp.config.profile=ct to use sane container usage default + String host = JvmSettings.SOLR_HOST.lookup(); + String port = JvmSettings.SOLR_PORT.lookup(); + + // DB setting takes precedence over all. If not present, will return default from above. 
+ return Optional.ofNullable(settingsService.getValueForKey(SettingsServiceBean.Key.SolrHostColonPort)) + .orElse(host + ":" + port); } public boolean isProvCollectionEnabled() { diff --git a/src/main/resources/META-INF/microprofile-config.properties b/src/main/resources/META-INF/microprofile-config.properties index 16298d83118..b6aa686de01 100644 --- a/src/main/resources/META-INF/microprofile-config.properties +++ b/src/main/resources/META-INF/microprofile-config.properties @@ -3,6 +3,12 @@ dataverse.version=${project.version} dataverse.build= +# SEARCH INDEX +dataverse.solr.host=localhost +# Activating mp config profile -Dmp.config.profile=ct changes default to "solr" as DNS name +%ct.dataverse.solr.host=solr +dataverse.solr.port=8983 + # DATABASE dataverse.db.host=localhost dataverse.db.port=5432 diff --git a/src/test/java/edu/harvard/iq/dataverse/util/SystemConfigTest.java b/src/test/java/edu/harvard/iq/dataverse/util/SystemConfigTest.java index 891b029f521..75f919b90ab 100644 --- a/src/test/java/edu/harvard/iq/dataverse/util/SystemConfigTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/util/SystemConfigTest.java @@ -1,13 +1,70 @@ package edu.harvard.iq.dataverse.util; +import edu.harvard.iq.dataverse.settings.JvmSettings; +import edu.harvard.iq.dataverse.settings.SettingsServiceBean; +import edu.harvard.iq.dataverse.util.testing.JvmSetting; import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.CsvSource; +import org.mockito.InjectMocks; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.mockito.Mockito.doReturn; +@ExtendWith(MockitoExtension.class) class SystemConfigTest { - + + @InjectMocks + SystemConfig systemConfig = new SystemConfig(); + @Mock + SettingsServiceBean settingsService; + + @Test + @JvmSetting(key = JvmSettings.SOLR_HOST, value = "foobar") + @JvmSetting(key = JvmSettings.SOLR_PORT, value = "1234") + void testGetSolrHostColonPortNoDBEntry() { + // given + String hostPort = "foobar:1234"; + + // when + doReturn(null).when(settingsService).getValueForKey(SettingsServiceBean.Key.SolrHostColonPort); + String result = systemConfig.getSolrHostColonPort(); + + // then + assertEquals(hostPort, result); + } + + @Test + @JvmSetting(key = JvmSettings.SOLR_HOST, value = "foobar") + @JvmSetting(key = JvmSettings.SOLR_PORT, value = "1234") + void testGetSolrHostColonPortWithDBEntry() { + // given + String dbEntry = "hello:4321"; + + // when + doReturn(dbEntry).when(settingsService).getValueForKey(SettingsServiceBean.Key.SolrHostColonPort); + String result = systemConfig.getSolrHostColonPort(); + + // then + assertEquals(dbEntry, result); + } + + @Test + void testGetSolrHostColonPortDefault() { + // given + String hostPort = "localhost:8983"; + + // when + doReturn(null).when(settingsService).getValueForKey(SettingsServiceBean.Key.SolrHostColonPort); + String result = systemConfig.getSolrHostColonPort(); + + // then + assertEquals(hostPort, result); + } + @Test void testGetLongLimitFromStringOrDefault_withNullInput() { long defaultValue = 5L; From af36a0d4b6fb03502bb6dec65d0acfd60116d2c4 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 21 Jun 2022 20:59:40 +0200 Subject: [PATCH 015/173] feat(settings,solr): make Solr URL details configurable When using Dataverse with a non-default Solr, HTTPS, custom core name or similar, it's necessary to 
have a configurable URL for the Solr endpoint. This becomes now possible via MicroProfile Config, defaulting to the old variant. --- .../iq/dataverse/search/IndexServiceBean.java | 13 +++- .../dataverse/search/SolrClientService.java | 12 ++- .../iq/dataverse/settings/JvmSettings.java | 3 + .../META-INF/microprofile-config.properties | 3 + .../search/IndexServiceBeanTest.java | 73 +++++++++++++++---- .../search/SolrClientServiceTest.java | 59 +++++++++++++++ 6 files changed, 144 insertions(+), 19 deletions(-) create mode 100644 src/test/java/edu/harvard/iq/dataverse/search/SolrClientServiceTest.java diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index 484e5768eb1..06a6e5928df 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -30,6 +30,7 @@ import edu.harvard.iq.dataverse.datavariable.VariableMetadataUtil; import edu.harvard.iq.dataverse.datavariable.VariableServiceBean; import edu.harvard.iq.dataverse.harvest.client.HarvestingClient; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.StringUtil; @@ -86,6 +87,8 @@ import org.apache.tika.metadata.Metadata; import org.apache.tika.parser.ParseContext; import org.apache.tika.sax.BodyContentHandler; +import org.eclipse.microprofile.config.Config; +import org.eclipse.microprofile.config.ConfigProvider; import org.xml.sax.ContentHandler; @Stateless @@ -93,6 +96,7 @@ public class IndexServiceBean { private static final Logger logger = Logger.getLogger(IndexServiceBean.class.getCanonicalName()); + private static final Config config = ConfigProvider.getConfig(); @PersistenceContext(unitName = "VDCNet-ejbPU") private EntityManager em; @@ -153,13 +157,18 @@ public class IndexServiceBean { public static final String HARVESTED = "Harvested"; private String rootDataverseName; private Dataverse rootDataverseCached; - private SolrClient solrServer; + SolrClient solrServer; private VariableMetadataUtil variableMetadataUtil; @PostConstruct public void init() { - String urlString = "http://" + systemConfig.getSolrHostColonPort() + "/solr/collection1"; + // Get from MPCONFIG. Might be configured by a sysadmin or simply return the default shipped with + // resources/META-INF/microprofile-config.properties. 
+ String protocol = JvmSettings.SOLR_PROT.lookup(); + String path = JvmSettings.SOLR_PATH.lookup(); + + String urlString = protocol + "://" + systemConfig.getSolrHostColonPort() + path; solrServer = new HttpSolrClient.Builder(urlString).build(); rootDataverseName = findRootDataverseCached().getName(); diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SolrClientService.java b/src/main/java/edu/harvard/iq/dataverse/search/SolrClientService.java index f00ece9aacc..70483853979 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SolrClientService.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SolrClientService.java @@ -5,6 +5,7 @@ */ package edu.harvard.iq.dataverse.search; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.util.SystemConfig; import java.io.IOException; import java.util.logging.Logger; @@ -15,6 +16,8 @@ import javax.inject.Named; import org.apache.solr.client.solrj.SolrClient; import org.apache.solr.client.solrj.impl.HttpSolrClient; +import org.eclipse.microprofile.config.Config; +import org.eclipse.microprofile.config.ConfigProvider; /** * @@ -30,6 +33,7 @@ @Singleton public class SolrClientService { private static final Logger logger = Logger.getLogger(SolrClientService.class.getCanonicalName()); + private static final Config config = ConfigProvider.getConfig(); @EJB SystemConfig systemConfig; @@ -38,9 +42,13 @@ public class SolrClientService { @PostConstruct public void init() { - String urlString = "http://" + systemConfig.getSolrHostColonPort() + "/solr/collection1"; - solrClient = new HttpSolrClient.Builder(urlString).build(); + // Get from MPCONFIG. Might be configured by a sysadmin or simply return the default shipped with + // resources/META-INF/microprofile-config.properties. 
+ String protocol = JvmSettings.SOLR_PROT.lookup(); + String path = JvmSettings.SOLR_PATH.lookup(); + String urlString = protocol + "://" + systemConfig.getSolrHostColonPort() + path; + solrClient = new HttpSolrClient.Builder(urlString).build(); } @PreDestroy diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java index e73453abc16..222346e3b35 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java @@ -46,6 +46,9 @@ public enum JvmSettings { SCOPE_SOLR(PREFIX, "solr"), SOLR_HOST(SCOPE_SOLR, "host"), SOLR_PORT(SCOPE_SOLR, "port"), + SOLR_PROT(SCOPE_SOLR, "protocol"), + SOLR_CORE(SCOPE_SOLR, "core"), + SOLR_PATH(SCOPE_SOLR, "path"), ; diff --git a/src/main/resources/META-INF/microprofile-config.properties b/src/main/resources/META-INF/microprofile-config.properties index b6aa686de01..c846d80220c 100644 --- a/src/main/resources/META-INF/microprofile-config.properties +++ b/src/main/resources/META-INF/microprofile-config.properties @@ -8,6 +8,9 @@ dataverse.solr.host=localhost # Activating mp config profile -Dmp.config.profile=ct changes default to "solr" as DNS name %ct.dataverse.solr.host=solr dataverse.solr.port=8983 +dataverse.solr.protocol=http +dataverse.solr.core=collection1 +dataverse.solr.path=/solr/${dataverse.solr.core} # DATABASE dataverse.db.host=localhost diff --git a/src/test/java/edu/harvard/iq/dataverse/search/IndexServiceBeanTest.java b/src/test/java/edu/harvard/iq/dataverse/search/IndexServiceBeanTest.java index ad4647e4898..aab6af660cb 100644 --- a/src/test/java/edu/harvard/iq/dataverse/search/IndexServiceBeanTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/search/IndexServiceBeanTest.java @@ -1,18 +1,5 @@ package edu.harvard.iq.dataverse.search; -import static org.junit.Assert.assertTrue; - -import java.io.IOException; -import java.util.Arrays; -import java.util.Set; -import java.util.logging.Logger; -import java.util.stream.Collectors; - -import org.apache.solr.client.solrj.SolrServerException; -import org.junit.Before; -import org.junit.Test; -import org.mockito.Mockito; - import edu.harvard.iq.dataverse.ControlledVocabularyValue; import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DatasetField; @@ -26,21 +13,47 @@ import edu.harvard.iq.dataverse.MetadataBlock; import edu.harvard.iq.dataverse.branding.BrandingUtil; import edu.harvard.iq.dataverse.mocks.MocksFactory; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.SystemConfig; +import edu.harvard.iq.dataverse.util.testing.JvmSetting; +import org.apache.solr.client.solrj.SolrServerException; +import org.apache.solr.client.solrj.impl.HttpSolrClient; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.InjectMocks; +import org.mockito.Mock; +import org.mockito.Mockito; +import org.mockito.junit.jupiter.MockitoExtension; +import java.io.IOException; +import java.util.Arrays; +import java.util.Set; +import java.util.logging.Logger; +import java.util.stream.Collectors; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +@ExtendWith(MockitoExtension.class) public class IndexServiceBeanTest { private static final Logger logger = 
Logger.getLogger(IndexServiceBeanTest.class.getCanonicalName()); private IndexServiceBean indexService; private Dataverse dataverse; - @Before + @Mock + private SettingsServiceBean settingsService; + @InjectMocks + private SystemConfig systemConfig = new SystemConfig(); + + @BeforeEach public void setUp() { dataverse = MocksFactory.makeDataverse(); dataverse.setDataverseType(DataverseType.UNCATEGORIZED); indexService = new IndexServiceBean(); - indexService.systemConfig = new SystemConfig(); + indexService.systemConfig = systemConfig; indexService.settingsService = Mockito.mock(SettingsServiceBean.class); indexService.dataverseService = Mockito.mock(DataverseServiceBean.class); indexService.datasetFieldService = Mockito.mock(DatasetFieldServiceBean.class); @@ -48,6 +61,36 @@ public void setUp() { Mockito.when(indexService.dataverseService.findRootDataverse()).thenReturn(dataverse); } + + @Test + public void testInitWithDefaults() { + // given + String url = "http://localhost:8983/solr/collection1"; + + // when + indexService.init(); + + // then + HttpSolrClient client = (HttpSolrClient) indexService.solrServer; + assertEquals(url, client.getBaseURL()); + } + + + @Test + @JvmSetting(key = JvmSettings.SOLR_HOST, value = "foobar") + @JvmSetting(key = JvmSettings.SOLR_PORT, value = "1234") + @JvmSetting(key = JvmSettings.SOLR_CORE, value = "test") + void testInitWithConfig() { + // given + String url = "http://foobar:1234/solr/test"; + + // when + indexService.init(); + + // then + HttpSolrClient client = (HttpSolrClient) indexService.solrServer; + assertEquals(url, client.getBaseURL()); + } @Test public void TestIndexing() throws SolrServerException, IOException { diff --git a/src/test/java/edu/harvard/iq/dataverse/search/SolrClientServiceTest.java b/src/test/java/edu/harvard/iq/dataverse/search/SolrClientServiceTest.java new file mode 100644 index 00000000000..a3b3c8a2080 --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/search/SolrClientServiceTest.java @@ -0,0 +1,59 @@ +package edu.harvard.iq.dataverse.search; + +import edu.harvard.iq.dataverse.settings.JvmSettings; +import edu.harvard.iq.dataverse.settings.SettingsServiceBean; +import edu.harvard.iq.dataverse.util.SystemConfig; +import edu.harvard.iq.dataverse.util.testing.JvmSetting; +import org.apache.solr.client.solrj.impl.HttpSolrClient; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.InjectMocks; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +@ExtendWith(MockitoExtension.class) +class SolrClientServiceTest { + + @Mock + SettingsServiceBean settingsServiceBean; + @InjectMocks + SystemConfig systemConfig; + SolrClientService clientService = new SolrClientService(); + + @BeforeEach + void setUp() { + clientService.systemConfig = systemConfig; + } + + @Test + void testInitWithDefaults() { + // given + String url = "http://localhost:8983/solr/collection1"; + + // when + clientService.init(); + + // then + HttpSolrClient client = (HttpSolrClient) clientService.getSolrClient(); + assertEquals(url, client.getBaseURL()); + } + + @Test + @JvmSetting(key = JvmSettings.SOLR_HOST, value = "foobar") + @JvmSetting(key = JvmSettings.SOLR_PORT, value = "1234") + @JvmSetting(key = JvmSettings.SOLR_CORE, value = "test") + void testInitWithConfig() { + // given + String url = "http://foobar:1234/solr/test"; + + // when + clientService.init(); + + 
// then + HttpSolrClient client = (HttpSolrClient) clientService.getSolrClient(); + assertEquals(url, client.getBaseURL()); + } +} \ No newline at end of file From 0727d85b6a2755e2eb754988ff20a55aae8c92fb Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 23 Jun 2022 18:13:27 +0200 Subject: [PATCH 016/173] docs(settings): mark :SolrHostColonPort with @Deprecated #7000 --- .../harvard/iq/dataverse/settings/SettingsServiceBean.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java index c12b8f6e452..98dd6e2fa3b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java @@ -174,7 +174,12 @@ public enum Key { * */ SearchRespectPermissionRoot, - /** Solr hostname and port, such as "localhost:8983". */ + /** + * Solr hostname and port, such as "localhost:8983". + * @deprecated New installations should not use this database setting, but use {@link JvmSettings#SOLR_HOST} + * and {@link JvmSettings#SOLR_PORT}. + */ + @Deprecated(forRemoval = true, since = "2022-07-01") SolrHostColonPort, /** Enable full-text indexing in solr up to max file size */ SolrFullTextIndexing, //true or false (default) From 6965bebe78b83987af069f4f6a13e6a50539884a Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 21 Jun 2022 21:34:07 +0200 Subject: [PATCH 017/173] docs(settings): add Solr MPCONFIG options to guides #7000 Describe the new options to set the Solr endpoint, crosslinking the old way and adding hints about MPCONFIG profiles. --- .../source/installation/config.rst | 74 +++++++++++++++++++ 1 file changed, 74 insertions(+) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index ab0bad70206..c1cf39e4182 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -1462,6 +1462,61 @@ Defaults to ``5432``, the default PostgreSQL port. Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_DB_PORT``. +.. _dataverse.solr.host: + +dataverse.solr.host ++++++++++++++++++++ + +The hostname of a Solr server to connect to. Remember to restart / redeploy Dataverse after changing the setting +(as with :ref:`:SolrHostColonPort`). + +Defaults to ``localhost``. + +Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_SOLR_HOST``. +Defaults to ``solr``, when used with ``mp.config.profile=ct`` (:ref:`see below <:ApplicationServerSettings>`). + +dataverse.solr.port ++++++++++++++++++++ + +The Solr server port to connect to. Remember to restart / redeploy Dataverse after changing the setting +(as with :ref:`:SolrHostColonPort`). + +Defaults to ``8983``, the default Solr port. + +Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_SOLR_PORT``. + +dataverse.solr.core ++++++++++++++++++++ + +The name of the Solr core to use for this Dataverse installation. Might be used to switch to a different core quickly. +Remember to restart / redeploy Dataverse after changing the setting (as with :ref:`:SolrHostColonPort`). + +Defaults to ``collection1``. + +Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_SOLR_CORE``. 
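+
+For example, a minimal sketch of pointing an installation at a differently named core via JVM options
+(the core name ``mycollection`` is purely illustrative, not a shipped default):
+
+``./asadmin create-jvm-options "\-Ddataverse.solr.core=mycollection"``
+
+Setting the environment variable ``DATAVERSE_SOLR_CORE=mycollection`` before starting the application server
+would have the same effect.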
+
+dataverse.solr.protocol
++++++++++++++++++++++++
+
+The Solr server URL protocol for the connection. Remember to restart / redeploy Dataverse after changing the setting
+(as with :ref:`:SolrHostColonPort`).
+
+Defaults to ``http``, but might be set to ``https`` for Solr installations served over TLS.
+
+Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_SOLR_PROTOCOL``.
+
+dataverse.solr.path
++++++++++++++++++++
+
+The path part of the Solr endpoint URL (e.g. ``/solr/collection1`` of ``http://localhost:8983/solr/collection1``).
+Might be used to target a Solr API at non-default places. Remember to restart / redeploy Dataverse after changing the
+setting (as with :ref:`:SolrHostColonPort`).
+
+Defaults to ``/solr/${dataverse.solr.core}``, interpolating the core name when used. Make sure to keep the variable
+in the path if you override it and still want your configured core name to be used!
+
+Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_SOLR_PATH``.
+
 dataverse.rserve.host
 +++++++++++++++++++++
 
@@ -1673,6 +1728,21 @@ To facilitate large file upload and download, the Dataverse Software installer b
 
 and restart Payara to apply your change.
 
+mp.config.profile
++++++++++++++++++
+
+MicroProfile Config 2.0 defines the `concept of "profiles" `_.
+They can be used to change configuration values by context. This is used in Dataverse to change some configuration
+defaults when running inside a container context rather than in a classic installation.
+
+As per the spec, you will need to set the configuration value ``mp.config.profile`` to ``ct`` as early as possible.
+This is best done with a system property:
+
+``./asadmin create-system-properties 'mp.config.profile=ct'``
+
+You might also create your own profiles and use these; please refer to the upstream documentation linked above.
+
+
 .. _database-settings:
 
 Database Settings
@@ -2160,6 +2230,8 @@ Limit the number of files in a zip that your Dataverse installation will accept.
 
 ``curl -X PUT -d 2048 http://localhost:8080/api/admin/settings/:ZipUploadFilesLimit``
 
+.. _:SolrHostColonPort:
+
 :SolrHostColonPort
 ++++++++++++++++++
 
@@ -2167,6 +2239,8 @@ By default your Dataverse installation will attempt to connect to Solr on port 8
 
 ``curl -X PUT -d localhost:8983 http://localhost:8080/api/admin/settings/:SolrHostColonPort``
 
+**Note:** Instead of using the database setting, you can alternatively use JVM settings like :ref:`dataverse.solr.host`.
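+
+For example, an illustrative sketch of switching from the database setting to the JVM settings
+(the hostname ``solr.example.org`` is a placeholder):
+
+``curl -X DELETE http://localhost:8080/api/admin/settings/:SolrHostColonPort``
+
+``export DATAVERSE_SOLR_HOST=solr.example.org`` and ``export DATAVERSE_SOLR_PORT=8983``, followed by a restart.
+
+Removing the database setting first matters because it always takes precedence over the MicroProfile Config sources.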
+ :SolrFullTextIndexing +++++++++++++++++++++ From a7fe29c8e2e088fff71a13327e28c7cbb9595c15 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 16 Sep 2022 10:32:10 +0200 Subject: [PATCH 018/173] test(settings): make SystemConfigTest version testing use JvmSetting extension --- .../harvard/iq/dataverse/util/SystemConfigTest.java | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/util/SystemConfigTest.java b/src/test/java/edu/harvard/iq/dataverse/util/SystemConfigTest.java index b8ad0a57748..3bbe331a361 100644 --- a/src/test/java/edu/harvard/iq/dataverse/util/SystemConfigTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/util/SystemConfigTest.java @@ -1,6 +1,7 @@ package edu.harvard.iq.dataverse.util; import edu.harvard.iq.dataverse.settings.JvmSettings; +import edu.harvard.iq.dataverse.util.testing.JvmSetting; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.CsvSource; @@ -26,22 +27,18 @@ void testGetVersion() { } @Test + @JvmSetting(key = JvmSettings.VERSION, value = "100.100") + @JvmSetting(key = JvmSettings.BUILD, value = "FOOBAR") void testGetVersionWithBuild() { - // given - String version = "100.100"; - String build = "FOOBAR"; - System.setProperty(JvmSettings.VERSION.getScopedKey(), version); - System.setProperty(JvmSettings.BUILD.getScopedKey(), build); - // when String result = systemConfig.getVersion(true); // then - assertTrue(result.startsWith(version), "'" + result + "' not starting with " + version); + assertTrue(result.startsWith("100.100"), "'" + result + "' not starting with 100.100"); assertTrue(result.contains("build")); // Cannot test this here - there might be the bundle file present which is not under test control - //assertTrue(result.endsWith(build), "'" + result + "' not ending with " + build); + //assertTrue(result.endsWith("FOOBAR"), "'" + result + "' not ending with FOOBAR"); } @Test From 3fb596e1f97fc074e8dc2056728cb5ace97989e1 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Mon, 27 Jun 2022 14:19:35 +0200 Subject: [PATCH 019/173] refactor(settings): replace dataverse.fqdn and siteUrl lookups via MPCONFIG #7000 - Add both settings to JvmSettings to enable lookup - Refactor SystemConfig.getDataverseSiteUrlStatic to use MPCONFIG, but keep current behaviour of constructing the URL from FQDN or DNS reverse lookup. (Out of scope here, see #6636) - Replace clones of the method in Xrecord, DdiExportUtil, HandlenetServiceBean with direct usages of the static method to avoid unnecessary duplicated code. - Refactor SchemaDotOrgExporterTest with @JvmSetting for site url. - Remove unused constants from SystemConfig - Added default for container usage within "ct" profile, so we avoid extra lookups/settings for development usage. 
See also https://github.com/IQSS/dataverse/issues/6636 --- .../iq/dataverse/HandlenetServiceBean.java | 19 +--- .../edu/harvard/iq/dataverse/api/Info.java | 3 +- .../dataverse/export/ddi/DdiExportUtil.java | 38 +------ .../harvest/server/xoai/Xrecord.java | 25 +---- .../iq/dataverse/settings/JvmSettings.java | 2 + .../iq/dataverse/util/SystemConfig.java | 98 +++++++++---------- .../META-INF/microprofile-config.properties | 5 + .../export/SchemaDotOrgExporterTest.java | 6 +- 8 files changed, 69 insertions(+), 127 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/HandlenetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/HandlenetServiceBean.java index 1a8ee8a85e8..df16991b51e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/HandlenetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/HandlenetServiceBean.java @@ -24,8 +24,6 @@ import java.io.File; import java.io.FileInputStream; -import java.net.InetAddress; -import java.net.UnknownHostException; import java.util.*; import java.util.logging.Level; import java.util.logging.Logger; @@ -34,6 +32,7 @@ import java.security.PrivateKey; /* Handlenet imports: */ +import edu.harvard.iq.dataverse.util.SystemConfig; import net.handle.hdllib.AbstractMessage; import net.handle.hdllib.AbstractResponse; import net.handle.hdllib.AdminRecord; @@ -247,21 +246,7 @@ private String getRegistrationUrl(DvObject dvObject) { } public String getSiteUrl() { - logger.log(Level.FINE,"getSiteUrl"); - String hostUrl = System.getProperty("dataverse.siteUrl"); - if (hostUrl != null && !"".equals(hostUrl)) { - return hostUrl; - } - String hostName = System.getProperty("dataverse.fqdn"); - if (hostName == null) { - try { - hostName = InetAddress.getLocalHost().getCanonicalHostName(); - } catch (UnknownHostException e) { - return null; - } - } - hostUrl = "https://" + hostName; - return hostUrl; + return SystemConfig.getDataverseSiteUrlStatic(); } private byte[] readKey(final String file) { diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Info.java b/src/main/java/edu/harvard/iq/dataverse/api/Info.java index 4fe5cba5b9f..fd7824c15cf 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Info.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Info.java @@ -1,5 +1,6 @@ package edu.harvard.iq.dataverse.api; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.SystemConfig; import javax.ejb.EJB; @@ -44,7 +45,7 @@ public Response getInfo() { @GET @Path("server") public Response getServer() { - return response( req -> ok(systemConfig.getDataverseServer())); + return response( req -> ok(JvmSettings.FQDN.lookup())); } @GET diff --git a/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java b/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java index 4bbcd653ac3..eb7632dd03c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java @@ -32,18 +32,15 @@ import edu.harvard.iq.dataverse.export.DDIExporter; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; -import static edu.harvard.iq.dataverse.util.SystemConfig.FQDN; -import static edu.harvard.iq.dataverse.util.SystemConfig.SITE_URL; import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.FileUtil; +import edu.harvard.iq.dataverse.util.SystemConfig; import edu.harvard.iq.dataverse.util.json.JsonUtil; import 
edu.harvard.iq.dataverse.util.xml.XmlPrinter; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.OutputStream; -import java.net.InetAddress; -import java.net.UnknownHostException; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; @@ -1292,7 +1289,7 @@ private static void writeNotesElement(XMLStreamWriter xmlw, DatasetVersionDTO da // harvesting *all* files are encoded as otherMats; even tabular ones. private static void createOtherMats(XMLStreamWriter xmlw, List fileDtos) throws XMLStreamException { // The preferred URL for this dataverse, for cooking up the file access API links: - String dataverseUrl = getDataverseSiteUrl(); + String dataverseUrl = SystemConfig.getDataverseSiteUrlStatic(); for (FileDTO fileDTo : fileDtos) { // We'll continue using the scheme we've used before, in DVN2-3: non-tabular files are put into otherMat, @@ -1339,7 +1336,7 @@ private static void createOtherMats(XMLStreamWriter xmlw, List fileDtos private static void createOtherMatsFromFileMetadatas(XMLStreamWriter xmlw, List fileMetadatas) throws XMLStreamException { // The preferred URL for this dataverse, for cooking up the file access API links: - String dataverseUrl = getDataverseSiteUrl(); + String dataverseUrl = SystemConfig.getDataverseSiteUrlStatic(); for (FileMetadata fileMetadata : fileMetadatas) { // We'll continue using the scheme we've used before, in DVN2-3: non-tabular files are put into otherMat, @@ -1555,33 +1552,6 @@ private static void saveJsonToDisk(String datasetVersionAsJson) throws IOExcepti Files.write(Paths.get("/tmp/out.json"), datasetVersionAsJson.getBytes()); } - /** - * The "official", designated URL of the site; - * can be defined as a complete URL; or derived from the - * "official" hostname. 
If none of these options is set, - * defaults to the InetAddress.getLocalHOst() and https; - */ - private static String getDataverseSiteUrl() { - String hostUrl = System.getProperty(SITE_URL); - if (hostUrl != null && !"".equals(hostUrl)) { - return hostUrl; - } - String hostName = System.getProperty(FQDN); - if (hostName == null) { - try { - hostName = InetAddress.getLocalHost().getCanonicalHostName(); - } catch (UnknownHostException e) { - hostName = null; - } - } - - if (hostName != null) { - return "https://" + hostName; - } - - return "http://localhost:8080"; - } - @@ -1893,7 +1863,7 @@ private static void createVarDDI(XMLStreamWriter xmlw, DataVariable dv, FileMeta } private static void createFileDscr(XMLStreamWriter xmlw, DatasetVersion datasetVersion) throws XMLStreamException { - String dataverseUrl = getDataverseSiteUrl(); + String dataverseUrl = SystemConfig.getDataverseSiteUrlStatic(); for (FileMetadata fileMetadata : datasetVersion.getFileMetadatas()) { DataFile dataFile = fileMetadata.getDataFile(); diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/server/xoai/Xrecord.java b/src/main/java/edu/harvard/iq/dataverse/harvest/server/xoai/Xrecord.java index 7e115c78f06..4485b798658 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/server/xoai/Xrecord.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/server/xoai/Xrecord.java @@ -8,14 +8,12 @@ import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.export.ExportException; import edu.harvard.iq.dataverse.export.ExportService; -import static edu.harvard.iq.dataverse.util.SystemConfig.FQDN; -import static edu.harvard.iq.dataverse.util.SystemConfig.SITE_URL; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; -import java.net.InetAddress; -import java.net.UnknownHostException; + +import edu.harvard.iq.dataverse.util.SystemConfig; import org.apache.poi.util.ReplacingInputStream; /** @@ -149,7 +147,7 @@ private void writeMetadataStream(InputStream inputStream, OutputStream outputStr private String customMetadataExtensionRef(String identifier) { String ret = "<" + METADATA_FIELD + " directApiCall=\"" - + getDataverseSiteUrl() + + SystemConfig.getDataverseSiteUrlStatic() + DATAVERSE_EXTENDED_METADATA_API + "?exporter=" + DATAVERSE_EXTENDED_METADATA_FORMAT @@ -164,21 +162,4 @@ private String customMetadataExtensionRef(String identifier) { private boolean isExtendedDataverseMetadataMode(String formatName) { return DATAVERSE_EXTENDED_METADATA_FORMAT.equals(formatName); } - - private String getDataverseSiteUrl() { - String hostUrl = System.getProperty(SITE_URL); - if (hostUrl != null && !"".equals(hostUrl)) { - return hostUrl; - } - String hostName = System.getProperty(FQDN); - if (hostName == null) { - try { - hostName = InetAddress.getLocalHost().getCanonicalHostName(); - } catch (UnknownHostException e) { - return null; - } - } - hostUrl = "https://" + hostName; - return hostUrl; - } } diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java index 223e4b86da9..8d2832980cc 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java @@ -41,6 +41,8 @@ public enum JvmSettings { // GENERAL SETTINGS VERSION(PREFIX, "version"), BUILD(PREFIX, "build"), + FQDN(PREFIX, "fqdn"), + SITE_URL(PREFIX, "siteUrl"), ; diff --git 
a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java index bd27405fae5..635f47c5800 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java @@ -8,6 +8,7 @@ import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; import edu.harvard.iq.dataverse.authorization.providers.builtin.BuiltinAuthenticationProvider; import edu.harvard.iq.dataverse.authorization.providers.oauth2.AbstractOAuth2AuthenticationProvider; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.validation.PasswordValidatorUtil; import java.io.FileInputStream; @@ -23,6 +24,7 @@ import java.util.List; import java.util.Map; import java.util.MissingResourceException; +import java.util.Optional; import java.util.Properties; import java.util.ResourceBundle; import java.util.logging.Logger; @@ -61,23 +63,7 @@ public class SystemConfig { AuthenticationServiceBean authenticationService; public static final String DATAVERSE_PATH = "/dataverse/"; - - /** - * A JVM option for the advertised fully qualified domain name (hostname) of - * the Dataverse installation, such as "dataverse.example.com", which may - * differ from the hostname that the server knows itself as. - * - * The equivalent in DVN 3.x was "dvn.inetAddress". - */ - public static final String FQDN = "dataverse.fqdn"; - - /** - * A JVM option for specifying the "official" URL of the site. - * Unlike the FQDN option above, this would be a complete URL, - * with the protocol, port number etc. - */ - public static final String SITE_URL = "dataverse.siteUrl"; - + /** * A JVM option for where files are stored on the file system. */ @@ -340,32 +326,58 @@ public static int getMinutesUntilPasswordResetTokenExpires() { } /** - * The "official", designated URL of the site; - * can be defined as a complete URL; or derived from the - * "official" hostname. If none of these options is set, - * defaults to the InetAddress.getLocalHOst() and https; - * These are legacy JVM options. Will be eventualy replaced - * by the Settings Service configuration. + * Lookup (or construct) the designated URL of this instance from configuration. + * + * Can be defined as a complete URL via dataverse.siteUrl; or derived from the hostname + * dataverse.fqdn and HTTPS. If none of these options is set, defaults to the + * {@link InetAddress#getLocalHost} and HTTPS. + * + * NOTE: This method does not provide any validation. + * TODO: The behaviour of this method is subject to a later change, see + * https://github.com/IQSS/dataverse/issues/6636 + * + * @return The designated URL of this instance as per configuration. */ public String getDataverseSiteUrl() { return getDataverseSiteUrlStatic(); } + /** + * Lookup (or construct) the designated URL of this instance from configuration. + * + * Can be defined as a complete URL via dataverse.siteUrl; or derived from the hostname + * dataverse.fqdn and HTTPS. If none of these options is set, defaults to the + * {@link InetAddress#getLocalHost} and HTTPS. + * + * NOTE: This method does not provide any validation. + * TODO: The behaviour of this method is subject to a later change, see + * https://github.com/IQSS/dataverse/issues/6636 + * + * @return The designated URL of this instance as per configuration. 
+ */ public static String getDataverseSiteUrlStatic() { - String hostUrl = System.getProperty(SITE_URL); - if (hostUrl != null && !"".equals(hostUrl)) { - return hostUrl; + // If dataverse.siteUrl has been configured, simply return it + Optional siteUrl = JvmSettings.SITE_URL.lookupOptional(); + if (siteUrl.isPresent()) { + return siteUrl.get(); } - String hostName = System.getProperty(FQDN); - if (hostName == null) { - try { - hostName = InetAddress.getLocalHost().getCanonicalHostName(); - } catch (UnknownHostException e) { - return null; - } + + // Other wise try to lookup dataverse.fqdn setting and default to HTTPS + Optional fqdn = JvmSettings.FQDN.lookupOptional(); + if (fqdn.isPresent()) { + return "https://" + fqdn.get(); + } + + // Last resort - get the servers local name and use it. + // BEWARE - this is dangerous. + // 1) A server might have a different name than your repository URL. + // 2) The underlying reverse DNS lookup might point to a different name than your repository URL. + // 3) If this server has multiple IPs assigned, which one will it be for the lookup? + try { + return "https://" + InetAddress.getLocalHost().getCanonicalHostName(); + } catch (UnknownHostException e) { + return null; } - hostUrl = "https://" + hostName; - return hostUrl; } /** @@ -375,22 +387,6 @@ public String getPageURLWithQueryString() { return PrettyContext.getCurrentInstance().getRequestURL().toURL() + PrettyContext.getCurrentInstance().getRequestQueryString().toQueryString(); } - /** - * The "official" server's fully-qualified domain name: - */ - public String getDataverseServer() { - // still reliese on a JVM option: - String fqdn = System.getProperty(FQDN); - if (fqdn == null) { - try { - fqdn = InetAddress.getLocalHost().getCanonicalHostName(); - } catch (UnknownHostException e) { - return null; - } - } - return fqdn; - } - public String getGuidesBaseUrl() { String saneDefault = "https://guides.dataverse.org"; String guidesBaseUrl = settingsService.getValueForKey(SettingsServiceBean.Key.GuidesBaseUrl, saneDefault); diff --git a/src/main/resources/META-INF/microprofile-config.properties b/src/main/resources/META-INF/microprofile-config.properties index 16298d83118..a9ee8236c7e 100644 --- a/src/main/resources/META-INF/microprofile-config.properties +++ b/src/main/resources/META-INF/microprofile-config.properties @@ -3,6 +3,11 @@ dataverse.version=${project.version} dataverse.build= +# Default only for containers! 
(keep mimicking the current behaviour - +# changing that is part of https://github.com/IQSS/dataverse/issues/6636) +%ct.dataverse.fqdn=localhost +%ct.dataverse.siteUrl=http://${dataverse.fqdn}:8080 + # DATABASE dataverse.db.host=localhost dataverse.db.port=5432 diff --git a/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java b/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java index b5453e75fe5..7119dfaf834 100644 --- a/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java @@ -6,9 +6,9 @@ import edu.harvard.iq.dataverse.license.LicenseServiceBean; import edu.harvard.iq.dataverse.mocks.MockDatasetFieldSvc; -import static edu.harvard.iq.dataverse.util.SystemConfig.SITE_URL; import static edu.harvard.iq.dataverse.util.SystemConfig.FILES_HIDE_SCHEMA_DOT_ORG_DOWNLOAD_URLS; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.json.JsonParser; import edu.harvard.iq.dataverse.util.json.JsonUtil; @@ -31,6 +31,8 @@ import javax.json.Json; import javax.json.JsonObject; import javax.json.JsonReader; + +import edu.harvard.iq.dataverse.util.testing.JvmSetting; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.Test; @@ -64,6 +66,7 @@ public static void tearDownClass() { * Test of exportDataset method, of class SchemaDotOrgExporter. */ @Test + @JvmSetting(key = JvmSettings.SITE_URL, value = "https://librascholar.org") public void testExportDataset() throws Exception { File datasetVersionJson = new File("src/test/resources/json/dataset-finch2.json"); String datasetVersionAsJson = new String(Files.readAllBytes(Paths.get(datasetVersionJson.getAbsolutePath()))); @@ -92,7 +95,6 @@ public void testExportDataset() throws Exception { Dataverse dataverse = new Dataverse(); dataverse.setName("LibraScholar"); dataset.setOwner(dataverse); - System.setProperty(SITE_URL, "https://librascholar.org"); boolean hideFileUrls = false; if (hideFileUrls) { System.setProperty(FILES_HIDE_SCHEMA_DOT_ORG_DOWNLOAD_URLS, "true"); From 3f19c121dee708831d48c83a4fc819986379e819 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Mon, 27 Jun 2022 15:05:57 +0200 Subject: [PATCH 020/173] docs(settings): update fqdn and siteUrl desc - Notes about MPCONFIG usage. - Rewording to make it more clear how this shall be used. --- .../source/installation/config.rst | 44 ++++++++++++++----- 1 file changed, 33 insertions(+), 11 deletions(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index ab0bad70206..65912e77245 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -1376,30 +1376,52 @@ When changing values these values with ``asadmin``, you'll need to delete the ol It's also possible to change these values by stopping Payara, editing ``payara5/glassfish/domains/domain1/config/domain.xml``, and restarting Payara. +.. _dataverse.fqdn: + dataverse.fqdn ++++++++++++++ -If the Dataverse installation has multiple DNS names, this option specifies the one to be used as the "official" host name. For example, you may want to have dataverse.example.edu, and not the less appealing server-123.socsci.example.edu to appear exclusively in all the registered global identifiers, Data Deposit API records, etc. 
+The URL to access your Dataverse installation gets used in multiple places:
+
+- Email confirmation links
+- Password reset links
+- Generating a Private URL
+- PID minting
+- Exporting to Schema.org format (and showing JSON-LD in HTML's tag)
+- Exporting to DDI format
+- Which Dataverse installation an "external tool" should return to
+- URLs embedded in SWORD API responses
+- ...
 
-The password reset feature requires ``dataverse.fqdn`` to be configured.
+Usually it will follow the pattern ``https:///``.
+The FQDN part of your Dataverse installation URL can be determined by setting ``dataverse.fqdn``.
 
-.. note::
+**Notes:**
 
-    Do note that whenever the system needs to form a service URL, by default, it will be formed with ``https://`` and port 443. I.e.,
-    ``https://{dataverse.fqdn}/``
-    If that does not suit your setup, you can define an additional option, ``dataverse.siteUrl``, explained below.
+- The URL will default to using ``https://`` and no additional port information. If that does not suit your setup, you
+  can define an additional option, ``dataverse.siteUrl``, :ref:`explained below <dataverse.siteUrl>`, which always
+  takes precedence.
+- Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_FQDN``.
+  Defaults to ``localhost`` when used with ``mp.config.profile=ct``.
 
 .. _dataverse.siteUrl:
 
 dataverse.siteUrl
 +++++++++++++++++
 
-.. note::
+Some environments may require using a different URL pattern to access your installation. You might need to use
+HTTP without "S", a non-standard port and so on. This is especially common in development or testing environments.
+
+You can provide a custom-tailored site URL via ``dataverse.siteUrl``, which always takes precedence.
+Example: ``dataverse.siteUrl=http://localhost:8080``
+
+**Notes:**
+
+- This setting may be used in combination with variable replacement, referencing :ref:`dataverse.fqdn` with
+  ``./asadmin create-jvm-options "\-Ddataverse.siteUrl=http\://\${dataverse.fqdn}\:8080"``
+- Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_SITEURL``.
+  Defaults to ``http://${dataverse.fqdn}:8080`` when used with ``mp.config.profile=ct``.
 
-    and specify the protocol and port number you would prefer to be used to advertise the URL for your Dataverse installation. 
- For example, configured in domain.xml: - ``-Ddataverse.fqdn=dataverse.example.edu`` - ``-Ddataverse.siteUrl=http://${dataverse.fqdn}:8080`` dataverse.files.directory +++++++++++++++++++++++++ From f7e7e4aed8e2e089ac7ce55bb583795230d6849e Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Wed, 22 Jun 2022 18:22:08 +0200 Subject: [PATCH 021/173] refactor(settings): replace lookups of dataverse.files.directory with MPCONFIG #7000 - Adding dataverse.files.directory equivalent to JvmSettings - Remove all System.getPropert("dataverse.files.directory") or similar - Add default with /tmp/dataverse via microprofile-config.properties as formerly seen at FileUtil and Dataset only - Refactor SwordConfigurationImpl to reuse the NoSuchElementException thrown by MPCONFIG - Refactor GoogleCloudSubmitToArchiveCommand to use the JvmSettings.lookup and create file stream in try-with-resources --- .../edu/harvard/iq/dataverse/Dataset.java | 9 ++-- .../iq/dataverse/EditDatafilesPage.java | 7 ++- .../datadeposit/SwordConfigurationImpl.java | 52 +++++++++---------- .../filesystem/FileRecordJobListener.java | 7 ++- .../importer/filesystem/FileRecordReader.java | 9 ++-- .../GoogleCloudSubmitToArchiveCommand.java | 31 +++++------ .../impl/ImportFromFileSystemCommand.java | 48 +++++++++-------- .../iq/dataverse/settings/JvmSettings.java | 4 ++ .../harvard/iq/dataverse/util/FileUtil.java | 8 ++- .../iq/dataverse/util/SystemConfig.java | 5 -- .../META-INF/microprofile-config.properties | 3 ++ 11 files changed, 94 insertions(+), 89 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/Dataset.java b/src/main/java/edu/harvard/iq/dataverse/Dataset.java index a4f82d41bac..e2f00d0b54b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/Dataset.java +++ b/src/main/java/edu/harvard/iq/dataverse/Dataset.java @@ -33,6 +33,8 @@ import javax.persistence.Table; import javax.persistence.Temporal; import javax.persistence.TemporalType; + +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.util.StringUtil; import edu.harvard.iq.dataverse.util.SystemConfig; @@ -528,11 +530,8 @@ private Collection getCategoryNames() { @Deprecated public Path getFileSystemDirectory() { Path studyDir = null; - - String filesRootDirectory = System.getProperty("dataverse.files.directory"); - if (filesRootDirectory == null || filesRootDirectory.equals("")) { - filesRootDirectory = "/tmp/files"; - } + + String filesRootDirectory = JvmSettings.FILES_DIRECTORY.lookup(); if (this.getAlternativePersistentIndentifiers() != null && !this.getAlternativePersistentIndentifiers().isEmpty()) { for (AlternativePersistentIdentifier api : this.getAlternativePersistentIndentifiers()) { diff --git a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java index f53e2377a69..a895c90dabe 100644 --- a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java @@ -31,6 +31,7 @@ import edu.harvard.iq.dataverse.ingest.IngestUtil; import edu.harvard.iq.dataverse.license.LicenseServiceBean; import edu.harvard.iq.dataverse.search.IndexServiceBean; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.Setting; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.FileUtil; @@ -2425,10 +2426,8 @@ public boolean isTemporaryPreviewAvailable(String fileSystemId, String mimeType) return false; } - String 
filesRootDirectory = System.getProperty("dataverse.files.directory"); - if (filesRootDirectory == null || filesRootDirectory.isEmpty()) { - filesRootDirectory = "/tmp/files"; - } + // Retrieve via MPCONFIG. Has sane default /tmp/dataverse from META-INF/microprofile-config.properties + String filesRootDirectory = JvmSettings.FILES_DIRECTORY.lookup(); String fileSystemName = filesRootDirectory + "/temp/" + fileSystemId; diff --git a/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/SwordConfigurationImpl.java b/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/SwordConfigurationImpl.java index ce5f9415fcc..1e506c6a0b1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/SwordConfigurationImpl.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/SwordConfigurationImpl.java @@ -1,5 +1,6 @@ package edu.harvard.iq.dataverse.api.datadeposit; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.util.SystemConfig; import java.io.File; import java.util.Arrays; @@ -86,37 +87,32 @@ public boolean storeAndCheckBinary() { @Override public String getTempDirectory() { - String tmpFileDir = System.getProperty(SystemConfig.FILES_DIRECTORY); - if (tmpFileDir != null) { - String swordDirString = tmpFileDir + File.separator + "sword"; - File swordDirFile = new File(swordDirString); - /** - * @todo Do we really need this check? It seems like we do because - * if you create a dataset via the native API and then later try to - * upload a file via SWORD, the directory defined by - * dataverse.files.directory may not exist and we get errors deep in - * the SWORD library code. Could maybe use a try catch in the doPost - * method of our SWORDv2MediaResourceServlet. - */ - if (swordDirFile.exists()) { + // will throw a runtime exception when not found + String tmpFileDir = JvmSettings.FILES_DIRECTORY.lookup(); + + String swordDirString = tmpFileDir + File.separator + "sword"; + File swordDirFile = new File(swordDirString); + /** + * @todo Do we really need this check? It seems like we do because + * if you create a dataset via the native API and then later try to + * upload a file via SWORD, the directory defined by + * dataverse.files.directory may not exist and we get errors deep in + * the SWORD library code. Could maybe use a try catch in the doPost + * method of our SWORDv2MediaResourceServlet. + */ + if (swordDirFile.exists()) { + return swordDirString; + } else { + boolean mkdirSuccess = swordDirFile.mkdirs(); + if (mkdirSuccess) { + logger.info("Created directory " + swordDirString); return swordDirString; } else { - boolean mkdirSuccess = swordDirFile.mkdirs(); - if (mkdirSuccess) { - logger.info("Created directory " + swordDirString); - return swordDirString; - } else { - String msgForSwordUsers = ("Could not determine or create SWORD temp directory. Check logs for details."); - logger.severe(msgForSwordUsers + " Failed to create " + swordDirString); - // sadly, must throw RunTimeException to communicate with SWORD user - throw new RuntimeException(msgForSwordUsers); - } + String msgForSwordUsers = ("Could not determine or create SWORD temp directory. Check logs for details."); + logger.severe(msgForSwordUsers + " Failed to create " + swordDirString); + // sadly, must throw RunTimeException to communicate with SWORD user + throw new RuntimeException(msgForSwordUsers); } - } else { - String msgForSwordUsers = ("JVM option \"" + SystemConfig.FILES_DIRECTORY + "\" not defined. 
Check logs for details."); - logger.severe(msgForSwordUsers); - // sadly, must throw RunTimeException to communicate with SWORD user - throw new RuntimeException(msgForSwordUsers); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordJobListener.java b/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordJobListener.java index 6b82a665c17..ecb998c66af 100644 --- a/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordJobListener.java +++ b/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordJobListener.java @@ -57,6 +57,7 @@ import javax.inject.Named; import javax.servlet.http.HttpServletRequest; +import edu.harvard.iq.dataverse.settings.JvmSettings; import org.apache.commons.io.IOUtils; import java.io.FileReader; @@ -433,8 +434,10 @@ private void loadChecksumManifest() { manifest = checksumManifest; getJobLogger().log(Level.INFO, "Checksum manifest = " + manifest + " (FileSystemImportJob.xml property)"); } - // construct full path - String manifestAbsolutePath = System.getProperty("dataverse.files.directory") + + // Construct full path - retrieve base dir via MPCONFIG. + // (Has sane default /tmp/dataverse from META-INF/microprofile-config.properties) + String manifestAbsolutePath = JvmSettings.FILES_DIRECTORY.lookup() + SEP + dataset.getAuthority() + SEP + dataset.getIdentifier() + SEP + uploadFolder diff --git a/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordReader.java b/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordReader.java index b3d3a7107a6..e3b67e9b0d2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordReader.java +++ b/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordReader.java @@ -24,6 +24,7 @@ import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.batch.jobs.importer.ImportMode; +import edu.harvard.iq.dataverse.settings.JvmSettings; import org.apache.commons.io.filefilter.NotFileFilter; import org.apache.commons.io.filefilter.WildcardFileFilter; @@ -96,9 +97,11 @@ public void init() { @Override public void open(Serializable checkpoint) throws Exception { - - directory = new File(System.getProperty("dataverse.files.directory") - + SEP + dataset.getAuthority() + SEP + dataset.getIdentifier() + SEP + uploadFolder); + + // Retrieve via MPCONFIG. 
Has sane default /tmp/dataverse from META-INF/microprofile-config.properties + String baseDir = JvmSettings.FILES_DIRECTORY.lookup(); + + directory = new File(baseDir + SEP + dataset.getAuthority() + SEP + dataset.getIdentifier() + SEP + uploadFolder); // TODO: // The above goes directly to the filesystem directory configured by the // old "dataverse.files.directory" JVM option (otherwise used for temp diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java index 5d017173685..da2701a41e7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java @@ -1,16 +1,27 @@ package edu.harvard.iq.dataverse.engine.command.impl; +import com.google.auth.oauth2.ServiceAccountCredentials; +import com.google.cloud.storage.Blob; +import com.google.cloud.storage.Bucket; +import com.google.cloud.storage.Storage; +import com.google.cloud.storage.StorageException; +import com.google.cloud.storage.StorageOptions; import edu.harvard.iq.dataverse.Dataset; -import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.DatasetLock.Reason; +import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.authorization.users.ApiToken; import edu.harvard.iq.dataverse.engine.command.Command; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.workflow.step.Failure; import edu.harvard.iq.dataverse.workflow.step.WorkflowStepResult; +import org.apache.commons.codec.binary.Hex; +import javax.json.Json; +import javax.json.JsonObjectBuilder; +import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.PipedInputStream; @@ -21,17 +32,6 @@ import java.util.Map; import java.util.logging.Logger; -import javax.json.Json; -import javax.json.JsonObjectBuilder; - -import org.apache.commons.codec.binary.Hex; -import com.google.auth.oauth2.ServiceAccountCredentials; -import com.google.cloud.storage.Blob; -import com.google.cloud.storage.Bucket; -import com.google.cloud.storage.Storage; -import com.google.cloud.storage.StorageException; -import com.google.cloud.storage.StorageOptions; - @RequiredPermissions(Permission.PublishDataset) public class GoogleCloudSubmitToArchiveCommand extends AbstractSubmitToArchiveCommand implements Command { @@ -56,10 +56,11 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t statusObject.add(DatasetVersion.ARCHIVAL_STATUS, DatasetVersion.ARCHIVAL_STATUS_FAILURE); statusObject.add(DatasetVersion.ARCHIVAL_STATUS_MESSAGE, "Bag not transferred"); - try { - FileInputStream fis = new FileInputStream(System.getProperty("dataverse.files.directory") + System.getProperty("file.separator") + "googlecloudkey.json"); + String cloudKeyFile = JvmSettings.FILES_DIRECTORY.lookup() + File.separator + "googlecloudkey.json"; + + try (FileInputStream cloudKeyStream = new FileInputStream(cloudKeyFile)) { storage = StorageOptions.newBuilder() - .setCredentials(ServiceAccountCredentials.fromStream(fis)) + .setCredentials(ServiceAccountCredentials.fromStream(cloudKeyStream)) .setProjectId(projectName) 
.build() .getService(); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ImportFromFileSystemCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ImportFromFileSystemCommand.java index 64beba82450..5f31ea756eb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ImportFromFileSystemCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ImportFromFileSystemCommand.java @@ -12,17 +12,20 @@ import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException; -import static edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder.jsonObjectBuilder; -import java.io.File; -import java.util.Properties; -import java.util.logging.Level; -import java.util.logging.Logger; +import edu.harvard.iq.dataverse.settings.JvmSettings; + import javax.batch.operations.JobOperator; import javax.batch.operations.JobSecurityException; import javax.batch.operations.JobStartException; import javax.batch.runtime.BatchRuntime; import javax.json.JsonObject; import javax.json.JsonObjectBuilder; +import java.io.File; +import java.util.Properties; +import java.util.logging.Level; +import java.util.logging.Logger; + +import static edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder.jsonObjectBuilder; @RequiredPermissions(Permission.EditDataset) public class ImportFromFileSystemCommand extends AbstractCommand { @@ -69,18 +72,20 @@ public JsonObject execute(CommandContext ctxt) throws CommandException { logger.info(error); throw new IllegalCommandException(error, this); } - File directory = new File(System.getProperty("dataverse.files.directory") - + File.separator + dataset.getAuthority() + File.separator + dataset.getIdentifier()); - // TODO: - // The above goes directly to the filesystem directory configured by the - // old "dataverse.files.directory" JVM option (otherwise used for temp - // files only, after the Multistore implementation (#6488). - // We probably want package files to be able to use specific stores instead. - // More importantly perhaps, the approach above does not take into account - // if the dataset may have an AlternativePersistentIdentifier, that may be - // designated isStorageLocationDesignator() - i.e., if a different identifer - // needs to be used to name the storage directory, instead of the main/current - // persistent identifier above. + + File directory = new File( + String.join(File.separator, JvmSettings.FILES_DIRECTORY.lookup(), + dataset.getAuthority(), dataset.getIdentifier())); + + // TODO: The above goes directly to the filesystem directory configured by the + // old "dataverse.files.directory" JVM option (otherwise used for temp + // files only, after the Multistore implementation (#6488). + // We probably want package files to be able to use specific stores instead. + // More importantly perhaps, the approach above does not take into account + // if the dataset may have an AlternativePersistentIdentifier, that may be + // designated isStorageLocationDesignator() - i.e., if a different identifer + // needs to be used to name the storage directory, instead of the main/current + // persistent identifier above. if (!isValidDirectory(directory)) { String error = "Dataset directory is invalid. 
" + directory; logger.info(error); @@ -93,11 +98,10 @@ public JsonObject execute(CommandContext ctxt) throws CommandException { throw new IllegalCommandException(error, this); } - File uploadDirectory = new File(System.getProperty("dataverse.files.directory") - + File.separator + dataset.getAuthority() + File.separator + dataset.getIdentifier() - + File.separator + uploadFolder); - // TODO: - // see the comment above. + File uploadDirectory = new File(String.join(File.separator, JvmSettings.FILES_DIRECTORY.lookup(), + dataset.getAuthority(), dataset.getIdentifier(), uploadFolder)); + + // TODO: see the comment above. if (!isValidDirectory(uploadDirectory)) { String error = "Upload folder is not a valid directory."; logger.info(error); diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java index 223e4b86da9..12e5e311278 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java @@ -42,6 +42,10 @@ public enum JvmSettings { VERSION(PREFIX, "version"), BUILD(PREFIX, "build"), + // FILES SETTINGS + SCOPE_FILES(PREFIX, "files"), + FILES_DIRECTORY(SCOPE_FILES, "directory"), + ; private static final String SCOPE_SEPARATOR = "."; diff --git a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java index 893c62b3cb0..a2c55d41613 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java @@ -40,6 +40,7 @@ import edu.harvard.iq.dataverse.ingest.IngestServiceShapefileHelper; import edu.harvard.iq.dataverse.ingest.IngestableDataChecker; import edu.harvard.iq.dataverse.license.License; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.util.file.BagItFileHandler; import edu.harvard.iq.dataverse.util.file.CreateDataFileResult; import edu.harvard.iq.dataverse.util.file.BagItFileHandlerFactory; @@ -1389,11 +1390,8 @@ public static boolean canIngestAsTabular(String mimeType) { } public static String getFilesTempDirectory() { - String filesRootDirectory = System.getProperty("dataverse.files.directory"); - if (filesRootDirectory == null || filesRootDirectory.equals("")) { - filesRootDirectory = "/tmp/files"; - } - + + String filesRootDirectory = JvmSettings.FILES_DIRECTORY.lookup(); String filesTempDirectory = filesRootDirectory + "/temp"; if (!Files.exists(Paths.get(filesTempDirectory))) { diff --git a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java index bd27405fae5..e9313e70218 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java @@ -78,11 +78,6 @@ public class SystemConfig { */ public static final String SITE_URL = "dataverse.siteUrl"; - /** - * A JVM option for where files are stored on the file system. - */ - public static final String FILES_DIRECTORY = "dataverse.files.directory"; - /** * Some installations may not want download URLs to their files to be * available in Schema.org JSON-LD output. 
diff --git a/src/main/resources/META-INF/microprofile-config.properties b/src/main/resources/META-INF/microprofile-config.properties index 16298d83118..ab219071767 100644 --- a/src/main/resources/META-INF/microprofile-config.properties +++ b/src/main/resources/META-INF/microprofile-config.properties @@ -3,6 +3,9 @@ dataverse.version=${project.version} dataverse.build= +# FILES +dataverse.files.directory=/tmp/dataverse + # DATABASE dataverse.db.host=localhost dataverse.db.port=5432 From 5c2c7022ad9f11234b0e33ddaf3a0aa2696ab154 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Wed, 22 Jun 2022 22:27:30 +0200 Subject: [PATCH 022/173] docs(settings): provide more detail for dataverse.files.directory --- doc/sphinx-guides/source/api/native-api.rst | 2 ++ doc/sphinx-guides/source/installation/config.rst | 16 +++++++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 339a291bf4d..6dd1bbab728 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -552,6 +552,8 @@ You should expect an HTTP 200 ("OK") response and JSON indicating the database I .. note:: Only a Dataverse installation account with superuser permissions is allowed to include files when creating a dataset via this API. Adding files this way only adds their file metadata to the database, you will need to manually add the physical files to the file system. +.. _api-import-dataset: + Import a Dataset into a Dataverse Collection ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index ab0bad70206..89329ea3821 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -274,6 +274,8 @@ If you wish to change which store is used by default, you'll need to delete the It is also possible to set maximum file upload size limits per store. See the :ref:`:MaxFileUploadSizeInBytes` setting below. +.. _storage-files-dir: + File Storage ++++++++++++ @@ -1404,7 +1406,19 @@ dataverse.siteUrl dataverse.files.directory +++++++++++++++++++++++++ -This is how you configure the path Dataverse uses for temporary files. (File store specific dataverse.files.\.directory options set the permanent data storage locations.) +Please provide an absolute path to a directory backed by some mounted file system. This directory is used for a number +of purposes: + +1. ``/temp`` after uploading, data is temporarily stored here for ingest and/or before + shipping to the final storage destination. +2. ``/sword`` a place to store uploads via the :doc:`../api/sword` before transfer + to final storage location and/or ingest. +3. ``//`` data location for file system imports, see + :ref:`api-import-dataset`. +4. ``/googlecloudkey.json`` used with :ref:`Google Cloud Configuration` for BagIt exports. + +This directory might also be used for permanent storage of data, but this setting is independent from +:ref:`storage-files-dir` configuration. 
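+For example (illustrative values only), with the default ``/tmp/dataverse`` and a dataset
+identified by ``10.5072/FK2ABCDEF``, the layout might look like this::
+
+    /tmp/dataverse/temp                             # temporary storage during upload and ingest
+    /tmp/dataverse/sword                            # staging area for SWORD uploads
+    /tmp/dataverse/10.5072/FK2ABCDEF/importFolder   # filesystem import location (authority/identifier/upload folder)
+    /tmp/dataverse/googlecloudkey.json              # key file used by the Google Cloud BagIt archiver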
dataverse.auth.password-reset-timeout-in-minutes ++++++++++++++++++++++++++++++++++++++++++++++++ From d7ab9f6e5359356db3b01ab9e6f87347cf117fe7 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Mon, 27 Jun 2022 15:11:01 +0200 Subject: [PATCH 023/173] style: replace system prop 'file.separator' with File.separator --- .../batch/jobs/importer/filesystem/FileRecordJobListener.java | 3 ++- .../batch/jobs/importer/filesystem/FileRecordReader.java | 2 +- .../java/edu/harvard/iq/dataverse/batch/util/LoggingUtil.java | 4 ++-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordJobListener.java b/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordJobListener.java index ecb998c66af..7837474fc27 100644 --- a/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordJobListener.java +++ b/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordJobListener.java @@ -60,6 +60,7 @@ import edu.harvard.iq.dataverse.settings.JvmSettings; import org.apache.commons.io.IOUtils; +import java.io.File; import java.io.FileReader; import java.io.IOException; import java.sql.Timestamp; @@ -80,7 +81,7 @@ @Dependent public class FileRecordJobListener implements ItemReadListener, StepListener, JobListener { - public static final String SEP = System.getProperty("file.separator"); + public static final String SEP = File.separator; private static final UserNotification.Type notifyType = UserNotification.Type.FILESYSTEMIMPORT; diff --git a/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordReader.java b/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordReader.java index e3b67e9b0d2..a4f8ffd2378 100644 --- a/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordReader.java +++ b/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordReader.java @@ -55,7 +55,7 @@ @Dependent public class FileRecordReader extends AbstractItemReader { - public static final String SEP = System.getProperty("file.separator"); + public static final String SEP = File.separator; @Inject JobContext jobContext; diff --git a/src/main/java/edu/harvard/iq/dataverse/batch/util/LoggingUtil.java b/src/main/java/edu/harvard/iq/dataverse/batch/util/LoggingUtil.java index 4a778dc7abb..a2f76ca953d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/batch/util/LoggingUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/batch/util/LoggingUtil.java @@ -154,8 +154,8 @@ public static Logger getJobLogger(String jobId) { try { Logger jobLogger = Logger.getLogger("job-"+jobId); FileHandler fh; - String logDir = System.getProperty("com.sun.aas.instanceRoot") + System.getProperty("file.separator") - + "logs" + System.getProperty("file.separator") + "batch-jobs" + System.getProperty("file.separator"); + String logDir = System.getProperty("com.sun.aas.instanceRoot") + File.separator + + "logs" + File.separator + "batch-jobs" + File.separator; checkCreateLogDirectory( logDir ); fh = new FileHandler(logDir + "job-" + jobId + ".log"); logger.log(Level.INFO, "JOB LOG: " + logDir + "job-" + jobId + ".log"); From 2af2d7c6106d890cb7d01872ed66b99143929385 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Mon, 19 Sep 2022 14:52:28 +0200 Subject: [PATCH 024/173] fix(test): make UrlTokenUtilTest not assume site url For unknown reasons, the test assumed the site url / fqdn to be "https://librascholar.org", 
which might be coming from some test order side effect. Now the test sets the site URL setting to have control over the generated data. On a related note, this meant to upgrade the test from JUnit4 to JUnit5 plus some minor code cleanups. --- .../iq/dataverse/util/UrlTokenUtilTest.java | 35 +++++++++++-------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/util/UrlTokenUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/util/UrlTokenUtilTest.java index ffc6b813045..782890627e1 100644 --- a/src/test/java/edu/harvard/iq/dataverse/util/UrlTokenUtilTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/util/UrlTokenUtilTest.java @@ -6,24 +6,25 @@ import edu.harvard.iq.dataverse.FileMetadata; import edu.harvard.iq.dataverse.GlobalId; import edu.harvard.iq.dataverse.authorization.users.ApiToken; -import static org.junit.Assert.assertEquals; -import static org.mockito.Mockito.when; +import edu.harvard.iq.dataverse.settings.JvmSettings; +import edu.harvard.iq.dataverse.util.testing.JvmSetting; +import org.junit.jupiter.api.Test; import java.util.ArrayList; import java.util.List; -import org.junit.Test; -import org.junit.jupiter.api.extension.ExtendWith; -import org.mockito.Mock; -import org.mockito.junit.jupiter.MockitoExtension; +import static org.junit.jupiter.api.Assertions.assertEquals; public class UrlTokenUtilTest { @Test + @JvmSetting(key = JvmSettings.SITE_URL, value = "https://foobar") public void testGetToolUrlWithOptionalQueryParameters() { - + // given + String siteUrl = "https://foobar"; + DataFile dataFile = new DataFile(); - dataFile.setId(42l); + dataFile.setId(42L); FileMetadata fmd = new FileMetadata(); DatasetVersion dv = new DatasetVersion(); Dataset ds = new Dataset(); @@ -31,20 +32,26 @@ public void testGetToolUrlWithOptionalQueryParameters() { ds.setGlobalId(new GlobalId("doi:10.5072/FK2ABCDEF")); dv.setDataset(ds); fmd.setDatasetVersion(dv); - List fmdl = new ArrayList(); + List fmdl = new ArrayList<>(); fmdl.add(fmd); dataFile.setFileMetadatas(fmdl); + ApiToken apiToken = new ApiToken(); apiToken.setTokenString("7196b5ce-f200-4286-8809-03ffdbc255d7"); + + // when & then 1/2 URLTokenUtil urlTokenUtil = new URLTokenUtil(dataFile, apiToken, fmd, "en"); assertEquals("en", urlTokenUtil.replaceTokensWithValues("{localeCode}")); assertEquals("42 test en", urlTokenUtil.replaceTokensWithValues("{fileId} test {localeCode}")); assertEquals("42 test en", urlTokenUtil.replaceTokensWithValues("{fileId} test {localeCode}")); - - assertEquals("https://librascholar.org/api/files/42/metadata?key=" + apiToken.getTokenString(), urlTokenUtil.replaceTokensWithValues("{siteUrl}/api/files/{fileId}/metadata?key={apiToken}")); - + assertEquals( siteUrl + "/api/files/42/metadata?key=" + apiToken.getTokenString(), + urlTokenUtil.replaceTokensWithValues("{siteUrl}/api/files/{fileId}/metadata?key={apiToken}")); + + // when & then 2/2 URLTokenUtil urlTokenUtil2 = new URLTokenUtil(ds, apiToken, "en"); - assertEquals("https://librascholar.org/api/datasets/50?key=" + apiToken.getTokenString(), urlTokenUtil2.replaceTokensWithValues("{siteUrl}/api/datasets/{datasetId}?key={apiToken}")); - assertEquals("https://librascholar.org/api/datasets/:persistentId/?persistentId=doi:10.5072/FK2ABCDEF&key=" + apiToken.getTokenString(), urlTokenUtil2.replaceTokensWithValues("{siteUrl}/api/datasets/:persistentId/?persistentId={datasetPid}&key={apiToken}")); + assertEquals(siteUrl + "/api/datasets/50?key=" + apiToken.getTokenString(), + 
urlTokenUtil2.replaceTokensWithValues("{siteUrl}/api/datasets/{datasetId}?key={apiToken}")); + assertEquals(siteUrl + "/api/datasets/:persistentId/?persistentId=doi:10.5072/FK2ABCDEF&key=" + apiToken.getTokenString(), + urlTokenUtil2.replaceTokensWithValues("{siteUrl}/api/datasets/:persistentId/?persistentId={datasetPid}&key={apiToken}")); } } From cbc7f8af47e2dccfaa6d55e9f78c07166c2d3b5e Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Mon, 4 Jul 2022 18:54:46 +0200 Subject: [PATCH 025/173] feat(settings): add rserve properties via MPCONFIG #7000 --- .../edu/harvard/iq/dataverse/settings/JvmSettings.java | 8 ++++++++ .../resources/META-INF/microprofile-config.properties | 7 +++++++ 2 files changed, 15 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java index 223e4b86da9..6c5131219ff 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java @@ -42,6 +42,14 @@ public enum JvmSettings { VERSION(PREFIX, "version"), BUILD(PREFIX, "build"), + // RSERVE CONNECTION + SCOPE_RSERVE(PREFIX, "rserve"), + RSERVE_HOST(SCOPE_RSERVE, "host"), + RSERVE_PORT(SCOPE_RSERVE, "port", "dataverse.ingest.rserve.port"), + RSERVE_USER(SCOPE_RSERVE, "user"), + RSERVE_PASSWORD(SCOPE_RSERVE, "password"), + RSERVE_TEMPDIR(SCOPE_RSERVE, "tempdir"), + ; private static final String SCOPE_SEPARATOR = "."; diff --git a/src/main/resources/META-INF/microprofile-config.properties b/src/main/resources/META-INF/microprofile-config.properties index 16298d83118..c7b907edb6c 100644 --- a/src/main/resources/META-INF/microprofile-config.properties +++ b/src/main/resources/META-INF/microprofile-config.properties @@ -8,3 +8,10 @@ dataverse.db.host=localhost dataverse.db.port=5432 dataverse.db.user=dataverse dataverse.db.name=dataverse + +# RSERVE +dataverse.rserve.host=localhost +dataverse.rserve.port=6311 +dataverse.rserve.username=rserve +dataverse.rserve.password=rserve +dataverse.rserve.tempdir=/tmp From 6732b4bc578ad0b2f410dbed6d482e377c86fde9 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Mon, 4 Jul 2022 18:54:57 +0200 Subject: [PATCH 026/173] refactor(rserve): introduce MPCONFIG settings retrieval #7000 1. Instead of reading the configuration from system properties only, switch to using MPCONFIG and JvmSettings fluent API. 2. Instead of saving the configuration in a static variable, retrieve the config from the constructor. This has 2 advantages: 1) no worries about execution order and MPCONFIG not yet ready, 2) update the readers with new config settings when changed (no need to restart). 
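Condensed from the diff below, the pattern changes roughly like this (sketch only, not
part of the patch):

    // before: static field, read once from a system property with manual fallback code
    private static String RSERVE_HOST = System.getProperty("dataverse.rserve.host");

    // after: per-instance field, resolved through MPCONFIG when the object is created,
    // falling back to the defaults shipped in META-INF/microprofile-config.properties
    private final String RSERVE_HOST;

    public RemoteDataFrameService() {
        this.RSERVE_HOST = JvmSettings.RSERVE_HOST.lookup();
        this.RSERVE_PORT = JvmSettings.RSERVE_PORT.lookup(Integer.class);
    }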
--- .../impl/plugins/rdata/RDATAFileReader.java | 35 ++++------ .../rserve/RemoteDataFrameService.java | 68 ++++++------------- 2 files changed, 33 insertions(+), 70 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/rdata/RDATAFileReader.java b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/rdata/RDATAFileReader.java index c2899b29d1f..1ec0c389049 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/rdata/RDATAFileReader.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/rdata/RDATAFileReader.java @@ -31,6 +31,7 @@ import javax.inject.Inject; // Rosuda Wrappers and Methods for R-calls to Rserve +import edu.harvard.iq.dataverse.settings.JvmSettings; import org.rosuda.REngine.REXP; import org.rosuda.REngine.REXPMismatchException; import org.rosuda.REngine.RList; @@ -88,10 +89,10 @@ public class RDATAFileReader extends TabularDataFileReader { static private String RSCRIPT_WRITE_DVN_TABLE = ""; // RServe static variables - private static String RSERVE_HOST = System.getProperty("dataverse.rserve.host"); - private static String RSERVE_USER = System.getProperty("dataverse.rserve.user"); - private static String RSERVE_PASSWORD = System.getProperty("dataverse.rserve.password"); - private static int RSERVE_PORT; + private final String RSERVE_HOST; + private final int RSERVE_PORT; + private final String RSERVE_USER; + private final String RSERVE_PASSWORD; // TODO: // we're not using these time/data formats for anything, are we? @@ -138,24 +139,6 @@ public class RDATAFileReader extends TabularDataFileReader { * This is primarily to construct the R-Script */ static { - /* - * Set defaults fallbacks for class properties - */ - if (RSERVE_HOST == null) - RSERVE_HOST = "localhost"; - - if (RSERVE_USER == null) - RSERVE_USER = "rserve"; - - if (RSERVE_PASSWORD == null) - RSERVE_PASSWORD = "rserve"; - - if (System.getProperty("dataverse.ingest.rserve.port") == null) - RSERVE_PORT = 6311; - else - RSERVE_PORT = Integer.parseInt(System.getProperty("dataverse.rserve.port")); - - // Load R Scripts into memory, so that we can run them via R-serve RSCRIPT_WRITE_DVN_TABLE = readLocalResource("scripts/write.table.R"); RSCRIPT_GET_DATASET = readLocalResource("scripts/get.dataset.R"); @@ -451,7 +434,13 @@ public RDATAFileReader(TabularDataFileReaderSpi originator) { super(originator); - + // These settings have sane defaults in resources/META-INF/microprofile-config.properties, + // ready to be overridden by a sysadmin. Every time a file would be read with this file reader, + // a new reader will be created, reading from the cached config source settings with minimal overhead. 
+ this.RSERVE_HOST = JvmSettings.RSERVE_HOST.lookup(); + this.RSERVE_PORT = JvmSettings.RSERVE_PORT.lookup(Integer.class); + this.RSERVE_USER = JvmSettings.RSERVE_USER.lookup(); + this.RSERVE_PASSWORD = JvmSettings.RSERVE_PASSWORD.lookup(); LOG.fine("RDATAFileReader: INSIDE RDATAFileReader"); diff --git a/src/main/java/edu/harvard/iq/dataverse/rserve/RemoteDataFrameService.java b/src/main/java/edu/harvard/iq/dataverse/rserve/RemoteDataFrameService.java index f13b6f11434..df2e44ecb27 100644 --- a/src/main/java/edu/harvard/iq/dataverse/rserve/RemoteDataFrameService.java +++ b/src/main/java/edu/harvard/iq/dataverse/rserve/RemoteDataFrameService.java @@ -41,6 +41,7 @@ import java.util.Set; import java.util.logging.Logger; +import edu.harvard.iq.dataverse.settings.JvmSettings; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.RandomStringUtils; @@ -72,57 +73,33 @@ public class RemoteDataFrameService { private static String TMP_TABDATA_FILE_EXT = ".tab"; private static String TMP_RDATA_FILE_EXT = ".RData"; - - private static String RSERVE_HOST = null; - private static String RSERVE_USER = null; - private static String RSERVE_PWD = null; - private static int RSERVE_PORT = -1; + + // These settings have sane defaults in resources/META-INF/microprofile-config.properties, + // ready to be overridden by a sysadmin + private final String RSERVE_HOST; + private final String RSERVE_USER; + private final String RSERVE_PWD; + private final int RSERVE_PORT; + private final String RSERVE_TMP_DIR; private static String DATAVERSE_R_FUNCTIONS = "scripts/dataverse_r_functions.R"; private static String DATAVERSE_R_PREPROCESSING = "scripts/preprocess.R"; - - public static String LOCAL_TEMP_DIR = System.getProperty("java.io.tmpdir"); - public static String RSERVE_TMP_DIR=null; public String PID = null; public String tempFileNameIn = null; public String tempFileNameOut = null; - - static { - - RSERVE_TMP_DIR = System.getProperty("dataverse.rserve.tempdir"); - - if (RSERVE_TMP_DIR == null){ - RSERVE_TMP_DIR = "/tmp/"; - } - - RSERVE_HOST = System.getProperty("dataverse.rserve.host"); - if (RSERVE_HOST == null){ - RSERVE_HOST= "localhost"; - } - - RSERVE_USER = System.getProperty("dataverse.rserve.user"); - if (RSERVE_USER == null){ - RSERVE_USER= "rserve"; - } - - RSERVE_PWD = System.getProperty("dataverse.rserve.password"); - if (RSERVE_PWD == null){ - RSERVE_PWD= "rserve"; - } - - - if (System.getProperty("dataverse.rserve.port") == null ){ - RSERVE_PORT= 6311; - } else { - RSERVE_PORT = Integer.parseInt(System.getProperty("dataverse.rserve.port")); - } - - } - - public RemoteDataFrameService() { + // These settings have sane defaults in resources/META-INF/microprofile-config.properties, + // ready to be overridden by a sysadmin. Config sources have their own caches, so adding + // these here means the setting can be changed dynamically without too much overhead. 
+ this.RSERVE_HOST = JvmSettings.RSERVE_HOST.lookup(); + this.RSERVE_USER = JvmSettings.RSERVE_USER.lookup(); + this.RSERVE_PWD = JvmSettings.RSERVE_PASSWORD.lookup(); + this.RSERVE_PORT = JvmSettings.RSERVE_PORT.lookup(Integer.class); + this.RSERVE_TMP_DIR = JvmSettings.RSERVE_TEMPDIR.lookup(); + + // initialization PID = RandomStringUtils.randomNumeric(6); @@ -703,15 +680,12 @@ public Map runDataFrameRequest(RJobRequest jobRequest, RConnecti public File transferRemoteFile(RConnection connection, String targetFilename, String tmpFilePrefix, String tmpFileExt, int fileSize) { - // set up a local temp file: - + // set up a local temp file: File tmpResultFile = null; - String resultFile = tmpFilePrefix + PID + "." + tmpFileExt; - RFileInputStream rInStream = null; OutputStream outbr = null; try { - tmpResultFile = new File(LOCAL_TEMP_DIR, resultFile); + tmpResultFile = File.createTempFile(tmpFilePrefix + PID, "."+tmpFileExt); outbr = new BufferedOutputStream(new FileOutputStream(tmpResultFile)); // open the input stream rInStream = connection.openFile(targetFilename); From d951f99bfc12440766add7f13cc1afb84f557448 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Mon, 4 Jul 2022 19:05:21 +0200 Subject: [PATCH 027/173] fix(settings): align Rserve tempdir default to docs #7000 The docs said the default is "/tmp/Rserve", while the code had "/tmp". Changing the code default to the documented one. --- src/main/resources/META-INF/microprofile-config.properties | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/resources/META-INF/microprofile-config.properties b/src/main/resources/META-INF/microprofile-config.properties index c7b907edb6c..8d2793eadbf 100644 --- a/src/main/resources/META-INF/microprofile-config.properties +++ b/src/main/resources/META-INF/microprofile-config.properties @@ -14,4 +14,4 @@ dataverse.rserve.host=localhost dataverse.rserve.port=6311 dataverse.rserve.username=rserve dataverse.rserve.password=rserve -dataverse.rserve.tempdir=/tmp +dataverse.rserve.tempdir=/tmp/Rserve From 507ae82a0b0674cce8d23f77a196894194396ea9 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Mon, 4 Jul 2022 19:06:53 +0200 Subject: [PATCH 028/173] docs(settings): add Rserve MPCONFIG to guide #7000 --- .../source/installation/config.rst | 43 ++++++++++++++++--- 1 file changed, 38 insertions(+), 5 deletions(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index ab0bad70206..ae0d2cb0b26 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -1465,27 +1465,60 @@ Can also be set via *MicroProfile Config API* sources, e.g. the environment vari dataverse.rserve.host +++++++++++++++++++++ -Host name for Rserve, used for tasks that require use of R (to ingest RData files and to save tabular data as RData frames). +Host name for Rserve, used for tasks that require use of R (to ingest RData +files and to save tabular data as RData frames). + +Defaults to ``localhost``. + +Can also be set via *MicroProfile Config API* sources, e.g. the environment +variable ``DATAVERSE_RSERVE_HOST``. dataverse.rserve.port +++++++++++++++++++++ -Port number for Rserve, used for tasks that require use of R (to ingest RData files and to save tabular data as RData frames). +Port number for Rserve, used for tasks that require use of R (to ingest RData +files and to save tabular data as RData frames). + +Defaults to ``6311``. 
+ +Can also be set via *MicroProfile Config API* sources, e.g. the environment +variable ``DATAVERSE_RSERVE_PORT``. dataverse.rserve.user +++++++++++++++++++++ -Username for Rserve, used for tasks that require use of R (to ingest RData files and to save tabular data as RData frames). +Username for Rserve, used for tasks that require use of R (to ingest RData +files and to save tabular data as RData frames). + +Defaults to ``rserve``. + +Can also be set via *MicroProfile Config API* sources, e.g. the environment +variable ``DATAVERSE_RSERVE_USER``. dataverse.rserve.password +++++++++++++++++++++++++ -Password for Rserve, used for tasks that require use of R (to ingest RData files and to save tabular data as RData frames). +Password for Rserve, used for tasks that require use of R (to ingest RData +files and to save tabular data as RData frames). + +Defaults to ``rserve``. + +Can also be set via *MicroProfile Config API* sources, e.g. the environment +variable ``DATAVERSE_RSERVE_PASSWORD``. dataverse.rserve.tempdir ++++++++++++++++++++++++ -Temporary directory used by Rserve (defaults to /tmp/Rserv). Note that this location is local to the host on which Rserv is running (specified in ``dataverse.rserve.host`` above). When talking to Rserve, Dataverse needs to know this location in order to generate absolute path names of the files on the other end. +Temporary directory used by Rserve (defaults to /tmp/Rserv). Note that this +location is local to the host on which Rserv is running (specified in +``dataverse.rserve.host`` above). When talking to Rserve, Dataverse needs to +know this location in order to generate absolute path names of the files on the +other end. + +Defaults to ``/tmp``. + +Can also be set via *MicroProfile Config API* sources, e.g. the environment +variable ``DATAVERSE_RSERVE_TEMPDIR``. .. _dataverse.dropbox.key: From 139f0f7d5c762533e4fdcec5dc65e487170e570f Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 8 Mar 2022 17:27:51 +0100 Subject: [PATCH 029/173] feat(ct-base): add new base container image in submodule --- modules/container-base/pom.xml | 90 ++++++++ .../container-base/src/main/docker/Dockerfile | 204 ++++++++++++++++++ .../src/main/docker/assembly.xml | 17 ++ .../src/main/docker/scripts/entrypoint.sh | 17 ++ .../init_1_generate_deploy_commands.sh | 65 ++++++ .../main/docker/scripts/startInForeground.sh | 89 ++++++++ modules/dataverse-parent/pom.xml | 51 +++++ 7 files changed, 533 insertions(+) create mode 100644 modules/container-base/pom.xml create mode 100644 modules/container-base/src/main/docker/Dockerfile create mode 100644 modules/container-base/src/main/docker/assembly.xml create mode 100644 modules/container-base/src/main/docker/scripts/entrypoint.sh create mode 100644 modules/container-base/src/main/docker/scripts/init_1_generate_deploy_commands.sh create mode 100644 modules/container-base/src/main/docker/scripts/startInForeground.sh diff --git a/modules/container-base/pom.xml b/modules/container-base/pom.xml new file mode 100644 index 00000000000..8cb7e1ac795 --- /dev/null +++ b/modules/container-base/pom.xml @@ -0,0 +1,90 @@ + + + 4.0.0 + + + edu.harvard.iq + dataverse-parent + ${revision} + ../dataverse-parent + + + io.gdcc + container-base + ${packaging.type} + Container Base Image + This module provides an application server base image to be decorated with the Dataverse app. 
+ + + + + pom + + + + + ct + + docker-build + + + + + + + org.apache.maven.plugins + maven-dependency-plugin + + + unpack + initialize + + unpack + + + + + fish.payara.distributions + payara + ${payara.version} + zip + false + ${project.build.directory} + + + + + + + + + + io.fabric8 + docker-maven-plugin + true + + + + base + %g/base:jdk${target.java.version} + ${ct.registry} + + Dockerfile + + openjdk:${target.java.version}-jre + + @ + + assembly.xml + + + + + + + + + + + \ No newline at end of file diff --git a/modules/container-base/src/main/docker/Dockerfile b/modules/container-base/src/main/docker/Dockerfile new file mode 100644 index 00000000000..635fbd89142 --- /dev/null +++ b/modules/container-base/src/main/docker/Dockerfile @@ -0,0 +1,204 @@ +# Copyright 2019 Forschungszentrum Jülich GmbH +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# +################################################################################################################ +# +# THIS FILE IS TO BE USED WITH MAVEN DOCKER BUILD: +# mvn -Pct clean package docker:build +# +################################################################################################################ +# +# Some commands used are inspired by https://github.com/payara/Payara/tree/master/appserver/extras/docker-images. +# Most parts origin from older versions of https://github.com/gdcc/dataverse-kubernetes. +# +# We are not using upstream Payara images because: +# - Using same base image as Solr (https://hub.docker.com/_/solr) is reducing pulls +# - Their image is less optimised for production usage by design choices +# + +# Make the Java base image and version configurable (useful for trying newer Java versions and flavors) +ARG BASE_IMAGE="openjdk:11-jre" +FROM $BASE_IMAGE + +# Default payara ports to expose +# 4848: admin console +# 9009: debug port (JPDA) +# 8080: http +# 8181: https +EXPOSE 4848 9009 8080 8181 + +ENV HOME_DIR="/opt/payara" +ENV PAYARA_DIR="${HOME_DIR}/appserver" \ + SCRIPT_DIR="${HOME_DIR}/scripts" \ + CONFIG_DIR="${HOME_DIR}/config" \ + DEPLOY_DIR="${HOME_DIR}/deployments" \ + DOCROOT_DIR="/docroot" \ + SECRETS_DIR="/secrets" \ + DUMPS_DIR="/dumps" \ + PASSWORD_FILE="${HOME_DIR}/passwordFile" \ + ADMIN_USER="admin" \ + ADMIN_PASSWORD="admin" \ + DOMAIN_NAME="domain1" \ + PAYARA_ARGS="" +ENV PATH="${PATH}:${PAYARA_DIR}/bin" \ + DOMAIN_DIR="${PAYARA_DIR}/glassfish/domains/${DOMAIN_NAME}" \ + DEPLOY_PROPS="" \ + PREBOOT_COMMANDS="${CONFIG_DIR}/pre-boot-commands.asadmin" \ + POSTBOOT_COMMANDS="${CONFIG_DIR}/post-boot-commands.asadmin" \ + JVM_ARGS="" \ + MEM_MAX_RAM_PERCENTAGE="70.0" \ + MEM_XSS="512k" \ + # Source: https://github.com/fabric8io-images/run-java-sh/blob/master/TUNING.md#recommandations + MEM_MIN_HEAP_FREE_RATIO="20" \ + MEM_MAX_HEAP_FREE_RATIO="40" \ + MEM_MAX_GC_PAUSE_MILLIS="500" \ + MEM_METASPACE_SIZE="256m" \ + MEM_MAX_METASPACE_SIZE="2g" \ + # Make heap dumps on OOM appear in DUMPS_DIR + ENABLE_DUMPS=0 \ + JVM_DUMPS_ARG="-XX:+HeapDumpOnOutOfMemoryError" + +ARG ESH_VERSION=0.3.1 +ARG ESH_CHECKSUM="1e0bd783f930cba13d6708b11c1ac844bbb1eddd02ac1666fc10d47eb9517bd7" +ARG JATTACH_VERSION="v2.0" +ARG JATTACH_CHECKSUM="989dc53279c7fb3ec399dbff1692647439286e5a4339c2849fd4323e998af7f8" +ARG PKGS="jq imagemagick curl unzip wget acl dirmngr gpg lsof procps netcat tini" +ARG ASADMIN="${PAYARA_DIR}/bin/asadmin 
--user=${ADMIN_USER} --passwordfile=${PASSWORD_FILE}" + +### PART 1: SYSTEM ### +USER root +WORKDIR / +SHELL ["/bin/bash", "-euo", "pipefail", "-c"] +RUN true && \ + # Create pathes + mkdir -p "${HOME_DIR}" "${PAYARA_DIR}" "${DEPLOY_DIR}" "${CONFIG_DIR}" "${SCRIPT_DIR}" && \ + mkdir -p "${DOCROOT_DIR}" "${SECRETS_DIR}" "${DUMPS_DIR}" && \ + # Create user + addgroup --gid 1000 payara && \ + adduser --system --uid 1000 --no-create-home --shell /bin/bash --home "${HOME_DIR}" --gecos "" --ingroup payara payara && \ + echo payara:payara | chpasswd && \ + # Set permissions + chown -R payara: "${HOME_DIR}" && \ + chown -R payara: "${DOCROOT_DIR}" "${SECRETS_DIR}" "${DUMPS_DIR}" + +# Installing the packages in an extra container layer for better caching +RUN true && \ + # Install packages + apt-get update -q && \ + apt-get install -qqy --no-install-recommends ${PKGS} && \ + # Download & check esh template script + curl -sSfL -o /usr/bin/esh "https://raw.githubusercontent.com/jirutka/esh/v${ESH_VERSION}/esh" && \ + echo "${ESH_CHECKSUM} /usr/bin/esh" | sha256sum -c - && \ + chmod +x /usr/bin/esh && \ + # Install jattach + curl -sSfL -o /usr/bin/jattach "https://github.com/apangin/jattach/releases/download/${JATTACH_VERSION}/jattach" && \ + echo "${JATTACH_CHECKSUM} /usr/bin/jattach" | sha256sum -c - && \ + chmod +x /usr/bin/jattach && \ + # Cleanup + rm -rf "/var/lib/apt/lists/*" + +### PART 2: PAYARA ### +# After setting up system, now configure Payara +USER payara +WORKDIR ${HOME_DIR} + +# Copy Payara from build context (cached by Maven) +COPY --chown=payara:payara maven/appserver ${PAYARA_DIR}/ + +# Copy the system (appserver level) scripts like entrypoint, etc +COPY --chown=payara:payara maven/scripts ${SCRIPT_DIR}/ + +# Configure the domain to be container and production ready +RUN true && \ + # Set admin password + echo "AS_ADMIN_PASSWORD=" > /tmp/password-change-file.txt && \ + echo "AS_ADMIN_NEWPASSWORD=${ADMIN_PASSWORD}" >> /tmp/password-change-file.txt && \ + echo "AS_ADMIN_PASSWORD=${ADMIN_PASSWORD}" >> ${PASSWORD_FILE} && \ + asadmin --user=${ADMIN_USER} --passwordfile=/tmp/password-change-file.txt change-admin-password --domain_name=${DOMAIN_NAME} && \ + # Start domain for configuration + ${ASADMIN} start-domain ${DOMAIN_NAME} && \ + # Allow access to admin with password only + ${ASADMIN} enable-secure-admin && \ + ### CONTAINER USAGE ENABLEMENT + # List & delete memory settings from domain + for MEMORY_JVM_OPTION in $(${ASADMIN} list-jvm-options | grep "Xm[sx]\|Xss\|NewRatio"); \ + do \ + ${ASADMIN} delete-jvm-options $(echo $MEMORY_JVM_OPTION | sed -e 's/:/\\:/g'); \ + done && \ + # Tweak memory settings for containers + ${ASADMIN} create-jvm-options "-XX\:+UseContainerSupport" && \ + ${ASADMIN} create-jvm-options "-XX\:MaxRAMPercentage=\${ENV=MEM_MAX_RAM_PERCENTAGE}" && \ + ${ASADMIN} create-jvm-options "-Xss\${ENV=MEM_XSS}" && \ + ${ASADMIN} create-jvm-options "-XX\:MinHeapFreeRatio=\${ENV=MEM_MIN_HEAP_FREE_RATIO}" && \ + ${ASADMIN} create-jvm-options "-XX\:MaxHeapFreeRatio=\${ENV=MEM_MAX_HEAP_FREE_RATIO}" && \ + ${ASADMIN} create-jvm-options "-XX\:HeapDumpPath=\${ENV=DUMPS_DIR}" && \ + # Set logging to console only for containers + ${ASADMIN} set-log-attributes com.sun.enterprise.server.logging.GFFileHandler.logtoFile=false && \ + ### PRODUCTION READINESS + ${ASADMIN} create-jvm-options '-XX\:+UseG1GC' && \ + ${ASADMIN} create-jvm-options '-XX\:+UseStringDeduplication' && \ + ${ASADMIN} create-jvm-options '-XX\:MaxGCPauseMillis=${ENV=MEM_MAX_GC_PAUSE_MILLIS}' && \ + ${ASADMIN} 
create-jvm-options '-XX\:MetaspaceSize=${ENV=MEM_METASPACE_SIZE}' && \ + ${ASADMIN} create-jvm-options '-XX\:MaxMetaspaceSize=${ENV=MEM_MAX_METASPACE_SIZE}' && \ + ${ASADMIN} create-jvm-options '-XX\:+IgnoreUnrecognizedVMOptions' && \ + # Enlarge thread pools + ${ASADMIN} set server-config.thread-pools.thread-pool.http-thread-pool.max-thread-pool-size="50" && \ + ${ASADMIN} set server-config.thread-pools.thread-pool.http-thread-pool.max-queue-size="" && \ + ${ASADMIN} set default-config.thread-pools.thread-pool.thread-pool-1.max-thread-pool-size="250" && \ + # Enable file caching + ${ASADMIN} set server-config.network-config.protocols.protocol.http-listener-1.http.file-cache.enabled="true" && \ + ${ASADMIN} set server-config.network-config.protocols.protocol.http-listener-2.http.file-cache.enabled="true" && \ + ${ASADMIN} set default-config.network-config.protocols.protocol.http-listener-1.http.file-cache.enabled="true" && \ + ${ASADMIN} set default-config.network-config.protocols.protocol.http-listener-2.http.file-cache.enabled="true" && \ + # Enlarge EJB pools (cannot do this for server-config as set does not create new entries) + ${ASADMIN} set default-config.ejb-container.max-pool-size="128" && \ + # Misc settings + ${ASADMIN} create-system-properties fish.payara.classloading.delegate="false" && \ + ${ASADMIN} create-system-properties jersey.config.client.readTimeout="300000" && \ + ${ASADMIN} create-system-properties jersey.config.client.connectTimeout="300000" && \ + ### DATAVERSE APPLICATION SPECIFICS + # Configure the MicroProfile directory config source to point to /secrets + ${ASADMIN} set-config-dir --directory="${SECRETS_DIR}" && \ + # Make request timeouts configurable via MPCONFIG (default to 900 secs = 15 min) + ${ASADMIN} set 'server-config.network-config.protocols.protocol.http-listener-1.http.request-timeout-seconds=${MPCONFIG=dataverse.http.timeout:900}' && \ + # TODO: what of the below 3 items can be deleted for container usage? + ${ASADMIN} create-network-listener --protocol=http-listener-1 --listenerport=8009 --jkenabled=true jk-connector && \ + ${ASADMIN} set server-config.network-config.protocols.protocol.http-listener-1.http.comet-support-enabled=true && \ + ${ASADMIN} create-system-properties javax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl && \ + # Always disable phoning home... + ${ASADMIN} disable-phone-home && \ + ### CLEANUP + # Stop domain + ${ASADMIN} stop-domain "${DOMAIN_NAME}" && \ + # Delete generated files + rm -rf \ + "/tmp/password-change-file.txt" \ + "${PAYARA_DIR}/glassfish/domains/${DOMAIN_NAME}/osgi-cache" \ + "${PAYARA_DIR}/glassfish/domains/${DOMAIN_NAME}/logs" + +# Make docroot of Payara reside in higher level directory for easier targeting +# Due to gdcc/dataverse-kubernetes#177: create the generated pathes so they are +# writeable by us. TBR with gdcc/dataverse-kubernetes#178. +RUN rm -rf "${DOMAIN_DIR}"/docroot && \ + ln -s "${DOCROOT_DIR}" "${DOMAIN_DIR}"/docroot && \ + mkdir -p "${DOMAIN_DIR}"/generated/jsp/dataverse + +# Set the entrypoint to tini (as a process supervisor) +ENTRYPOINT ["/usr/bin/tini", "--"] +# JSON syntax should be used, but bypassed shell. Thus re-add expansion via shell exec. 
+CMD ["sh", "-c", "${SCRIPT_DIR}/entrypoint.sh"] + +LABEL org.opencontainers.image.created="@git.build.time@" \ + org.opencontainers.image.authors="Research Data Management at FZJ " \ + org.opencontainers.image.url="https://k8s-docs.gdcc.io" \ + org.opencontainers.image.documentation="https://k8s-docs.gdcc.io" \ + org.opencontainers.image.source="https://github.com/gdcc/dataverse/tree/develop%2Bct/modules/container-base" \ + org.opencontainers.image.version="@project.version@" \ + org.opencontainers.image.revision="@git.commit.id.abbrev@" \ + org.opencontainers.image.vendor="Global Dataverse Community Consortium" \ + org.opencontainers.image.licenses="Apache-2.0" \ + org.opencontainers.image.title="dataverse-k8s :: Dataverse containerized" \ + org.opencontainers.image.description="This container image provides an application server tuned for Dataverse software" diff --git a/modules/container-base/src/main/docker/assembly.xml b/modules/container-base/src/main/docker/assembly.xml new file mode 100644 index 00000000000..afd5530fa60 --- /dev/null +++ b/modules/container-base/src/main/docker/assembly.xml @@ -0,0 +1,17 @@ + + + + + ${project.basedir}/target/payara5 + appserver + + + + ${project.basedir}/src/main/docker/scripts + scripts + 0755 + + + \ No newline at end of file diff --git a/modules/container-base/src/main/docker/scripts/entrypoint.sh b/modules/container-base/src/main/docker/scripts/entrypoint.sh new file mode 100644 index 00000000000..6f71dfe013c --- /dev/null +++ b/modules/container-base/src/main/docker/scripts/entrypoint.sh @@ -0,0 +1,17 @@ +#!/bin/bash +########################################################################################################## +# +# This script is a fork of https://github.com/payara/Payara/blob/master/appserver/extras/docker-images/ +# server-full/src/main/docker/bin/entrypoint.sh and licensed under CDDL 1.1 by the Payara Foundation. +# +########################################################################################################## + +for f in "${SCRIPT_DIR}"/init_* "${SCRIPT_DIR}"/init.d/*; do + case "$f" in + *.sh) echo "[Entrypoint] running $f"; . "$f" ;; + *) echo "[Entrypoint] ignoring $f" ;; + esac + echo +done + +exec "${SCRIPT_DIR}"/startInForeground.sh "${PAYARA_ARGS}" diff --git a/modules/container-base/src/main/docker/scripts/init_1_generate_deploy_commands.sh b/modules/container-base/src/main/docker/scripts/init_1_generate_deploy_commands.sh new file mode 100644 index 00000000000..e2d717af666 --- /dev/null +++ b/modules/container-base/src/main/docker/scripts/init_1_generate_deploy_commands.sh @@ -0,0 +1,65 @@ +#!/bin/bash +########################################################################################################## +# +# A script to append deploy commands to the post boot command file at +# $PAYARA_HOME/scripts/post-boot-commands.asadmin file. All applications in the +# $DEPLOY_DIR (either files or folders) will be deployed. +# The $POSTBOOT_COMMANDS file can then be used with the start-domain using the +# --postbootcommandfile parameter to deploy applications on startup. +# +# Usage: +# ./generate_deploy_commands.sh +# +# Optionally, any number of parameters of the asadmin deploy command can be +# specified as parameters to this script. +# E.g., to deploy applications with implicit CDI scanning disabled: +# +# ./generate_deploy_commands.sh --properties=implicitCdiEnabled=false +# +# Environment variables used: +# - $PREBOOT_COMMANDS - the pre boot command file. 
+# - $POSTBOOT_COMMANDS - the post boot command file. +# +# Note that many parameters to the deploy command can be safely used only when +# a single application exists in the $DEPLOY_DIR directory. +# +########################################################################################################## +# +# This script is a fork of https://github.com/payara/Payara/blob/master/appserver/extras/docker-images/ +# server-full/src/main/docker/bin/init_1_generate_deploy_commands.sh and licensed under CDDL 1.1 +# by the Payara Foundation. +# +########################################################################################################## + +# Check required variables are set +if [ -z "$DEPLOY_DIR" ]; then echo "Variable DEPLOY_DIR is not set."; exit 1; fi +if [ -z "$PREBOOT_COMMANDS" ]; then echo "Variable PREBOOT_COMMANDS is not set."; exit 1; fi +if [ -z "$POSTBOOT_COMMANDS" ]; then echo "Variable POSTBOOT_COMMANDS is not set."; exit 1; fi + +# Create pre and post boot command files if they don't exist +touch "$POSTBOOT_COMMANDS" +touch "$PREBOOT_COMMANDS" + +deploy() { + + if [ -z "$1" ]; then + echo "No deployment specified"; + exit 1; + fi + + DEPLOY_STATEMENT="deploy $DEPLOY_PROPS $1" + if grep -q "$1" "$POSTBOOT_COMMANDS"; then + echo "post boot commands already deploys $1"; + else + echo "Adding deployment target $1 to post boot commands"; + echo "$DEPLOY_STATEMENT" >> "$POSTBOOT_COMMANDS"; + fi +} + +# RAR files first +find "$DEPLOY_DIR" -mindepth 1 -maxdepth 1 -name "*.rar" -print0 \ + | while IFS= read -r -d '' file; do deploy "$file"; done + +# Then every other WAR, EAR, JAR or directory +find "$DEPLOY_DIR" -mindepth 1 -maxdepth 1 ! -name "*.rar" -a -name "*.war" -o -name "*.ear" -o -name "*.jar" -o -type d -print0 \ + | while IFS= read -r -d '' file; do deploy "$file"; done \ No newline at end of file diff --git a/modules/container-base/src/main/docker/scripts/startInForeground.sh b/modules/container-base/src/main/docker/scripts/startInForeground.sh new file mode 100644 index 00000000000..4843f6ae055 --- /dev/null +++ b/modules/container-base/src/main/docker/scripts/startInForeground.sh @@ -0,0 +1,89 @@ +#!/bin/bash +########################################################################################################## +# +# This script is to execute Payara Server in foreground, mainly in a docker environment. +# It allows to avoid running 2 instances of JVM, which happens with the start-domain --verbose command. +# +# Usage: +# Running +# startInForeground.sh +# is equivalent to running +# asadmin start-domain +# +# It's possible to use any arguments of the start-domain command as arguments to startInForeground.sh +# +# Environment variables used: +# - $ADMIN_USER - the username to use for the asadmin utility. +# - $PASSWORD_FILE - the password file to use for the asadmin utility. +# - $PREBOOT_COMMANDS - the pre boot command file. +# - $POSTBOOT_COMMANDS - the post boot command file. +# - $DOMAIN_NAME - the name of the domain to start. +# - $JVM_ARGS - extra JVM options to pass to the Payara Server instance. +# - $AS_ADMIN_MASTERPASSWORD - the master password for the Payara Server instance. +# +# This script executes the asadmin tool which is expected at ~/appserver/bin/asadmin. 
+# +########################################################################################################## +# +# This script is a fork of https://github.com/payara/Payara/blob/master/appserver/ +# extras/docker-images/server-full/src/main/docker/bin/startInForeground.sh and licensed under CDDL 1.1 +# by the Payara Foundation. +# +########################################################################################################## + +# Check required variables are set +if [ -z "$ADMIN_USER" ]; then echo "Variable ADMIN_USER is not set."; exit 1; fi +if [ -z "$PASSWORD_FILE" ]; then echo "Variable PASSWORD_FILE is not set."; exit 1; fi +if [ -z "$PREBOOT_COMMANDS" ]; then echo "Variable PREBOOT_COMMANDS is not set."; exit 1; fi +if [ -z "$POSTBOOT_COMMANDS" ]; then echo "Variable POSTBOOT_COMMANDS is not set."; exit 1; fi +if [ -z "$DOMAIN_NAME" ]; then echo "Variable DOMAIN_NAME is not set."; exit 1; fi + +# Check if dumps are enabled - add arg to JVM_ARGS in this case +if [ -n "${ENABLE_DUMPS}" ] && [ "${ENABLE_DUMPS}" = "1" ]; then + JVM_ARGS="${JVM_DUMPS_ARG} ${JVM_ARGS}" +fi + +# The following command gets the command line to be executed by start-domain +# - print the command line to the server with --dry-run, each argument on a separate line +# - remove -read-string argument +# - surround each line except with parenthesis to allow spaces in paths +# - remove lines before and after the command line and squash commands on a single line + +# Create pre and post boot command files if they don't exist +touch "$POSTBOOT_COMMANDS" +touch "$PREBOOT_COMMANDS" + +# shellcheck disable=SC2068 +# -- Using $@ is necessary here as asadmin cannot deal with options enclosed in ""! +OUTPUT=$("${PAYARA_DIR}"/bin/asadmin --user="${ADMIN_USER}" --passwordfile="${PASSWORD_FILE}" start-domain --dry-run --prebootcommandfile="${PREBOOT_COMMANDS}" --postbootcommandfile="${POSTBOOT_COMMANDS}" $@ "$DOMAIN_NAME") +STATUS=$? 
+if [ "$STATUS" -ne 0 ] + then + echo ERROR: "$OUTPUT" >&2 + exit 1 +fi + +COMMAND=$(echo "$OUTPUT"\ + | sed -n -e '2,/^$/p'\ + | sed "s|glassfish.jar|glassfish.jar $JVM_ARGS |g") + +echo Executing Payara Server with the following command line: +echo "$COMMAND" | tr ' ' '\n' +echo + +# Run the server in foreground - read master password from variable or file or use the default "changeit" password + +set +x +if test "$AS_ADMIN_MASTERPASSWORD"x = x -a -f "$PASSWORD_FILE" + then + # shellcheck disable=SC1090 + source "$PASSWORD_FILE" +fi +if test "$AS_ADMIN_MASTERPASSWORD"x = x + then + AS_ADMIN_MASTERPASSWORD=changeit +fi +echo "AS_ADMIN_MASTERPASSWORD=$AS_ADMIN_MASTERPASSWORD" > /tmp/masterpwdfile +# shellcheck disable=SC2086 +# -- Unquoted exec var is necessary, as otherwise things get escaped that may not be escaped (parameters for Java) +exec ${COMMAND} < /tmp/masterpwdfile diff --git a/modules/dataverse-parent/pom.xml b/modules/dataverse-parent/pom.xml index 14b84f80279..4db2232be7d 100644 --- a/modules/dataverse-parent/pom.xml +++ b/modules/dataverse-parent/pom.xml @@ -182,6 +182,10 @@ 3.0.0-M5 3.3.0 3.1.2 + + + 0.39.1 + ghcr.io @@ -244,6 +248,11 @@ + + io.fabric8 + docker-maven-plugin + ${fabric8-dmp.version} + @@ -315,4 +324,46 @@ --> + + + ct + + + 5.2022.1 + + + + + + + io.github.git-commit-id + git-commit-id-maven-plugin + 5.0.0 + + + retrieve-git-details + + revision + + initialize + + + + ${project.basedir}/../../.git + UTC + 8 + false + + + + + + + + From 2319a4787e0c4e41b633382ed7c9684130933be8 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 16 Jun 2022 21:22:41 +0200 Subject: [PATCH 030/173] feat(ct-base): remove the esh tool Will be replaced with a capability to make API endpoints for authentication providers read from MPCONFIG sources. 
--- modules/container-base/src/main/docker/Dockerfile | 6 ------ 1 file changed, 6 deletions(-) diff --git a/modules/container-base/src/main/docker/Dockerfile b/modules/container-base/src/main/docker/Dockerfile index 635fbd89142..491c0747ada 100644 --- a/modules/container-base/src/main/docker/Dockerfile +++ b/modules/container-base/src/main/docker/Dockerfile @@ -61,8 +61,6 @@ ENV PATH="${PATH}:${PAYARA_DIR}/bin" \ ENABLE_DUMPS=0 \ JVM_DUMPS_ARG="-XX:+HeapDumpOnOutOfMemoryError" -ARG ESH_VERSION=0.3.1 -ARG ESH_CHECKSUM="1e0bd783f930cba13d6708b11c1ac844bbb1eddd02ac1666fc10d47eb9517bd7" ARG JATTACH_VERSION="v2.0" ARG JATTACH_CHECKSUM="989dc53279c7fb3ec399dbff1692647439286e5a4339c2849fd4323e998af7f8" ARG PKGS="jq imagemagick curl unzip wget acl dirmngr gpg lsof procps netcat tini" @@ -89,10 +87,6 @@ RUN true && \ # Install packages apt-get update -q && \ apt-get install -qqy --no-install-recommends ${PKGS} && \ - # Download & check esh template script - curl -sSfL -o /usr/bin/esh "https://raw.githubusercontent.com/jirutka/esh/v${ESH_VERSION}/esh" && \ - echo "${ESH_CHECKSUM} /usr/bin/esh" | sha256sum -c - && \ - chmod +x /usr/bin/esh && \ # Install jattach curl -sSfL -o /usr/bin/jattach "https://github.com/apangin/jattach/releases/download/${JATTACH_VERSION}/jattach" && \ echo "${JATTACH_CHECKSUM} /usr/bin/jattach" | sha256sum -c - && \ From f0202cb2c177c5ebeeb176c58c8b27256d32697b Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 17 Jun 2022 10:29:37 +0200 Subject: [PATCH 031/173] chore(deps): update container plugin and payara version for containers --- modules/dataverse-parent/pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/dataverse-parent/pom.xml b/modules/dataverse-parent/pom.xml index 4db2232be7d..fa693f8a8ac 100644 --- a/modules/dataverse-parent/pom.xml +++ b/modules/dataverse-parent/pom.xml @@ -184,7 +184,7 @@ 3.1.2 - 0.39.1 + 0.40.1 ghcr.io @@ -334,7 +334,7 @@ See also: https://github.com/IQSS/dataverse/issues/8048 See also: https://github.com/payara/Payara/issues/5368 --> - 5.2022.1 + 5.2022.2 From 2dc0596d8634cadecb691b95a39ba5a3355fcd99 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 11 Aug 2022 13:54:41 +0200 Subject: [PATCH 032/173] fix(ct-base): unpack Payara to target/payara Payara 5 defaults to a "payara5" topmost dir, Payara 6 to "payara6". To avoid adding different directories in the assembly, cut the number from the directories name when unpacking. This does not prevent you from doing stupid things like not cleaning before switching the version leading to an unknown state of old and new libs, etc. 
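For illustration, only the archive's top-level directory is rewritten while unpacking,
e.g. (assuming the usual Payara zip layout):

    payara5/glassfish/domains/domain1/...  ->  target/payara/glassfish/domains/domain1/...
    payara6/glassfish/domains/domain1/...  ->  target/payara/glassfish/domains/domain1/...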
--- modules/container-base/pom.xml | 6 ++++++ modules/container-base/src/main/docker/assembly.xml | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/modules/container-base/pom.xml b/modules/container-base/pom.xml index 8cb7e1ac795..765a4c72843 100644 --- a/modules/container-base/pom.xml +++ b/modules/container-base/pom.xml @@ -51,6 +51,12 @@ zip false ${project.build.directory} + + + ^payara\d + payara + + diff --git a/modules/container-base/src/main/docker/assembly.xml b/modules/container-base/src/main/docker/assembly.xml index afd5530fa60..9fc62d49fa1 100644 --- a/modules/container-base/src/main/docker/assembly.xml +++ b/modules/container-base/src/main/docker/assembly.xml @@ -4,7 +4,7 @@ - ${project.basedir}/target/payara5 + ${project.basedir}/target/payara appserver From 246f8b8cbfd18356c6f2cb63481d1fa02afad390 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 11 Aug 2022 14:03:19 +0200 Subject: [PATCH 033/173] fix(ct-base): migrate base image from OpenJDK to Eclipse Temurin There was an ongoing discussion that the Docker Hub Image "openjdk" is not backed by any official supported project but complete goodwill of Oracle shipping their JRE/JDK. There is no "real" release of OpenJDK . There exist only real distributions like Oracle JDK, Eclipse Temurin, Azul JDK, AWS Corretto etc (see https://whichjdk.com). As for this reason the "openjdk" image has been deprecated, switching to Eclipse Temurin JRE here. See also: https://github.com/docker-library/openjdk/issues/505 --- modules/container-base/pom.xml | 2 +- modules/container-base/src/main/docker/Dockerfile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/container-base/pom.xml b/modules/container-base/pom.xml index 765a4c72843..5ebaa9ea323 100644 --- a/modules/container-base/pom.xml +++ b/modules/container-base/pom.xml @@ -78,7 +78,7 @@ Dockerfile - openjdk:${target.java.version}-jre + eclipse-temurin:${target.java.version}-jre @ diff --git a/modules/container-base/src/main/docker/Dockerfile b/modules/container-base/src/main/docker/Dockerfile index 491c0747ada..2fed83db59f 100644 --- a/modules/container-base/src/main/docker/Dockerfile +++ b/modules/container-base/src/main/docker/Dockerfile @@ -20,7 +20,7 @@ # # Make the Java base image and version configurable (useful for trying newer Java versions and flavors) -ARG BASE_IMAGE="openjdk:11-jre" +ARG BASE_IMAGE="eclipse-temurin:11-jre" FROM $BASE_IMAGE # Default payara ports to expose From 76ea50871bafe028d1edad35f441e7731398ed00 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 11 Aug 2022 14:05:06 +0200 Subject: [PATCH 034/173] chore(deps): update Docker Maven Plugin to the latest release --- modules/dataverse-parent/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/dataverse-parent/pom.xml b/modules/dataverse-parent/pom.xml index fa693f8a8ac..eaa09b61bd7 100644 --- a/modules/dataverse-parent/pom.xml +++ b/modules/dataverse-parent/pom.xml @@ -184,7 +184,7 @@ 3.1.2 - 0.40.1 + 0.40.2 ghcr.io From f62dee2ec6a5dd237e2fbc10346bdebeb6a3c2f1 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 11 Aug 2022 15:13:48 +0200 Subject: [PATCH 035/173] feat(ct-base): enable multiarch image build via docker buildx With the rise of Apple M1/M2 silicons, we need to provide ARM64 based images in addition to AMD64. 
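The build entry point stays the same; roughly (assuming a docker buildx builder with
QEMU/binfmt emulation is available on the build host, since the non-native platform has
to be cross-built):

    mvn -Pct clean package docker:build

which should end up driving something equivalent to

    docker buildx build --platform linux/amd64,linux/arm64 .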
--- modules/container-base/pom.xml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/modules/container-base/pom.xml b/modules/container-base/pom.xml index 5ebaa9ea323..add8a120a58 100644 --- a/modules/container-base/pom.xml +++ b/modules/container-base/pom.xml @@ -76,6 +76,12 @@ %g/base:jdk${target.java.version} ${ct.registry} + + + linux/arm64 + linux/amd64 + + Dockerfile eclipse-temurin:${target.java.version}-jre From 72935d481e1e1ab260e763a000bfef172629cc16 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 12 Aug 2022 12:08:13 +0200 Subject: [PATCH 036/173] chore(ct-base): add maintainer details to POM --- modules/container-base/pom.xml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/modules/container-base/pom.xml b/modules/container-base/pom.xml index add8a120a58..015ebba598d 100644 --- a/modules/container-base/pom.xml +++ b/modules/container-base/pom.xml @@ -16,6 +16,18 @@ Container Base Image This module provides an application server base image to be decorated with the Dataverse app. + + + poikilotherm + Oliver Bertuch + github@bertuch.eu + Europe/Berlin + + maintainer + + + + From 17d8b53bb985fc77faebc8273b84012fac2bb525 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 12 Aug 2022 12:09:21 +0200 Subject: [PATCH 037/173] docs(ct-base): update OCI tag labels --- modules/container-base/src/main/docker/Dockerfile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/modules/container-base/src/main/docker/Dockerfile b/modules/container-base/src/main/docker/Dockerfile index 2fed83db59f..036e2f17831 100644 --- a/modules/container-base/src/main/docker/Dockerfile +++ b/modules/container-base/src/main/docker/Dockerfile @@ -187,12 +187,12 @@ CMD ["sh", "-c", "${SCRIPT_DIR}/entrypoint.sh"] LABEL org.opencontainers.image.created="@git.build.time@" \ org.opencontainers.image.authors="Research Data Management at FZJ " \ - org.opencontainers.image.url="https://k8s-docs.gdcc.io" \ - org.opencontainers.image.documentation="https://k8s-docs.gdcc.io" \ - org.opencontainers.image.source="https://github.com/gdcc/dataverse/tree/develop%2Bct/modules/container-base" \ + org.opencontainers.image.url="https://guides.dataverse.org/en/latest/container/" \ + org.opencontainers.image.documentation="https://guides.dataverse.org/en/latest/container/" \ + org.opencontainers.image.source="https://github.com/IQSS/dataverse/tree/develop/modules/container-base" \ org.opencontainers.image.version="@project.version@" \ org.opencontainers.image.revision="@git.commit.id.abbrev@" \ org.opencontainers.image.vendor="Global Dataverse Community Consortium" \ org.opencontainers.image.licenses="Apache-2.0" \ - org.opencontainers.image.title="dataverse-k8s :: Dataverse containerized" \ + org.opencontainers.image.title="Dataverse Base Image" \ org.opencontainers.image.description="This container image provides an application server tuned for Dataverse software" From 0a9947bd6868b9b45314b6fe0cfc918c48ed4eeb Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 12 Aug 2022 12:11:14 +0200 Subject: [PATCH 038/173] feat(ct-base): add debug/develop mode script --- .../container-base/src/main/docker/Dockerfile | 4 +- .../init_1_generate_devmode_commands.sh | 61 +++++++++++++++++++ 2 files changed, 64 insertions(+), 1 deletion(-) create mode 100644 modules/container-base/src/main/docker/scripts/init_1_generate_devmode_commands.sh diff --git a/modules/container-base/src/main/docker/Dockerfile b/modules/container-base/src/main/docker/Dockerfile index 036e2f17831..fe44fc61847 100644 
--- a/modules/container-base/src/main/docker/Dockerfile +++ b/modules/container-base/src/main/docker/Dockerfile @@ -59,7 +59,9 @@ ENV PATH="${PATH}:${PAYARA_DIR}/bin" \ MEM_MAX_METASPACE_SIZE="2g" \ # Make heap dumps on OOM appear in DUMPS_DIR ENABLE_DUMPS=0 \ - JVM_DUMPS_ARG="-XX:+HeapDumpOnOutOfMemoryError" + JVM_DUMPS_ARG="-XX:+HeapDumpOnOutOfMemoryError" \ + ENABLE_JMX=0 \ + ENABLE_JDWP=0 ARG JATTACH_VERSION="v2.0" ARG JATTACH_CHECKSUM="989dc53279c7fb3ec399dbff1692647439286e5a4339c2849fd4323e998af7f8" diff --git a/modules/container-base/src/main/docker/scripts/init_1_generate_devmode_commands.sh b/modules/container-base/src/main/docker/scripts/init_1_generate_devmode_commands.sh new file mode 100644 index 00000000000..9d71e3bb81b --- /dev/null +++ b/modules/container-base/src/main/docker/scripts/init_1_generate_devmode_commands.sh @@ -0,0 +1,61 @@ +#!/bin/bash + +set -euo pipefail + +###### ###### ###### ###### ###### ###### ###### ###### ###### ###### ###### +# This script enables different development options, like a JMX connector +# usable with VisualVM, JRebel hot-reload support and JDWP debugger service. +# Enable it by adding env vars on startup (e.g. via ConfigMap) +# +# As this script is "sourced" from entrypoint.sh, we can manipulate env vars +# for the parent shell before executing Payara. +###### ###### ###### ###### ###### ###### ###### ###### ###### ###### ###### + +# 0. Init variables +ENABLE_JMX=${ENABLE_JMX:-0} +ENABLE_JDWP=${ENABLE_JDWP:-0} + +DV_PREBOOT=${PAYARA_DIR}/dataverse_preboot +echo "# Dataverse preboot configuration for Payara" > "${DV_PREBOOT}" + +# 1. Configure JMX (enabled by default on port 8686, but requires SSL) +# See also https://blog.payara.fish/monitoring-payara-server-with-jconsole +# To still use it, you can use a sidecar container proxying or using JMX via localhost without SSL. +if [ "${ENABLE_JMX}" = "1" ]; then + echo "Enabling unsecured JMX on 0.0.0.0:8686. You'll need a sidecar for this, as access is allowed from same machine only (without SSL)." 
+ { \ + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.jvm=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.connector-service=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.connector-connection-pool=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.jdbc-connection-pool=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.web-services-container=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.ejb-container=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.thread-pool=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.http-service=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.security=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.jms-service=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.jersey=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.transaction-service=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.jpa=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.web-container=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.orb=HIGH" + echo "set configs.config.server-config.monitoring-service.module-monitoring-levels.deployment=HIGH" + #echo "set configs.config.server-config.admin-service.jmx-connector.system.address=127.0.0.1" + echo "set configs.config.server-config.admin-service.jmx-connector.system.security-enabled=false" + } >> "${DV_PREBOOT}" +fi + +# 2. Enable JDWP via debugging switch +if [ "${ENABLE_JDWP}" = "1" ]; then + echo "Enabling JDWP remote debugging support via asadmin debugging switch." + export PAYARA_ARGS="${PAYARA_ARGS} --debug=true" +fi + +# 3. 
Add the commands to the existing preboot file, but insert BEFORE deployment +TMP_PREBOOT=$(mktemp) +cat "${DV_PREBOOT}" "${PREBOOT_COMMANDS}" > "${TMP_PREBOOT}" +mv "${TMP_PREBOOT}" "${PREBOOT_COMMANDS}" +echo "DEBUG: preboot contains the following commands:" +echo "--------------------------------------------------" +cat "${PREBOOT_COMMANDS}" +echo "--------------------------------------------------" \ No newline at end of file From 2e812dcc15413d5814072b86971b924ee13824e4 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Mon, 22 Aug 2022 13:50:48 +0200 Subject: [PATCH 039/173] deps(ct-base): update to jattach v2.1 --- modules/container-base/src/main/docker/Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/container-base/src/main/docker/Dockerfile b/modules/container-base/src/main/docker/Dockerfile index fe44fc61847..d13808c3272 100644 --- a/modules/container-base/src/main/docker/Dockerfile +++ b/modules/container-base/src/main/docker/Dockerfile @@ -63,8 +63,8 @@ ENV PATH="${PATH}:${PAYARA_DIR}/bin" \ ENABLE_JMX=0 \ ENABLE_JDWP=0 -ARG JATTACH_VERSION="v2.0" -ARG JATTACH_CHECKSUM="989dc53279c7fb3ec399dbff1692647439286e5a4339c2849fd4323e998af7f8" +ARG JATTACH_VERSION="v2.1" +ARG JATTACH_CHECKSUM="07885fdc782e02e7302c6d190f54c3930afa10a38140365adf54076ec1086a8e" ARG PKGS="jq imagemagick curl unzip wget acl dirmngr gpg lsof procps netcat tini" ARG ASADMIN="${PAYARA_DIR}/bin/asadmin --user=${ADMIN_USER} --passwordfile=${PASSWORD_FILE}" From 7e836c70dd44a538bf1fdd0d73045730da053951 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 23 Aug 2022 00:10:24 +0200 Subject: [PATCH 040/173] chore(ct-base): add JMX to exposed ports and make it default enabled as in Payara --- modules/container-base/src/main/docker/Dockerfile | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/modules/container-base/src/main/docker/Dockerfile b/modules/container-base/src/main/docker/Dockerfile index d13808c3272..ba459607826 100644 --- a/modules/container-base/src/main/docker/Dockerfile +++ b/modules/container-base/src/main/docker/Dockerfile @@ -25,10 +25,11 @@ FROM $BASE_IMAGE # Default payara ports to expose # 4848: admin console -# 9009: debug port (JPDA) +# 9009: debug port (JDWP) # 8080: http # 8181: https -EXPOSE 4848 9009 8080 8181 +# 8686: JMX +EXPOSE 4848 9009 8080 8181 8686 ENV HOME_DIR="/opt/payara" ENV PAYARA_DIR="${HOME_DIR}/appserver" \ @@ -60,7 +61,7 @@ ENV PATH="${PATH}:${PAYARA_DIR}/bin" \ # Make heap dumps on OOM appear in DUMPS_DIR ENABLE_DUMPS=0 \ JVM_DUMPS_ARG="-XX:+HeapDumpOnOutOfMemoryError" \ - ENABLE_JMX=0 \ + ENABLE_JMX=1 \ ENABLE_JDWP=0 ARG JATTACH_VERSION="v2.1" From fe7b2d06148e6a2e6d6b2939f366de9ea2162cff Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 23 Aug 2022 00:11:08 +0200 Subject: [PATCH 041/173] docs(ct): add container guide to guides index --- doc/sphinx-guides/source/container/index.rst | 26 ++++++++++++++++++++ doc/sphinx-guides/source/index.rst | 7 ++++-- 2 files changed, 31 insertions(+), 2 deletions(-) create mode 100644 doc/sphinx-guides/source/container/index.rst diff --git a/doc/sphinx-guides/source/container/index.rst b/doc/sphinx-guides/source/container/index.rst new file mode 100644 index 00000000000..1bf86f16f43 --- /dev/null +++ b/doc/sphinx-guides/source/container/index.rst @@ -0,0 +1,26 @@ +Container Guide +=============== + +**Contents:** + +.. 
toctree:: + + base-image + app-image + +Running Dataverse software in containers is quite different than in a :doc:`classic installation <../installation/prep>`. + +Both approaches have pros and cons. These days (2022) containers are very often used for development and testing, +but there is an ever rising move for running applications in the cloud using container technology. + +**NOTE:** +**As the "Institute for Quantitative Social Sciences" at Harvard is running their installations in the classic +deployment way, the container support is mostly created and maintained by the Dataverse community.** + +This guide is *not* about installation on technology like Docker Swarm, Kubernetes, Rancher or other +solutions to run containers in production. There is the `Dataverse on K8s project `_ for this +purpose. + +This guide focuses on describing the container images managed from the main Dataverse repository (again: by the +community, not IQSS), their features and limitations. Instructions on how to build the images yourself, how to +extend them and how to use them for development purposes may be found in respective subpages. \ No newline at end of file diff --git a/doc/sphinx-guides/source/index.rst b/doc/sphinx-guides/source/index.rst index f7e81756e5b..f15a973544d 100755 --- a/doc/sphinx-guides/source/index.rst +++ b/doc/sphinx-guides/source/index.rst @@ -19,17 +19,20 @@ These documentation guides are for the |version| version of Dataverse. To find g installation/index developers/index style/index + container/index How the Guides Are Organized ---------------------------- The guides are documentation that explain how to use Dataverse, which are divided into the following sections: User Guide, -Installation Guide, Developer Guide, API Guide and Style Guide. The User Guide is further divided into primary activities: finding & using +Installation Guide, Developer Guide, API Guide, Style Guide and Container Guide. +The User Guide is further divided into primary activities: finding & using data, adding Datasets, administering dataverses or Datasets, and Dataset exploration/visualizations. Details on all of the above tasks can be found in the Users Guide. The Installation Guide is for people or organizations who want to host their -own Dataverse. The Developer Guide contains instructions for +own Dataverse. The Container Guide adds to this information on container-based installations. +The Developer Guide contains instructions for people who want to contribute to the Open Source Dataverse project or who want to modify the code to suit their own needs. Finally, the API Guide is for Developers that work on other applications and are interested in connecting with Dataverse through our APIs. From a93dbbdb4c5d2cfed80a13f265238a59f551999a Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 23 Aug 2022 00:12:35 +0200 Subject: [PATCH 042/173] docs(ct-base): add extensive base image module documentation --- .../source/container/base-image.rst | 229 ++++++++++++++++++ 1 file changed, 229 insertions(+) create mode 100644 doc/sphinx-guides/source/container/base-image.rst diff --git a/doc/sphinx-guides/source/container/base-image.rst b/doc/sphinx-guides/source/container/base-image.rst new file mode 100644 index 00000000000..4f441f79ad7 --- /dev/null +++ b/doc/sphinx-guides/source/container/base-image.rst @@ -0,0 +1,229 @@ +Application Base Image +====================== + +Within the main repository, you may find the base image's files at ``/modules/container-base``. 
+This Maven module uses the `Maven Docker Plugin `_ to build and ship the image. + +Contents +++++++++ + +The base image provides: + +- `Eclipse Temurin JRE using Java 11 `_ +- `Payara Community Application Server `_ +- CLI tools necessary to run Dataverse (i. e. ``curl`` or ``jq`` - see also :doc:`../installation/prerequisites` in Installation Guide) +- Linux tools for analysis, monitoring and so on +- `Jattach `_ + +This image is created as a "multi-arch image", supporting the most common architectures Dataverse usually runs on: +AMD64 (Windows/Linux/...) and ARM64 (Apple M1/M2). + +It inherits being built on an Ubuntu environment from the upstream +`base image of Eclipse Temurin `_. +You are free to change the JRE/JDK image to your liking (see below). + + + +Build Instructions +++++++++++++++++++ + +Assuming you have `Docker `_, `Docker Desktop `_, +`Moby `_ or some remote Docker host configured, up and running from here on. + +Simply execute the Maven modules packaging target with activated "container profile. Either from the projects Git root: + +``mvn -Pct -f modules/container-base package`` + +Or move to the module and execute: + +``cd modules/container-base && mvn -Pct package`` + +Some additional notes, using Maven parameters to change the build and use ...: + +- ... a different Payara version: add ``-Dpayara.version=V.YYYY.R``. +- | ... a different Temurin JRE version ``A``: add ``-Dtarget.java.version=A`` (i.e. ``11``, ``17``, ...). + | *Note:* must resolve to an available Docker tag ``A-jre`` of Eclipse Temurin! +- ... a different Java Distribution: add ``-Ddocker.buildArg.BASE_IMAGE="name:tag"`` with precise reference to an + image available from local or remote (e. g. Docker Hub). + + + +Tunables +++++++++ + +The base image provides a Payara domain suited for production use, but can also be used during development. +Many settings have been carefully selected for best performance and stability of the Dataverse application. + +As with any service, you should always monitor any metrics and make use of the tuning capabilities the base image +provides. These are mostly based on environment variables (very common with containers) and provide sane defaults. + +.. list-table:: + :align: left + :width: 100 + :widths: 10 10 10 50 + :header-rows: 1 + + * - Env. variable + - Default + - Type + - Description + * - ``DEPLOY_PROPS`` + - (empty) + - String + - Set to add arguments to generated `asadmin deploy` commands. + * - ``PREBOOT_COMMANDS`` + - [preboot]_ + - Abs. path + - Provide path to file with ``asadmin`` commands to run **before** boot of application server. + See also `Pre/postboot script docs`_. + * - ``POSTBOOT_COMMANDS`` + - [postboot]_ + - Abs. path + - Provide path to file with ``asadmin`` commands to run **after** boot of application server. + See also `Pre/postboot script docs`_. + * - ``JVM_ARGS`` + - (empty) + - String + - Additional arguments to pass to application server's JVM on start. + * - ``MEM_MAX_RAM_PERCENTAGE`` + - ``70.0`` + - Percentage + - Maximum amount of container's allocated RAM to be used as heap space. + Make sure to leave some room for native memory, OS overhead etc! + * - ``MEM_XSS`` + - ``512k`` + - Size + - Tune the maximum JVM stack size. + * - ``MEM_MIN_HEAP_FREE_RATIO`` + - ``20`` + - Integer + - Make the heap shrink aggressively and grow conservatively. See also `run-java-sh recommendations`_. + * - ``MEM_MAX_HEAP_FREE_RATIO`` + - ``40`` + - Integer + - Make the heap shrink aggressively and grow conservatively. 
See also `run-java-sh recommendations`_. + * - ``MEM_MAX_GC_PAUSE_MILLIS`` + - ``500`` + - Milliseconds + - Shorter pause times might result in lots of collections causing overhead without much gain. + This needs monitoring and tuning. It's a complex matter. + * - ``MEM_METASPACE_SIZE`` + - ``256m`` + - Size + - Initial size of memory reserved for class metadata, also used as trigger to run a garbage collection + once passing this size. + * - ``MEM_MAX_METASPACE_SIZE`` + - ``2g`` + - Size + - The metaspace's size will not outgrow this limit. + * - ``ENABLE_DUMPS`` + - ``0`` + - Bool, ``0|1`` + - If enabled, the argument(s) given in ``JVM_DUMP_ARG`` will be added to the JVM starting up. + This means it will enable dumping the heap to ``${DUMPS_DIR}`` (see below) in "out of memory" cases. + (You should back this location with disk space / ramdisk, so it does not write into an overlay filesystem!) + * - ``JVM_DUMPS_ARG`` + - [dump-option]_ + - String + - Can be fine tuned for more grained controls of dumping behaviour. + * - ``ENABLE_JMX`` + - ``1`` + - Bool, ``0|1`` + - Enable JMX - Payara enables this by default, hard to deactivate. + * - ``ENABLE_JDWP`` + - ``0`` + - Bool, ``0|1`` + - Enable the "Java Debug Wire Protocol" to attach a remote debugger to the JVM in this container. + Listens on port 9009 when enabled. Search the internet for numerous tutorials to use it. + * - ``DATAVERSE_HTTP_TIMEOUT`` + - ``900`` + - Seconds + - See :ref:`:ApplicationServerSettings` ``http.request-timeout-seconds``. + + *Note:* can also be set using any other `MicroProfile Config Sources`_ available via ``dataverse.http.timeout``. + + +.. [preboot] ``${CONFIG_DIR}/pre-boot-commands.asadmin`` +.. [postboot] ``${CONFIG_DIR}/post-boot-commands.asadmin`` +.. [dump-option] ``-XX:+HeapDumpOnOutOfMemoryError`` + + + +Locations ++++++++++ + +This environment variables represent certain locations and might be reused in your scripts etc. +These variables aren't meant to be reconfigurable and reflect state in the filesystem layout! + +.. list-table:: + :align: left + :width: 100 + :widths: 10 10 50 + :header-rows: 1 + + * - Env. variable + - Value + - Description + * - ``HOME_DIR`` + - ``/opt/payara`` + - Home base to Payara and the application + * - ``PAYARA_DIR`` + - ``${HOME_DIR}/appserver`` + - Installation directory of Payara server + * - ``SCRIPT_DIR`` + - ``${HOME_DIR}/scripts`` + - Any scripts like the container entrypoint, init scripts, etc + * - ``CONFIG_DIR`` + - ``${HOME_DIR}/config`` + - Payara Server configurations like pre/postboot command files go here + (Might be reused for Dataverse one day) + * - ``DEPLOY_DIR`` + - ``${HOME_DIR}/deployments`` + - Any EAR or WAR file, exploded WAR directory etc are autodeployed on start + * - ``DOCROOT_DIR`` + - ``/docroot`` + - Mount a volume here to store i18n language bundle files, sitemaps, images for Dataverse collections, logos, + custom themes and stylesheets, etc here. You might need to replicate this data or place on shared file storage. + * - ``SECRETS_DIR`` + - ``/secrets`` + - Mount secrets or other here, being picked up automatically by + `Directory Config Source `_. + See also various :doc:`../installation/config` options involving secrets. + * - ``DUMPS_DIR`` + - ``/dumps`` + - Default location where heap dumps will be stored (see above). + You should mount some storage here (disk or ephemeral). + * - ``DOMAIN_DIR`` + - ``${PAYARA_DIR}/glassfish`` ``/domains/${DOMAIN_NAME}`` + - Path to root of the Payara domain applications will be deployed into. 
Usually ``${DOMAIN_NAME}`` will be ``domain1``. + + + +Exposed Ports ++++++++++++++ + +The default ports that are exposed by this image are: + +- 8080 - HTTP listener +- 8181 - HTTPS listener +- 4848 - Admin Service HTTPS listener +- 8686 - JMX listener +- 9009 - "Java Debug Wire Protocol" port (when ``ENABLE_JDWP=1``) + + + +Hints ++++++ + +By default, ``domain1`` is enabled to use the ``G1GC`` garbage collector. + +For running a Java application within a Linux based container, the support for CGroups is essential. It has been +included and activated by default since Java 8u192, Java 11 LTS and later. If you are interested in more details, +you can read about those in a few places like https://developers.redhat.com/articles/2022/04/19/java-17-whats-new-openjdks-container-awareness, +https://www.eclipse.org/openj9/docs/xxusecontainersupport, etc. The other memory defaults are inspired +from `run-java-sh recommendations`_. + + +.. _Pre/postboot script docs: https://docs.payara.fish/community/docs/Technical%20Documentation/Payara%20Micro%20Documentation/Payara%20Micro%20Configuration%20and%20Management/Micro%20Management/Asadmin%20Commands/Pre%20and%20Post%20Boot%20Commands.html +.. _MicroProfile Config Sources: https://docs.payara.fish/community/docs/Technical%20Documentation/MicroProfile/Config/Overview.html +.. _run-java-sh recommendations: https://github.com/fabric8io-images/run-java-sh/blob/master/TUNING.md#recommandations \ No newline at end of file From 67db02ff0249720c47e3025820c30fb6d737ec83 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Wed, 24 Aug 2022 15:08:30 +0200 Subject: [PATCH 043/173] docs(ct-base): remove reference to not (yet) existing docs page --- doc/sphinx-guides/source/container/index.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/sphinx-guides/source/container/index.rst b/doc/sphinx-guides/source/container/index.rst index 1bf86f16f43..801ded7d0a5 100644 --- a/doc/sphinx-guides/source/container/index.rst +++ b/doc/sphinx-guides/source/container/index.rst @@ -6,7 +6,6 @@ Container Guide .. toctree:: base-image - app-image Running Dataverse software in containers is quite different than in a :doc:`classic installation <../installation/prep>`. From d5f80754e0ebf1ed56d34c1d7dbbe3d5fdc49b4a Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Wed, 24 Aug 2022 17:38:29 +0200 Subject: [PATCH 044/173] docs(ct-base): add Docker Hub Eclipse Temurin tag search example --- doc/sphinx-guides/source/container/base-image.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/sphinx-guides/source/container/base-image.rst b/doc/sphinx-guides/source/container/base-image.rst index 4f441f79ad7..4333bf38d5c 100644 --- a/doc/sphinx-guides/source/container/base-image.rst +++ b/doc/sphinx-guides/source/container/base-image.rst @@ -43,6 +43,7 @@ Some additional notes, using Maven parameters to change the build and use ...: - ... a different Payara version: add ``-Dpayara.version=V.YYYY.R``. - | ... a different Temurin JRE version ``A``: add ``-Dtarget.java.version=A`` (i.e. ``11``, ``17``, ...). | *Note:* must resolve to an available Docker tag ``A-jre`` of Eclipse Temurin! + (See also `Docker Hub search example `_) - ... a different Java Distribution: add ``-Ddocker.buildArg.BASE_IMAGE="name:tag"`` with precise reference to an image available from local or remote (e. g. Docker Hub). 
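Putting the options listed above together, a customized build could look like this (a sketch; the version values are examples only and must resolve to an existing Payara release and Temurin image tag):

    mvn -Pct -f modules/container-base package \
        -Dtarget.java.version=17 \
        -Dpayara.version=5.2022.3
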
From 5e61241a27229fdbe7ce6fb7e84c520b609fdb33 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 26 Aug 2022 18:26:30 +0200 Subject: [PATCH 045/173] style(ct-base): incorporate requested changes by @pdurbin - Change order of guides - Remove unnecessary quotes from IQSS - Add TOC to base image docs - Add flag again about community support only to base image docs --- doc/sphinx-guides/source/container/base-image.rst | 14 ++++++++++++-- doc/sphinx-guides/source/container/index.rst | 5 +++-- doc/sphinx-guides/source/index.rst | 2 +- 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/doc/sphinx-guides/source/container/base-image.rst b/doc/sphinx-guides/source/container/base-image.rst index 4333bf38d5c..ac64323eeea 100644 --- a/doc/sphinx-guides/source/container/base-image.rst +++ b/doc/sphinx-guides/source/container/base-image.rst @@ -1,11 +1,21 @@ Application Base Image ====================== +.. contents:: |toctitle| + :local: + Within the main repository, you may find the base image's files at ``/modules/container-base``. This Maven module uses the `Maven Docker Plugin `_ to build and ship the image. -Contents -++++++++ +**NOTE: This image is created, maintained and supported by the Dataverse community on a best-effort basis.** +IQSS will not offer you support how to deploy or run it, please reach out to the community for help on using it. +You might be interested in taking a look at :doc:`../developers/containers`, linking you to some (community-based) +efforts. + + + +Image Contents +++++++++++++++ The base image provides: diff --git a/doc/sphinx-guides/source/container/index.rst b/doc/sphinx-guides/source/container/index.rst index 801ded7d0a5..f6c99bfc19e 100644 --- a/doc/sphinx-guides/source/container/index.rst +++ b/doc/sphinx-guides/source/container/index.rst @@ -13,8 +13,9 @@ Both approaches have pros and cons. These days (2022) containers are very often but there is an ever rising move for running applications in the cloud using container technology. **NOTE:** -**As the "Institute for Quantitative Social Sciences" at Harvard is running their installations in the classic -deployment way, the container support is mostly created and maintained by the Dataverse community.** +**As the Institute for Quantitative Social Sciences (IQSS) at Harvard is running their installations in the classic +deployment way, the container support is mostly created and maintained by the Dataverse community on a best-effort +basis.** This guide is *not* about installation on technology like Docker Swarm, Kubernetes, Rancher or other solutions to run containers in production. There is the `Dataverse on K8s project `_ for this diff --git a/doc/sphinx-guides/source/index.rst b/doc/sphinx-guides/source/index.rst index f15a973544d..cbfafb419ab 100755 --- a/doc/sphinx-guides/source/index.rst +++ b/doc/sphinx-guides/source/index.rst @@ -18,8 +18,8 @@ These documentation guides are for the |version| version of Dataverse. To find g api/index installation/index developers/index - style/index container/index + style/index How the Guides Are Organized ---------------------------- From a3a70998b9fcacc1a96e8357d459cba489425785 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 26 Aug 2022 18:29:30 +0200 Subject: [PATCH 046/173] feat(ct-base): make image names configurable and rename Add new Maven properties to choose a different Java base image and change the name of the target base image when people customize it. Also changes the build arg for the Java base image name. 
With this, the image name changes to follow the same convention as the Java base image. --- doc/sphinx-guides/source/container/base-image.rst | 4 +++- modules/container-base/pom.xml | 7 ++++--- modules/container-base/src/main/docker/Dockerfile | 4 ++-- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/doc/sphinx-guides/source/container/base-image.rst b/doc/sphinx-guides/source/container/base-image.rst index ac64323eeea..834381e6779 100644 --- a/doc/sphinx-guides/source/container/base-image.rst +++ b/doc/sphinx-guides/source/container/base-image.rst @@ -50,11 +50,13 @@ Or move to the module and execute: Some additional notes, using Maven parameters to change the build and use ...: +- | ... a different image name and tag: add ``-Dbase.image=name:tag``. + | *Note:* default is ``gdcc/base:${target.java.version}-jre`` - ... a different Payara version: add ``-Dpayara.version=V.YYYY.R``. - | ... a different Temurin JRE version ``A``: add ``-Dtarget.java.version=A`` (i.e. ``11``, ``17``, ...). | *Note:* must resolve to an available Docker tag ``A-jre`` of Eclipse Temurin! (See also `Docker Hub search example `_) -- ... a different Java Distribution: add ``-Ddocker.buildArg.BASE_IMAGE="name:tag"`` with precise reference to an +- ... a different Java Distribution: add ``-Djava.image="name:tag"`` with precise reference to an image available from local or remote (e. g. Docker Hub). diff --git a/modules/container-base/pom.xml b/modules/container-base/pom.xml index 015ebba598d..f8e97bb4349 100644 --- a/modules/container-base/pom.xml +++ b/modules/container-base/pom.xml @@ -39,6 +39,8 @@ ct docker-build + gdcc/base:${target.java.version}-jre + eclipse-temurin:${target.java.version}-jre @@ -85,8 +87,7 @@ base - %g/base:jdk${target.java.version} - ${ct.registry} + ${base.image} @@ -96,7 +97,7 @@ Dockerfile - eclipse-temurin:${target.java.version}-jre + ${java.image} @ diff --git a/modules/container-base/src/main/docker/Dockerfile b/modules/container-base/src/main/docker/Dockerfile index ba459607826..6fdc790a21a 100644 --- a/modules/container-base/src/main/docker/Dockerfile +++ b/modules/container-base/src/main/docker/Dockerfile @@ -20,8 +20,8 @@ # # Make the Java base image and version configurable (useful for trying newer Java versions and flavors) -ARG BASE_IMAGE="eclipse-temurin:11-jre" -FROM $BASE_IMAGE +ARG JAVA_IMAGE="eclipse-temurin:11-jre" +FROM $JAVA_IMAGE # Default payara ports to expose # 4848: admin console From 06d31fde25c3bfa812339c0afad94b7a83e92e59 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 26 Aug 2022 18:34:40 +0200 Subject: [PATCH 047/173] fix(ct-base): make container build use install not package goal By switching to `mvn install` instead of `mvn package`, we allow the main image carrying the application to declare a dependency on the container-base module (to make sure it get's built alongside, as we might want to change the Payara version!) This commits also adds the Maven install plugin to the parent POM for versioning plus to the container-base POM for having the target available. (This is a necessary workaround for a Maven Docker Plugin shortcoming.) 
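In practice, building the base image locally then happens via the install goal, e.g. from the project root:

    mvn -Pct -f modules/container-base install

so that a depending application image module can resolve the just-built module artifact from the local Maven repository.
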
--- .../source/container/base-image.rst | 4 ++-- modules/container-base/pom.xml | 19 +++++++++++++++++++ modules/dataverse-parent/pom.xml | 6 ++++++ 3 files changed, 27 insertions(+), 2 deletions(-) diff --git a/doc/sphinx-guides/source/container/base-image.rst b/doc/sphinx-guides/source/container/base-image.rst index 834381e6779..585fe1184e7 100644 --- a/doc/sphinx-guides/source/container/base-image.rst +++ b/doc/sphinx-guides/source/container/base-image.rst @@ -42,11 +42,11 @@ Assuming you have `Docker `_, `Docker D Simply execute the Maven modules packaging target with activated "container profile. Either from the projects Git root: -``mvn -Pct -f modules/container-base package`` +``mvn -Pct -f modules/container-base install`` Or move to the module and execute: -``cd modules/container-base && mvn -Pct package`` +``cd modules/container-base && mvn -Pct install`` Some additional notes, using Maven parameters to change the build and use ...: diff --git a/modules/container-base/pom.xml b/modules/container-base/pom.xml index f8e97bb4349..0e8f24a781b 100644 --- a/modules/container-base/pom.xml +++ b/modules/container-base/pom.xml @@ -108,6 +108,25 @@ + + + + maven-install-plugin + + + default-install + install + + install + + + + diff --git a/modules/dataverse-parent/pom.xml b/modules/dataverse-parent/pom.xml index eaa09b61bd7..411ce85b2fa 100644 --- a/modules/dataverse-parent/pom.xml +++ b/modules/dataverse-parent/pom.xml @@ -178,6 +178,7 @@ 3.2.2 3.3.2 3.2.0 + 3.0.0-M1 3.0.0-M5 3.0.0-M5 3.3.0 @@ -226,6 +227,11 @@ maven-dependency-plugin ${maven-dependency-plugin.version} + + org.apache.maven.plugins + maven-install-plugin + ${maven-install-plugin.version} + org.apache.maven.plugins maven-surefire-plugin From 98ad9361843519b3f904ecc3df5d7b877802c30a Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 26 Aug 2022 18:37:28 +0200 Subject: [PATCH 048/173] fix(ct-base): flatten container-base POM By using the flattening POM plugin, the installed POM will not carry references to the dataverse-parent module. This reference is a) unnecessary and b) troublesome because of the ${revision} hack. (And we do not provide it as a dependency from Central/...) --- modules/container-base/.gitignore | 1 + modules/container-base/pom.xml | 36 +++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) create mode 100644 modules/container-base/.gitignore diff --git a/modules/container-base/.gitignore b/modules/container-base/.gitignore new file mode 100644 index 00000000000..d75620abf70 --- /dev/null +++ b/modules/container-base/.gitignore @@ -0,0 +1 @@ +.flattened-pom.xml diff --git a/modules/container-base/pom.xml b/modules/container-base/pom.xml index 0e8f24a781b..cee3989661a 100644 --- a/modules/container-base/pom.xml +++ b/modules/container-base/pom.xml @@ -108,6 +108,42 @@ + + + + org.codehaus.mojo + flatten-maven-plugin + 1.2.7 + + true + oss + + remove + remove + + + + + + flatten + process-resources + + flatten + + + + + flatten.clean + clean + + clean + + + + 0.40.2 - ghcr.io From 64f84ea461d0fc8d1e4147b1bdcb8b86c2bafcd0 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Wed, 31 Aug 2022 01:56:51 +0200 Subject: [PATCH 063/173] style(ct-base): make up base image name from tag and add default With defaulting to develop, we rest on using any build of the image during experimentation etc to go with a (local) develop tag. Removing the Java version from the tag makes it easier to use and reflects the nature of it. 
It aligns image builds with the release schema of the actual application while still allowing for experiments and having different sources of truth for released and develop code. --- modules/container-base/pom.xml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modules/container-base/pom.xml b/modules/container-base/pom.xml index 12eb3b137ff..67e2c2f9911 100644 --- a/modules/container-base/pom.xml +++ b/modules/container-base/pom.xml @@ -39,7 +39,8 @@ ct docker-build - gdcc/base:${target.java.version}-jre + gdcc/base:${base.image.tag} + develop eclipse-temurin:${target.java.version}-jre 1000 1000 From 5a986af6cc7651fd43ec5a4207349dab17b6651e Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Wed, 31 Aug 2022 01:57:16 +0200 Subject: [PATCH 064/173] chore(deps): make container profile use Payara 5.2022.3 --- modules/dataverse-parent/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/dataverse-parent/pom.xml b/modules/dataverse-parent/pom.xml index 7a3b71fb68c..86b46817635 100644 --- a/modules/dataverse-parent/pom.xml +++ b/modules/dataverse-parent/pom.xml @@ -340,7 +340,7 @@ See also: https://github.com/IQSS/dataverse/issues/8048 See also: https://github.com/payara/Payara/issues/5368 --> - 5.2022.2 + 5.2022.3 From 65f9d6356b8caca3ddd54e323c838e6b9749f3cc Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Wed, 31 Aug 2022 02:02:32 +0200 Subject: [PATCH 065/173] feat(ct-base): enable base image pushes for master and develop branch - Make pushes to develop or master branch release a container image to Docker Hub by default (can be changed / extended). - Defaulting to the develop tag by default makes it more reusable for depending workflows based on pull requests. - Moving all multi-arch building to only happen on pushes, as it will be done during push/deploy phase only and those need credentials only avail in git push context running at repo owner of CI action. - Removing the Java version matrix parameter, too - we are gonna stick with what is default for releasing the images as they are meant to be a somewhat reliable base. It's still open for experiments. 
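The push boils down to a single Maven call, which the workflow below runs with the repository's Docker Hub credentials; executed by hand it would look roughly like this (only useful if you can actually push to the target repository):

    mvn -f modules/container-base -Pct deploy \
        -Dbase.image.tag=develop -Ddocker.registry=docker.io
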
--- .github/workflows/container_base_push.yml | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml index fc23b30d8ad..82c7a376ae0 100644 --- a/.github/workflows/container_base_push.yml +++ b/.github/workflows/container_base_push.yml @@ -5,16 +5,21 @@ on: push: branches: - 'develop' + - 'master' paths: - 'modules/container-base/**' - 'modules/dataverse-parent/pom.xml' pull_request: branches: - 'develop' + - 'master' paths: - 'modules/container-base/**' - 'modules/dataverse-parent/pom.xml' +env: + IMAGE_TAG: develop + REGISTRY: docker.io jobs: build: @@ -45,14 +50,21 @@ jobs: key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }} restore-keys: ${{ runner.os }}-m2 - - name: Set up QEMU for multi-arch builds - uses: docker/setup-qemu-action@v2 - - name: Build base container image - run: mvn -f modules/container-base -Pct package -Dtarget.java.version=${{ matrix.jdk }} + - name: Build base container image with local architecture + run: mvn -f modules/container-base -Pct package - if: ${{ github.event_name == 'push' }} # run only if this is a push - PRs have no access to secrets name: Log in to the Container registry uses: docker/login-action@v1 with: username: ${{ secrets.DOCKERHUB_USERNAME }} - password: ${{ secrets.DOCKERHUB_TOKEN }} \ No newline at end of file + password: ${{ secrets.DOCKERHUB_TOKEN }} + - if: ${{ github.event_name == 'push' }} # run only if this is a push - multi-arch makes no sense with PR + name: Set up QEMU for multi-arch builds + uses: docker/setup-qemu-action@v2 + - name: Re-set image tag based on branch + if: ${{ github.ref == 'master' }} + run: echo "IMAGE_TAG=release" + - if: ${{ github.event_name == 'push' }} # run only if this is a push - tag push will only succeed in upstream + name: Deploy multi-arch base container image to Docker Hub + run: mvn -f modules/container-base -Pct deploy -Dbase.image.tag=${{ env.IMAGE_TAG }} -Ddocker.registry=${{ env.REGISTRY }} From 8f39ef2c6e564af53756895a0115e0d58f24d602 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Wed, 14 Sep 2022 16:42:29 +0200 Subject: [PATCH 066/173] style(ct-base): upgrade Dockerfile with heredocs #8932 Instead of using "&& \" style continuation of a RUN layer, newer Docker versions (since 2021) allow usage of heredocs. 
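For illustration, a RUN layer written in heredoc style looks roughly like this (a generic sketch, not taken from this patch; requires a BuildKit-enabled builder):

    RUN <<EOF
        apt-get update -q
        apt-get install -qqy --no-install-recommends curl
    EOF

Every line inside the block is a plain shell statement, so there is no need to chain commands with "&& \" anymore.
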
Also move some ARG to more suitable places --- .../container-base/src/main/docker/Dockerfile | 158 ++++++++++-------- 1 file changed, 85 insertions(+), 73 deletions(-) diff --git a/modules/container-base/src/main/docker/Dockerfile b/modules/container-base/src/main/docker/Dockerfile index caec4ee6619..68b9da13c67 100644 --- a/modules/container-base/src/main/docker/Dockerfile +++ b/modules/container-base/src/main/docker/Dockerfile @@ -67,43 +67,47 @@ ENV PATH="${PATH}:${PAYARA_DIR}/bin" \ ENABLE_JDWP=0 \ ENABLE_RELOAD=0 -ARG JATTACH_VERSION="v2.1" -ARG JATTACH_CHECKSUM="07885fdc782e02e7302c6d190f54c3930afa10a38140365adf54076ec1086a8e" -ARG PKGS="jq imagemagick curl unzip wget acl dirmngr gpg lsof procps netcat tini" -ARG ASADMIN="${PAYARA_DIR}/bin/asadmin --user=${ADMIN_USER} --passwordfile=${PASSWORD_FILE}" - ### PART 1: SYSTEM ### ARG UID=1000 ARG GID=1000 USER root WORKDIR / SHELL ["/bin/bash", "-euo", "pipefail", "-c"] -RUN true && \ +RUN <> /tmp/password-change-file.txt && \ - echo "AS_ADMIN_PASSWORD=${ADMIN_PASSWORD}" >> ${PASSWORD_FILE} && \ - asadmin --user=${ADMIN_USER} --passwordfile=/tmp/password-change-file.txt change-admin-password --domain_name=${DOMAIN_NAME} && \ + echo "AS_ADMIN_PASSWORD=" > /tmp/password-change-file.txt + echo "AS_ADMIN_NEWPASSWORD=${ADMIN_PASSWORD}" >> /tmp/password-change-file.txt + echo "AS_ADMIN_PASSWORD=${ADMIN_PASSWORD}" >> ${PASSWORD_FILE} + asadmin --user=${ADMIN_USER} --passwordfile=/tmp/password-change-file.txt change-admin-password --domain_name=${DOMAIN_NAME} # Start domain for configuration - ${ASADMIN} start-domain ${DOMAIN_NAME} && \ + ${ASADMIN} start-domain ${DOMAIN_NAME} # Allow access to admin with password only - ${ASADMIN} enable-secure-admin && \ + ${ASADMIN} enable-secure-admin + ### CONTAINER USAGE ENABLEMENT # List & delete memory settings from domain - for MEMORY_JVM_OPTION in $(${ASADMIN} list-jvm-options | grep "Xm[sx]\|Xss\|NewRatio"); \ - do \ - ${ASADMIN} delete-jvm-options $(echo $MEMORY_JVM_OPTION | sed -e 's/:/\\:/g'); \ - done && \ + for MEMORY_JVM_OPTION in $(${ASADMIN} list-jvm-options | grep "Xm[sx]\|Xss\|NewRatio"); + do + ${ASADMIN} delete-jvm-options $(echo $MEMORY_JVM_OPTION | sed -e 's/:/\\:/g'); + done # Tweak memory settings for containers - ${ASADMIN} create-jvm-options "-XX\:+UseContainerSupport" && \ - ${ASADMIN} create-jvm-options "-XX\:MaxRAMPercentage=\${ENV=MEM_MAX_RAM_PERCENTAGE}" && \ - ${ASADMIN} create-jvm-options "-Xss\${ENV=MEM_XSS}" && \ - ${ASADMIN} create-jvm-options "-XX\:MinHeapFreeRatio=\${ENV=MEM_MIN_HEAP_FREE_RATIO}" && \ - ${ASADMIN} create-jvm-options "-XX\:MaxHeapFreeRatio=\${ENV=MEM_MAX_HEAP_FREE_RATIO}" && \ - ${ASADMIN} create-jvm-options "-XX\:HeapDumpPath=\${ENV=DUMPS_DIR}" && \ + ${ASADMIN} create-jvm-options "-XX\:+UseContainerSupport" + ${ASADMIN} create-jvm-options "-XX\:MaxRAMPercentage=\${ENV=MEM_MAX_RAM_PERCENTAGE}" + ${ASADMIN} create-jvm-options "-Xss\${ENV=MEM_XSS}" + ${ASADMIN} create-jvm-options "-XX\:MinHeapFreeRatio=\${ENV=MEM_MIN_HEAP_FREE_RATIO}" + ${ASADMIN} create-jvm-options "-XX\:MaxHeapFreeRatio=\${ENV=MEM_MAX_HEAP_FREE_RATIO}" + ${ASADMIN} create-jvm-options "-XX\:HeapDumpPath=\${ENV=DUMPS_DIR}" # Set logging to console only for containers - ${ASADMIN} set-log-attributes com.sun.enterprise.server.logging.GFFileHandler.logtoFile=false && \ + ${ASADMIN} set-log-attributes com.sun.enterprise.server.logging.GFFileHandler.logtoFile=false \ + ### PRODUCTION READINESS - ${ASADMIN} create-jvm-options '-XX\:+UseG1GC' && \ - ${ASADMIN} create-jvm-options 
'-XX\:+UseStringDeduplication' && \ - ${ASADMIN} create-jvm-options '-XX\:+DisableExplicitGC' && \ - ${ASADMIN} create-jvm-options '-XX\:MaxGCPauseMillis=${ENV=MEM_MAX_GC_PAUSE_MILLIS}' && \ - ${ASADMIN} create-jvm-options '-XX\:MetaspaceSize=${ENV=MEM_METASPACE_SIZE}' && \ - ${ASADMIN} create-jvm-options '-XX\:MaxMetaspaceSize=${ENV=MEM_MAX_METASPACE_SIZE}' && \ - ${ASADMIN} create-jvm-options '-XX\:+IgnoreUnrecognizedVMOptions' && \ + ${ASADMIN} create-jvm-options '-XX\:+UseG1GC' + ${ASADMIN} create-jvm-options '-XX\:+UseStringDeduplication' + ${ASADMIN} create-jvm-options '-XX\:+DisableExplicitGC' + ${ASADMIN} create-jvm-options '-XX\:MaxGCPauseMillis=${ENV=MEM_MAX_GC_PAUSE_MILLIS}' + ${ASADMIN} create-jvm-options '-XX\:MetaspaceSize=${ENV=MEM_METASPACE_SIZE}' + ${ASADMIN} create-jvm-options '-XX\:MaxMetaspaceSize=${ENV=MEM_MAX_METASPACE_SIZE}' + ${ASADMIN} create-jvm-options '-XX\:+IgnoreUnrecognizedVMOptions' # Disable autodeploy and hot reload - ${ASADMIN} set configs.config.server-config.admin-service.das-config.dynamic-reload-enabled="false" && \ - ${ASADMIN} set configs.config.server-config.admin-service.das-config.autodeploy-enabled="false" && \ + ${ASADMIN} set configs.config.server-config.admin-service.das-config.dynamic-reload-enabled="false" + ${ASADMIN} set configs.config.server-config.admin-service.das-config.autodeploy-enabled="false" # Enlarge thread pools - ${ASADMIN} set server-config.thread-pools.thread-pool.http-thread-pool.max-thread-pool-size="50" && \ - ${ASADMIN} set server-config.thread-pools.thread-pool.http-thread-pool.max-queue-size="" && \ - ${ASADMIN} set default-config.thread-pools.thread-pool.thread-pool-1.max-thread-pool-size="250" && \ + ${ASADMIN} set server-config.thread-pools.thread-pool.http-thread-pool.max-thread-pool-size="50" + ${ASADMIN} set server-config.thread-pools.thread-pool.http-thread-pool.max-queue-size="" + ${ASADMIN} set default-config.thread-pools.thread-pool.thread-pool-1.max-thread-pool-size="250" # Enable file caching - ${ASADMIN} set server-config.network-config.protocols.protocol.http-listener-1.http.file-cache.enabled="true" && \ - ${ASADMIN} set server-config.network-config.protocols.protocol.http-listener-2.http.file-cache.enabled="true" && \ - ${ASADMIN} set default-config.network-config.protocols.protocol.http-listener-1.http.file-cache.enabled="true" && \ - ${ASADMIN} set default-config.network-config.protocols.protocol.http-listener-2.http.file-cache.enabled="true" && \ + ${ASADMIN} set server-config.network-config.protocols.protocol.http-listener-1.http.file-cache.enabled="true" + ${ASADMIN} set server-config.network-config.protocols.protocol.http-listener-2.http.file-cache.enabled="true" + ${ASADMIN} set default-config.network-config.protocols.protocol.http-listener-1.http.file-cache.enabled="true" + ${ASADMIN} set default-config.network-config.protocols.protocol.http-listener-2.http.file-cache.enabled="true" # Disable the HTTPS listener (we are always fronting our appservers with a reverse proxy handling SSL) - ${ASADMIN} set configs.config.server-config.network-config.network-listeners.network-listener.http-listener-2.enabled="false" && \ - # Enlarge and tune EJB pools (cannot do this for server-config as set does not create new entries) \ - ${ASADMIN} set default-config.ejb-container.pool-resize-quantity="2" && \ - ${ASADMIN} set default-config.ejb-container.max-pool-size="128" && \ - ${ASADMIN} set default-config.ejb-container.steady-pool-size="10" && \ + ${ASADMIN} set 
configs.config.server-config.network-config.network-listeners.network-listener.http-listener-2.enabled="false" + # Enlarge and tune EJB pools (cannot do this for server-config as set does not create new entries) + ${ASADMIN} set default-config.ejb-container.pool-resize-quantity="2" + ${ASADMIN} set default-config.ejb-container.max-pool-size="128" + ${ASADMIN} set default-config.ejb-container.steady-pool-size="10" # Misc settings - ${ASADMIN} create-system-properties fish.payara.classloading.delegate="false" && \ - ${ASADMIN} create-system-properties jersey.config.client.readTimeout="300000" && \ - ${ASADMIN} create-system-properties jersey.config.client.connectTimeout="300000" && \ + ${ASADMIN} create-system-properties fish.payara.classloading.delegate="false" + ${ASADMIN} create-system-properties jersey.config.client.readTimeout="300000" + ${ASADMIN} create-system-properties jersey.config.client.connectTimeout="300000" \ + ### DATAVERSE APPLICATION SPECIFICS # Configure the MicroProfile directory config source to point to /secrets - ${ASADMIN} set-config-dir --directory="${SECRETS_DIR}" && \ + ${ASADMIN} set-config-dir --directory="${SECRETS_DIR}" # Make request timeouts configurable via MPCONFIG (default to 900 secs = 15 min) - ${ASADMIN} set 'server-config.network-config.protocols.protocol.http-listener-1.http.request-timeout-seconds=${MPCONFIG=dataverse.http.timeout:900}' && \ + ${ASADMIN} set 'server-config.network-config.protocols.protocol.http-listener-1.http.request-timeout-seconds=${MPCONFIG=dataverse.http.timeout:900}' # TODO: what of the below 3 items can be deleted for container usage? - ${ASADMIN} create-network-listener --protocol=http-listener-1 --listenerport=8009 --jkenabled=true jk-connector && \ - ${ASADMIN} set server-config.network-config.protocols.protocol.http-listener-1.http.comet-support-enabled=true && \ - ${ASADMIN} create-system-properties javax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl && \ + ${ASADMIN} create-network-listener --protocol=http-listener-1 --listenerport=8009 --jkenabled=true jk-connector + ${ASADMIN} set server-config.network-config.protocols.protocol.http-listener-1.http.comet-support-enabled=true + ${ASADMIN} create-system-properties javax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl # Always disable phoning home... 
- ${ASADMIN} disable-phone-home && \ + ${ASADMIN} disable-phone-home \ + ### CLEANUP # Stop domain - ${ASADMIN} stop-domain "${DOMAIN_NAME}" && \ - # Disable JSP servlet dynamic reloads \ - sed -i 's#org.apache.jasper.servlet.JspServlet#org.apache.jasper.servlet.JspServlet\n \n development\n false\n \n \n genStrAsCharArray\n true\n #' "${DOMAIN_DIR}/config/default-web.xml" && \ + ${ASADMIN} stop-domain "${DOMAIN_NAME}" + # Disable JSP servlet dynamic reloads + sed -i 's#org.apache.jasper.servlet.JspServlet#org.apache.jasper.servlet.JspServlet\n \n development\n false\n \n \n genStrAsCharArray\n true\n #' "${DOMAIN_DIR}/config/default-web.xml" # Cleanup old CA certificates to avoid unnecessary log clutter during startup - ${SCRIPT_DIR}/removeExpiredCaCerts.sh && \ + ${SCRIPT_DIR}/removeExpiredCaCerts.sh # Delete generated files rm -rf \ "/tmp/password-change-file.txt" \ "${PAYARA_DIR}/glassfish/domains/${DOMAIN_NAME}/osgi-cache" \ "${PAYARA_DIR}/glassfish/domains/${DOMAIN_NAME}/logs" +EOF # Make docroot of Payara reside in higher level directory for easier targeting # Due to gdcc/dataverse-kubernetes#177: create the generated pathes so they are # writeable by us. TBR with gdcc/dataverse-kubernetes#178. -RUN rm -rf "${DOMAIN_DIR}"/docroot && \ - ln -s "${DOCROOT_DIR}" "${DOMAIN_DIR}"/docroot && \ +RUN < Date: Wed, 14 Sep 2022 21:18:28 +0200 Subject: [PATCH 067/173] feat,fix(ct-base): add extension point for background script #8932 By moving from tini to dumb-init, we can offer a new extension point: if an application image extending this base image provides an executable script at ${SCRIPT_DIR}/startInBackground.sh, it will be executed after the init scripts and in parallel to the application server. By adding ${SCRIPT_DIR} to $PATH, we can now also skip variable expansion, fixing a bug: formerly, the "exec" in entrypoint.sh and startInForeground.sh where not replacing the shell properly. The switch to dumb-init makes sure signals will be transferred also to any background processes! 
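A minimal, hypothetical example of such a background script (any real application image would ship its own logic; the URL and sleep interval are placeholders, using the curl already bundled in the image):

    #!/bin/bash
    # ${SCRIPT_DIR}/startInBackground.sh - runs next to the app server under dumb-init
    until curl -sSf http://localhost:8080/ >/dev/null 2>&1; do
        echo "Waiting for the application server on port 8080..."
        sleep 5
    done
    echo "Application server is up - running post-start tasks now."
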
--- .../container-base/src/main/docker/Dockerfile | 10 +++++----- .../src/main/docker/scripts/entrypoint.sh | 17 ++++++++++++++++- 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/modules/container-base/src/main/docker/Dockerfile b/modules/container-base/src/main/docker/Dockerfile index 68b9da13c67..c56abb975e2 100644 --- a/modules/container-base/src/main/docker/Dockerfile +++ b/modules/container-base/src/main/docker/Dockerfile @@ -46,7 +46,7 @@ ENV PAYARA_DIR="${HOME_DIR}/appserver" \ ADMIN_PASSWORD="admin" \ DOMAIN_NAME="domain1" \ PAYARA_ARGS="" -ENV PATH="${PATH}:${PAYARA_DIR}/bin" \ +ENV PATH="${PATH}:${PAYARA_DIR}/bin:${SCRIPT_DIR}" \ DOMAIN_DIR="${PAYARA_DIR}/glassfish/domains/${DOMAIN_NAME}" \ DEPLOY_PROPS="" \ PREBOOT_COMMANDS="${CONFIG_DIR}/pre-boot-commands.asadmin" \ @@ -88,7 +88,7 @@ EOF ARG JATTACH_VERSION="v2.1" ARG JATTACH_CHECKSUM="07885fdc782e02e7302c6d190f54c3930afa10a38140365adf54076ec1086a8e" -ARG PKGS="jq imagemagick curl unzip wget acl dirmngr gpg lsof procps netcat tini" +ARG PKGS="jq imagemagick curl unzip wget acl dirmngr gpg lsof procps netcat dumb-init" # Installing the packages in an extra container layer for better caching RUN < Date: Wed, 14 Sep 2022 21:32:51 +0200 Subject: [PATCH 068/173] docs(ct-base): document startInBackground.sh #8932 --- .../source/container/base-image.rst | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/container/base-image.rst b/doc/sphinx-guides/source/container/base-image.rst index 524ef8a7fbe..3f7b3b46c85 100644 --- a/doc/sphinx-guides/source/container/base-image.rst +++ b/doc/sphinx-guides/source/container/base-image.rst @@ -23,7 +23,8 @@ The base image provides: - `Payara Community Application Server `_ - CLI tools necessary to run Dataverse (i. e. ``curl`` or ``jq`` - see also :doc:`../installation/prerequisites` in Installation Guide) - Linux tools for analysis, monitoring and so on -- `Jattach `_ +- `Jattach `__ (attach to running JVM) +- `dumb-init `__ (see :ref:`below ` for details) This image is created as a "multi-arch image", supporting the most common architectures Dataverse usually runs on: AMD64 (Windows/Linux/...) and ARM64 (Apple M1/M2). @@ -246,6 +247,22 @@ its sources plus uncached scheduled nightly builds to make sure security updates Note: for the Github Action to be able to push to Docker Hub, two repository secrets (DOCKERHUB_USERNAME, DOCKERHUB_TOKEN) have been added by IQSS admins to their repository. +.. _base-entrypoint: + +Entry & Extension Points +++++++++++++++++++++++++ + +The entrypoint shell script provided by this base image will by default ensure to: + +- Run any scripts named ``${SCRIPT_DIR}/init_*`` or in ``${SCRIPT_DIR}/init.d/*`` directory for initialization + **before** the application server starts. +- Run an executable script ``${SCRIPT_DIR}/startInBackground.sh`` in the background - if present. +- Run the application server startup scripting in foreground (``${SCRIPT_DIR}/startInForeground.sh``). + +If you need to create some scripting that runs in parallel under supervision of `dumb-init `_, +e.g. to wait for the application to deploy before executing something, this is your point of extension: simply provide +the ``${SCRIPT_DIR}/startInBackground.sh`` executable script with your application image. 
+ Other Hints From f8bf73479708a0d1cfb6882db9a118e12d70d34d Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Wed, 14 Sep 2022 21:50:06 +0200 Subject: [PATCH 069/173] ci(shellcheck,shellspec): split ShellCheck and ShellSpec To avoid unnecessary Shellspec runs for scripts that have no such tests, branch out the Shellcheck part of it into different workflow. Also make "bash" explicit as the container base image using an "unknown shebang" via dumb-init, but it's simply bash. --- .github/workflows/shellcheck.yml | 24 ++++++++++++++++++++++++ .github/workflows/shellspec.yml | 14 -------------- 2 files changed, 24 insertions(+), 14 deletions(-) create mode 100644 .github/workflows/shellcheck.yml diff --git a/.github/workflows/shellcheck.yml b/.github/workflows/shellcheck.yml new file mode 100644 index 00000000000..2d910f54127 --- /dev/null +++ b/.github/workflows/shellcheck.yml @@ -0,0 +1,24 @@ +name: "Shellcheck" +on: + push: + paths: + - conf/solr/** + - modules/container-base/** + pull_request: + paths: + - conf/solr/** + - modules/container-base/** +jobs: + shellcheck: + name: Shellcheck + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: shellcheck + uses: reviewdog/action-shellcheck@v1 + with: + github_token: ${{ secrets.github_token }} + reporter: github-pr-review # Change reporter. + fail_on_error: true + # Container base image uses dumb-init shebang, so nail to using bash + shellcheck_flags: "--shell=bash --external-sources" \ No newline at end of file diff --git a/.github/workflows/shellspec.yml b/.github/workflows/shellspec.yml index 2b127a7be5c..5c251cfc897 100644 --- a/.github/workflows/shellspec.yml +++ b/.github/workflows/shellspec.yml @@ -4,29 +4,15 @@ on: paths: - tests/shell/** - conf/solr/** - - modules/container-base/** # add more when more specs are written relying on data pull_request: paths: - tests/shell/** - conf/solr/** - - modules/container-base/** # add more when more specs are written relying on data env: SHELLSPEC_VERSION: 0.28.1 jobs: - shellcheck: - name: Shellcheck - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - name: shellcheck - uses: reviewdog/action-shellcheck@v1 - with: - github_token: ${{ secrets.github_token }} - reporter: github-pr-review # Change reporter. - fail_on_error: true - exclude: "./tests/shell/*" shellspec-ubuntu: name: "Ubuntu" runs-on: ubuntu-latest From 626b4951cfbf163895ce75e605b4daec455e0aae Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 20 Sep 2022 00:22:38 +0200 Subject: [PATCH 070/173] docs(ct-base): clarify support image tags #8932 Adding notes about the image tags produced by the community for reuse in the community. Document final tagging strategy, using the branch name (develop/main) instead of the Java version or sth. Reshape the automated builds and publishing part to be included in the supported tags and build instructions section to reduce text complexity and group matching parts together. 
--- .../source/container/base-image.rst | 40 +++++++++++++------ 1 file changed, 27 insertions(+), 13 deletions(-) diff --git a/doc/sphinx-guides/source/container/base-image.rst b/doc/sphinx-guides/source/container/base-image.rst index 3f7b3b46c85..ea54ecbebd2 100644 --- a/doc/sphinx-guides/source/container/base-image.rst +++ b/doc/sphinx-guides/source/container/base-image.rst @@ -12,6 +12,17 @@ IQSS will not offer you support how to deploy or run it, please reach out to the You might be interested in taking a look at :doc:`../developers/containers`, linking you to some (community-based) efforts. +Supported Image Tags +++++++++++++++++++++ + +This image is sourced within the main upstream code repository of the Dataverse software. Development and maintenance +happens there (again, by the community). Community supported image tags are based on the two most important branches: + +- ``develop`` representing the unstable state of affairs in Dataverse's development branch + (`Dockerfile `__) +- ``release`` representing the latest stable release in Dataverse's main branch + (`Dockerfile `__) + Image Contents @@ -51,8 +62,12 @@ Or move to the module and execute: Some additional notes, using Maven parameters to change the build and use ...: +- | ... a different tag only: add ``-Dbase.image.tag=tag``. + | *Note:* default is ``develop`` - | ... a different image name and tag: add ``-Dbase.image=name:tag``. - | *Note:* default is ``gdcc/base:${target.java.version}-jre`` + | *Note:* default is ``gdcc/base:${base.image.tag}`` +- ... a different image registry than *Docker Hub*: add ``-Ddocker.registry=registry.example.org`` (see also + `DMP docs on registries `__) - ... a different Payara version: add ``-Dpayara.version=V.YYYY.R``. - | ... a different Temurin JRE version ``A``: add ``-Dtarget.java.version=A`` (i.e. ``11``, ``17``, ...). | *Note:* must resolve to an available Docker tag ``A-jre`` of Eclipse Temurin! @@ -61,6 +76,17 @@ Some additional notes, using Maven parameters to change the build and use ...: image available from local or remote (e. g. Docker Hub). - ... a different UID/GID for the ``payara`` user/group: add ``-Dbase.image.uid=1234`` (or ``.gid``) +Automated Builds & Publishing +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +To make reusing most simple, the image is built with a Github Action within the IQSS repository and then pushed +to `Docker Hub gdcc/base repository `_. It is built and pushed on every edit to +its sources plus uncached scheduled nightly builds to make sure security updates are finding their way in. + +*Note:* For the Github Action to be able to push to Docker Hub, two repository secrets +(DOCKERHUB_USERNAME, DOCKERHUB_TOKEN) have been added by IQSS admins to their repository. + + Tunables ++++++++ @@ -234,18 +260,6 @@ The HTTPS listener (on port 8181) becomes deactivated during the build, as we wi application server and handle SSL/TLS termination at this point. Save the memory and some CPU cycles! -Publishing and Updates -++++++++++++++++++++++ - -This image is sourced within the main upstream code repository of the Dataverse software. Development and maintenance -happens there (again, by the community). - -To make reusing most simple, the image is built with a Github Action within the IQSS repository and then pushed -to `Docker Hub gdcc/base repository `_. It is built and pushed on every edit to -its sources plus uncached scheduled nightly builds to make sure security updates are finding their way in. 
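As a quick illustration of how the Maven parameters documented above combine, the following builds the base image locally with a custom tag, a different JRE major version, and a non-default UID for the ``payara`` user; the concrete values are examples only.

.. code-block:: bash

    # Illustrative local build of the base image with a few of the documented overrides.
    mvn -Pct package \
        -Dbase.image.tag=my-feature-tag \
        -Dtarget.java.version=17 \
        -Dbase.image.uid=1234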
- -Note: for the Github Action to be able to push to Docker Hub, two repository secrets -(DOCKERHUB_USERNAME, DOCKERHUB_TOKEN) have been added by IQSS admins to their repository. .. _base-entrypoint: From 77592113f310d314d7de11b372a60cf3b4e08600 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 20 Sep 2022 00:27:06 +0200 Subject: [PATCH 071/173] style,docs(ct-base): small word adjusts for some build options --- doc/sphinx-guides/source/container/base-image.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/sphinx-guides/source/container/base-image.rst b/doc/sphinx-guides/source/container/base-image.rst index ea54ecbebd2..3e83af23bfb 100644 --- a/doc/sphinx-guides/source/container/base-image.rst +++ b/doc/sphinx-guides/source/container/base-image.rst @@ -70,10 +70,10 @@ Some additional notes, using Maven parameters to change the build and use ...: `DMP docs on registries `__) - ... a different Payara version: add ``-Dpayara.version=V.YYYY.R``. - | ... a different Temurin JRE version ``A``: add ``-Dtarget.java.version=A`` (i.e. ``11``, ``17``, ...). - | *Note:* must resolve to an available Docker tag ``A-jre`` of Eclipse Temurin! + | *Note:* must resolve to an available image tag ``A-jre`` of Eclipse Temurin! (See also `Docker Hub search example `_) - ... a different Java Distribution: add ``-Djava.image="name:tag"`` with precise reference to an - image available from local or remote (e. g. Docker Hub). + image available local or remote. - ... a different UID/GID for the ``payara`` user/group: add ``-Dbase.image.uid=1234`` (or ``.gid``) Automated Builds & Publishing From 2141bcafae5fea8ac2414a0aecede81b988a7306 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 20 Sep 2022 00:48:16 +0200 Subject: [PATCH 072/173] docs(ct-base): add notes about multiarch builds #8932 Addin description on requirements to build cross platform added as subsection of the build instructions seemed valuable. --- .../source/container/base-image.rst | 23 +++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/doc/sphinx-guides/source/container/base-image.rst b/doc/sphinx-guides/source/container/base-image.rst index 3e83af23bfb..41d88c97e2d 100644 --- a/doc/sphinx-guides/source/container/base-image.rst +++ b/doc/sphinx-guides/source/container/base-image.rst @@ -37,8 +37,7 @@ The base image provides: - `Jattach `__ (attach to running JVM) - `dumb-init `__ (see :ref:`below ` for details) -This image is created as a "multi-arch image", supporting the most common architectures Dataverse usually runs on: -AMD64 (Windows/Linux/...) and ARM64 (Apple M1/M2). +This image is created as a "multi-arch image", see :ref:`below `. It inherits being built on an Ubuntu environment from the upstream `base image of Eclipse Temurin `_. @@ -86,6 +85,24 @@ its sources plus uncached scheduled nightly builds to make sure security updates *Note:* For the Github Action to be able to push to Docker Hub, two repository secrets (DOCKERHUB_USERNAME, DOCKERHUB_TOKEN) have been added by IQSS admins to their repository. +.. _base-multiarch: + +Processor Architecture and Multiarch +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +This image is created as a "multi-arch image", supporting the most common architectures Dataverse usually runs on: +AMD64 (Windows/Linux/...) and ARM64 (Apple M1/M2), by using Maven Docker Plugin's *BuildX* mode. + +Building the image via ``mvn -Pct package`` or ``mvn -Pct install`` as above will only build for the architecture of +the Docker maschine's CPU. 
+ +Only ``mvn -Pct deploy`` will trigger building on all enabled architectures. +Yet, to enable building with non-native code on your build machine, you will need to setup a cross-platform builder. + +On Linux, you should install `qemu-user-static `__ (preferably via +your package management) on the host and run ``docker run --rm --privileged multiarch/qemu-user-static --reset -p yes`` +to enable that builder. The Docker plugin will setup everything else for you. + Tunables @@ -290,8 +307,6 @@ you can read about those in a few places like https://developers.redhat.com/arti https://www.eclipse.org/openj9/docs/xxusecontainersupport, etc. The other memory defaults are inspired from `run-java-sh recommendations`_. -*Note: the build process used the newer ``buildx`` feature of Docker to provide multiarch images.* - .. _Pre/postboot script docs: https://docs.payara.fish/community/docs/Technical%20Documentation/Payara%20Micro%20Documentation/Payara%20Micro%20Configuration%20and%20Management/Micro%20Management/Asadmin%20Commands/Pre%20and%20Post%20Boot%20Commands.html From 276b3b5159471bd44cff99bfb1b9e6b279634b4a Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 20 Sep 2022 10:49:19 +0200 Subject: [PATCH 073/173] feat(ct-base): add wait-for script to image Many scripts shipped with an app image might rely on the availability of an external service, API or simply the database or search index. Adding a standard script here to make it easier to wait for their availability. --- doc/sphinx-guides/source/container/base-image.rst | 1 + modules/container-base/src/main/docker/Dockerfile | 11 +++++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/doc/sphinx-guides/source/container/base-image.rst b/doc/sphinx-guides/source/container/base-image.rst index 41d88c97e2d..197f4175538 100644 --- a/doc/sphinx-guides/source/container/base-image.rst +++ b/doc/sphinx-guides/source/container/base-image.rst @@ -35,6 +35,7 @@ The base image provides: - CLI tools necessary to run Dataverse (i. e. ``curl`` or ``jq`` - see also :doc:`../installation/prerequisites` in Installation Guide) - Linux tools for analysis, monitoring and so on - `Jattach `__ (attach to running JVM) +- `wait-for `__ (tool to "wait for" a service to be available) - `dumb-init `__ (see :ref:`below ` for details) This image is created as a "multi-arch image", see :ref:`below `. diff --git a/modules/container-base/src/main/docker/Dockerfile b/modules/container-base/src/main/docker/Dockerfile index c56abb975e2..cafeb2ffb59 100644 --- a/modules/container-base/src/main/docker/Dockerfile +++ b/modules/container-base/src/main/docker/Dockerfile @@ -88,6 +88,8 @@ EOF ARG JATTACH_VERSION="v2.1" ARG JATTACH_CHECKSUM="07885fdc782e02e7302c6d190f54c3930afa10a38140365adf54076ec1086a8e" +ARG WAIT_FOR_VERSION="v2.2.3" +ARG WAIT_FOR_CHECKSUM="70271181be69cd2c7265b2746f97fccfd7e8aa1059894138a775369c23589ff4" ARG PKGS="jq imagemagick curl unzip wget acl dirmngr gpg lsof procps netcat dumb-init" # Installing the packages in an extra container layer for better caching @@ -95,12 +97,17 @@ RUN < Date: Thu, 22 Sep 2022 14:17:58 +0200 Subject: [PATCH 074/173] chore(deps): remove Payara version from Maven ct profile With the merge of #8949 the custom version is no longer necessary. 
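The ``wait-for`` script added to the image above is intended to be called from init or background scripts that depend on external services. A sketch of its use, assuming the script is on the ``PATH`` (it is installed into ``${SCRIPT_DIR}``) and using placeholder host names, ports, and timeouts:

.. code-block:: bash

    # Block until Solr and the database answer on their TCP ports, then continue.
    # Hosts, ports and the timeout are placeholders for a concrete deployment.
    wait-for solr:8983 -t 120 -- echo "Solr is reachable"
    wait-for postgres:5432 -t 120 -- echo "Database is reachable"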
--- modules/dataverse-parent/pom.xml | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/modules/dataverse-parent/pom.xml b/modules/dataverse-parent/pom.xml index 9326ba71263..ce4dfb56257 100644 --- a/modules/dataverse-parent/pom.xml +++ b/modules/dataverse-parent/pom.xml @@ -334,13 +334,7 @@ ct - - 5.2022.3 + From 3ea4e92b48452c3785f3e7c60df4acdf40f8bd1e Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 30 Sep 2022 09:21:16 -0400 Subject: [PATCH 075/173] todo is done --- src/main/java/edu/harvard/iq/dataverse/api/Files.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Files.java b/src/main/java/edu/harvard/iq/dataverse/api/Files.java index 9dc0c3be524..d1ecd2d8824 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Files.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Files.java @@ -231,7 +231,6 @@ public Response replaceFileInDataset( if (null == contentDispositionHeader) { if (optionalFileParams.hasStorageIdentifier()) { newStorageIdentifier = optionalFileParams.getStorageIdentifier(); - // ToDo - check that storageIdentifier is valid if (optionalFileParams.hasFileName()) { newFilename = optionalFileParams.getFileName(); if (optionalFileParams.hasMimetype()) { From cb5007a6a5ad46e27dce34dbd5c2bd16bdc9044e Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 30 Sep 2022 09:21:37 -0400 Subject: [PATCH 076/173] add getjsonarray --- .../java/edu/harvard/iq/dataverse/util/json/JsonUtil.java | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonUtil.java index f4a3c635f8b..21ff0e03773 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonUtil.java @@ -63,4 +63,10 @@ public static javax.json.JsonObject getJsonObject(String serializedJson) { return Json.createReader(rdr).readObject(); } } + + public static javax.json.JsonArray getJsonArray(String serializedJson) { + try (StringReader rdr = new StringReader(serializedJson)) { + return Json.createReader(rdr).readArray(); + } + } } From e06ec36b2a4a78e8c64e42858542faaccf62841b Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 30 Sep 2022 10:04:55 -0400 Subject: [PATCH 077/173] Add /replaceFiles call refactor to make multifile a separate boolean remove unused LicenseBean from constructor updated /addFiles logic to use clone refactored steps 70/80 to work for multi-replace. i.e. by tracking filesToDelete and the physical files to delete. 
replace local Json readers with JsonUtil method move sanity check on file deletes to DataFileServiceBean --- .../iq/dataverse/DataFileServiceBean.java | 4 + .../iq/dataverse/EditDatafilesPage.java | 3 +- .../harvard/iq/dataverse/api/Datasets.java | 77 +++- .../edu/harvard/iq/dataverse/api/Files.java | 3 +- .../datasetutility/AddReplaceFileHelper.java | 415 +++++++++++++----- 5 files changed, 375 insertions(+), 127 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java index 0b935183182..7da06f36be4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java @@ -1544,6 +1544,10 @@ public void finalizeFileDelete(Long dataFileId, String storageLocation) throws I throw new IOException("Attempted to permanently delete a physical file still associated with an existing DvObject " + "(id: " + dataFileId + ", location: " + storageLocation); } + if(storageLocation == null || storageLocation.isBlank()) { + throw new IOException("Attempted to delete a physical file with no location " + + "(id: " + dataFileId + ", location: " + storageLocation); + } StorageIO directStorageAccess = DataAccess.getDirectStorageIO(storageLocation); directStorageAccess.delete(); } diff --git a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java index 6cf294ffd6d..f5e137a1981 100644 --- a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java @@ -586,8 +586,7 @@ public String init() { datafileService, permissionService, commandEngine, - systemConfig, - licenseServiceBean); + systemConfig); fileReplacePageHelper = new FileReplacePageHelper(addReplaceFileHelper, dataset, diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index aff543e643c..ed54704c4a1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -2451,8 +2451,7 @@ public Response addFileToDataset(@PathParam("id") String idSupplied, fileService, permissionSvc, commandEngine, - systemConfig, - licenseSvc); + systemConfig); //------------------- @@ -3387,14 +3386,84 @@ public Response addFilesToDataset(@PathParam("id") String idSupplied, this.fileService, this.permissionSvc, this.commandEngine, - this.systemConfig, - this.licenseSvc + this.systemConfig ); return addFileHelper.addFiles(jsonData, dataset, authUser); } + /** + * Replace multiple Files to an existing Dataset + * + * @param idSupplied + * @param jsonData + * @return + */ + @POST + @Path("{id}/replaceFiles") + @Consumes(MediaType.MULTIPART_FORM_DATA) + public Response replaceFilesInDataset(@PathParam("id") String idSupplied, + @FormDataParam("jsonData") String jsonData) { + + if (!systemConfig.isHTTPUpload()) { + return error(Response.Status.SERVICE_UNAVAILABLE, BundleUtil.getStringFromBundle("file.api.httpDisabled")); + } + + // ------------------------------------- + // (1) Get the user from the API key + // ------------------------------------- + User authUser; + try { + authUser = findUserOrDie(); + } catch (WrappedResponse ex) { + return error(Response.Status.FORBIDDEN, BundleUtil.getStringFromBundle("file.addreplace.error.auth") + ); + } + + // ------------------------------------- + // (2) Get the Dataset Id + // 
------------------------------------- + Dataset dataset; + + try { + dataset = findDatasetOrDie(idSupplied); + } catch (WrappedResponse wr) { + return wr.getResponse(); + } + + dataset.getLocks().forEach(dl -> { + logger.info(dl.toString()); + }); + + //------------------------------------ + // (2a) Make sure dataset does not have package file + // -------------------------------------- + + for (DatasetVersion dv : dataset.getVersions()) { + if (dv.isHasPackageFile()) { + return error(Response.Status.FORBIDDEN, + BundleUtil.getStringFromBundle("file.api.alreadyHasPackageFile") + ); + } + } + + DataverseRequest dvRequest = createDataverseRequest(authUser); + + AddReplaceFileHelper addFileHelper = new AddReplaceFileHelper( + dvRequest, + this.ingestService, + this.datasetService, + this.fileService, + this.permissionSvc, + this.commandEngine, + this.systemConfig + ); + + return addFileHelper.replaceFiles(jsonData, dataset, authUser); + + } + /** * API to find curation assignments and statuses * diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Files.java b/src/main/java/edu/harvard/iq/dataverse/api/Files.java index d1ecd2d8824..ecb40af19f8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Files.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Files.java @@ -256,8 +256,7 @@ public Response replaceFileInDataset( this.fileService, this.permissionSvc, this.commandEngine, - this.systemConfig, - this.licenseSvc); + this.systemConfig); // (5) Run "runReplaceFileByDatasetId" long fileToReplaceId = 0; diff --git a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java index 8e7922fd83b..207f1e309be 100644 --- a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java +++ b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java @@ -26,20 +26,22 @@ import edu.harvard.iq.dataverse.engine.command.impl.RestrictFileCommand; import edu.harvard.iq.dataverse.engine.command.impl.UpdateDatasetVersionCommand; import edu.harvard.iq.dataverse.ingest.IngestServiceBean; -import edu.harvard.iq.dataverse.license.LicenseServiceBean; import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.SystemConfig; import edu.harvard.iq.dataverse.util.file.CreateDataFileResult; import edu.harvard.iq.dataverse.util.json.JsonPrinter; +import edu.harvard.iq.dataverse.util.json.JsonUtil; + import java.io.IOException; import java.io.InputStream; -import java.io.StringReader; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; +import java.util.HashMap; import java.util.Iterator; import java.util.List; +import java.util.Map; import java.util.Objects; import java.util.Set; import java.util.logging.Level; @@ -47,10 +49,10 @@ import javax.ejb.EJBException; import javax.json.Json; import javax.json.JsonArrayBuilder; +import javax.json.JsonNumber; import javax.json.JsonObject; import javax.json.JsonArray; import javax.json.JsonObjectBuilder; -import javax.json.JsonReader; import javax.validation.ConstraintViolation; import javax.ws.rs.core.MediaType; import javax.ws.rs.core.Response; @@ -114,10 +116,9 @@ public class AddReplaceFileHelper{ public static String FILE_ADD_OPERATION = "FILE_ADD_OPERATION"; public static String FILE_REPLACE_OPERATION = "FILE_REPLACE_OPERATION"; public static String FILE_REPLACE_FORCE_OPERATION = "FILE_REPLACE_FORCE_OPERATION"; - public 
static String MULTIPLEFILES_ADD_OPERATION = "MULTIPLEFILES_ADD_OPERATION"; - + private String currentOperation; - + boolean multifile = false; // ----------------------------------- // All the needed EJBs, passed to the constructor // ----------------------------------- @@ -127,8 +128,6 @@ public class AddReplaceFileHelper{ private PermissionServiceBean permissionService; private EjbDataverseEngine commandEngine; private SystemConfig systemConfig; - private LicenseServiceBean licenseServiceBean; - // ----------------------------------- // Instance variables directly added // ----------------------------------- @@ -144,10 +143,6 @@ public class AddReplaceFileHelper{ // -- Optional private DataFile fileToReplace; // step 25 - // ----------------------------------- - // Instance variables derived from other input - // ----------------------------------- - private User user; private DatasetVersion workingVersion; private DatasetVersion clone; List initialFileList; @@ -256,13 +251,12 @@ public void resetFileHelper(){ * @param dvRequest */ public AddReplaceFileHelper(DataverseRequest dvRequest, - IngestServiceBean ingestService, + IngestServiceBean ingestService, DatasetServiceBean datasetService, DataFileServiceBean fileService, PermissionServiceBean permissionService, EjbDataverseEngine commandEngine, - SystemConfig systemConfig, - LicenseServiceBean licenseServiceBean){ + SystemConfig systemConfig){ // --------------------------------- // make sure DataverseRequest isn't null and has a user @@ -304,16 +298,12 @@ public AddReplaceFileHelper(DataverseRequest dvRequest, this.permissionService = permissionService; this.commandEngine = commandEngine; this.systemConfig = systemConfig; - this.licenseServiceBean = licenseServiceBean; - - - initErrorHandling(); // Initiate instance vars this.dataset = null; this.dvRequest = dvRequest; - this.user = dvRequest.getUser(); + dvRequest.getUser(); } @@ -336,7 +326,7 @@ public boolean runAddFileByDataset(Dataset chosenDataset, } - public boolean runAddFileByDataset(Dataset chosenDataset, + private boolean runAddFileByDataset(Dataset chosenDataset, String newFileName, String newFileContentType, String newStorageIdentifier, @@ -348,12 +338,8 @@ public boolean runAddFileByDataset(Dataset chosenDataset, initErrorHandling(); - if(multipleFiles) { - this.currentOperation = MULTIPLEFILES_ADD_OPERATION; - } - else { - this.currentOperation = FILE_ADD_OPERATION; - } + multifile=multipleFiles; + this.currentOperation = FILE_ADD_OPERATION; if (!this.step_001_loadDataset(chosenDataset)){ return false; @@ -393,6 +379,11 @@ public boolean runAddFile(Dataset dataset, }*/ + public boolean runForceReplaceFile(long fileToReplaceId, String newFilename, String newFileContentType, + String newStorageIdentifier, InputStream newFileInputStream, OptionalFileParams optionalFileParams) { + return runForceReplaceFile(fileToReplaceId, newFilename, newFileContentType, + newStorageIdentifier, newFileInputStream, optionalFileParams, false); + } /** * After the constructor, this method is called to replace a file * @@ -403,16 +394,18 @@ public boolean runAddFile(Dataset dataset, * @param newFileInputStream * @return */ - public boolean runForceReplaceFile(Long oldFileId, + private boolean runForceReplaceFile(Long oldFileId, String newFileName, String newFileContentType, String newStorageIdentifier, InputStream newFileInputStream, - OptionalFileParams optionalFileParams){ + OptionalFileParams optionalFileParams, + boolean multipleFiles){ msgt(">> runForceReplaceFile"); initErrorHandling(); 
+ multifile=multipleFiles; this.currentOperation = FILE_REPLACE_FORCE_OPERATION; @@ -432,16 +425,25 @@ public boolean runForceReplaceFile(Long oldFileId, } - public boolean runReplaceFile(Long oldFileId, + public boolean runReplaceFile(long fileToReplaceId, String newFilename, String newFileContentType, + String newStorageIdentifier, InputStream newFileInputStream, OptionalFileParams optionalFileParams) { + return runReplaceFile(fileToReplaceId, newFilename, newFileContentType, + newStorageIdentifier, newFileInputStream, optionalFileParams, false); + + } + + private boolean runReplaceFile(Long oldFileId, String newFileName, String newFileContentType, String newStorageIdentifier, InputStream newFileInputStream, - OptionalFileParams optionalFileParams){ + OptionalFileParams optionalFileParams, + boolean multipleFiles){ msgt(">> runReplaceFile"); initErrorHandling(); + multifile=multipleFiles; this.currentOperation = FILE_REPLACE_OPERATION; if (oldFileId==null){ @@ -759,19 +761,15 @@ private boolean runAddReplacePhase2(boolean tabIngest){ return false; } - - if (this.isFileReplaceOperation()){ + if (this.isFileReplaceOperation()) { msgt("step_080_run_update_dataset_command_for_replace"); - if (!this.step_080_run_update_dataset_command_for_replace()){ - return false; + if (!this.step_080_run_update_dataset_command_for_replace()) { + return false; } - - }else{ + } else if (!multifile) { msgt("step_070_run_update_dataset_command"); - if (!this.isMultipleFilesAddOperation()) { - if (!this.step_070_run_update_dataset_command()) { - return false; - } + if (!this.step_070_run_update_dataset_command()) { + return false; } } @@ -834,16 +832,6 @@ public boolean isFileAddOperation(){ return this.currentOperation.equals(FILE_ADD_OPERATION); } - /** - * Is this a multiple files add operation ? 
- * @return - */ - - public boolean isMultipleFilesAddOperation(){ - - return this.currentOperation.equals(MULTIPLEFILES_ADD_OPERATION); - } - /** * Initialize error handling vars */ @@ -1201,7 +1189,10 @@ private boolean step_030_createNewFilesViaIngest(){ // Load the working version of the Dataset workingVersion = dataset.getEditVersion(); - clone = workingVersion.cloneDatasetVersion(); + if(!multifile) { + //Don't repeatedly update the clone (losing changes) in multifile case + clone = workingVersion.cloneDatasetVersion(); + } try { CreateDataFileResult result = FileUtil.createDataFiles(workingVersion, this.newFileInputStream, @@ -1292,9 +1283,6 @@ private boolean step_040_auto_checkForDuplicates(){ // Initialize new file list this.finalFileList = new ArrayList<>(); - String warningMessage = null; - - if (isFileReplaceOperation() && this.fileToReplace == null){ // This error shouldn't happen if steps called correctly this.addErrorSevere(getBundleErr("existing_file_to_replace_is_null") + " (This error shouldn't happen if steps called in sequence....checkForFileReplaceDuplicate)"); @@ -1511,10 +1499,7 @@ private boolean step_050_checkForConstraintViolations(){ return true; } - // ----------------------------------------------------------- - // violations found: gather all error messages - // ----------------------------------------------------------- - List errMsgs = new ArrayList<>(); + new ArrayList<>(); for (ConstraintViolation violation : constraintViolations) { /* for 8859 return conflict response status if the validation fails @@ -1605,70 +1590,81 @@ private boolean step_060_addFilesViaIngestService(boolean tabIngest){ return true; } + List filesToDelete = new ArrayList(); + Map deleteFileStorageLocations = new HashMap<>(); /** * Create and run the update dataset command * * @return */ - private boolean step_070_run_update_dataset_command(){ - - if (this.hasError()){ + private boolean step_070_run_update_dataset_command() { + //Note -only single file operations and multifile replace call this, multifile add does not + if (this.hasError()) { return false; } - Command update_cmd; + Command update_cmd = null; String deleteStorageLocation = null; - long deleteFileId=-1; - if(isFileReplaceOperation()) { - List filesToDelete = new ArrayList(); + long deleteFileId = -1; + if (isFileReplaceOperation()) { + if (!multifile) { + filesToDelete.clear(); + deleteFileStorageLocations.clear(); + } filesToDelete.add(fileToReplace.getFileMetadata()); - - if(!fileToReplace.isReleased()) { - //If file is only in draft version, also need to delete the physical file - deleteStorageLocation = fileService.getPhysicalFileToDelete(fileToReplace); - deleteFileId=fileToReplace.getId(); + + if (!fileToReplace.isReleased()) { + // If file is only in draft version, also need to delete the physical file + deleteStorageLocation = fileService.getPhysicalFileToDelete(fileToReplace); + deleteFileId = fileToReplace.getId(); + deleteFileStorageLocations.put(deleteFileId, deleteStorageLocation); + } + if (!multifile) { + // Adding the file to the delete list for the command will delete this + // filemetadata and, if the file hasn't been released, the datafile itself. + update_cmd = new UpdateDatasetVersionCommand(dataset, dvRequest, filesToDelete, clone); } - //Adding the file to the delete list for the command will delete this filemetadata and, if the file hasn't been released, the datafile itself. 
- update_cmd = new UpdateDatasetVersionCommand(dataset, dvRequest, filesToDelete, clone); } else { - update_cmd = new UpdateDatasetVersionCommand(dataset, dvRequest, clone); + update_cmd = new UpdateDatasetVersionCommand(dataset, dvRequest, clone); } - ((UpdateDatasetVersionCommand) update_cmd).setValidateLenient(true); - - try { - // Submit the update dataset command - // and update the local dataset object - // - dataset = commandEngine.submit(update_cmd); - } catch (CommandException ex) { - /** - * @todo Add a test to exercise this error. - */ - this.addErrorSevere(getBundleErr("add.add_file_error")); - logger.severe(ex.getMessage()); - return false; - }catch (EJBException ex) { - /** - * @todo Add a test to exercise this error. - */ - this.addErrorSevere("add.add_file_error (see logs)"); - logger.severe(ex.getMessage()); - return false; + if (!multifile) { + //Avoid NPE in multifile replace case + ((UpdateDatasetVersionCommand) update_cmd).setValidateLenient(true); } - //Sanity check - if(isFileReplaceOperation()) { - if (deleteStorageLocation != null) { - // Finalize the delete of the physical file - // (File service will double-check that the datafile no - // longer exists in the database, before proceeding to - // delete the physical file) - try { - fileService.finalizeFileDelete(deleteFileId, deleteStorageLocation); - } catch (IOException ioex) { - logger.warning("Failed to delete the physical file associated with the deleted datafile id=" - + deleteFileId + ", storage location: " + deleteStorageLocation); - } + if (!multifile) { + try { + // Submit the update dataset command + // and update the local dataset object + // + dataset = commandEngine.submit(update_cmd); + } catch (CommandException ex) { + /** + * @todo Add a test to exercise this error. + */ + this.addErrorSevere(getBundleErr("add.add_file_error")); + logger.severe(ex.getMessage()); + return false; + } catch (EJBException ex) { + /** + * @todo Add a test to exercise this error. + */ + this.addErrorSevere("add.add_file_error (see logs)"); + logger.severe(ex.getMessage()); + return false; + } + } + + if (isFileReplaceOperation() && !multifile) { + // Finalize the delete of the physical file + // (File service will double-check that the datafile no + // longer exists in the database, before proceeding to + // delete the physical file) + try { + fileService.finalizeFileDelete(deleteFileId, deleteStorageLocation); + } catch (IOException ioex) { + logger.warning("Failed to delete the physical file associated with the deleted datafile id=" + + deleteFileId + ", storage location: " + deleteStorageLocation); } } return true; @@ -1766,7 +1762,7 @@ private boolean step_080_run_update_dataset_command_for_replace(){ } /* - * Go through the final file list, settting the rootFileId and previousFileId + * Go through the final file list, setting the rootFileId and previousFileId */ for (DataFile df : finalFileList) { df.setPreviousDataFileId(fileToReplace.getId()); @@ -1927,7 +1923,7 @@ private boolean step_100_startIngestJobs(){ //return true; //} - if (!this.isMultipleFilesAddOperation()) { + if (!multifile) { msg("pre ingest start"); // start the ingest! ingestService.startIngestJobsForDataset(dataset, dvRequest.getAuthenticatedUser()); @@ -2021,6 +2017,13 @@ public void setDuplicateFileWarning(String duplicateFileWarning) { this.duplicateFileWarning = duplicateFileWarning; } + /** Add multiple pre-positioned files listed in the jsonData. Works with direct upload, Globus, and other out-of-band methods. 
+ * + * @param jsonData - an array of jsonData entries (one per file) using the single add file jsonData format + * @param dataset + * @param authUser + * @return + */ public Response addFiles(String jsonData, Dataset dataset, User authUser) { msgt("(addFilesToDataset) jsonData: " + jsonData.toString()); @@ -2033,15 +2036,14 @@ public Response addFiles(String jsonData, Dataset dataset, User authUser) { // ----------------------------------------------------------- // Read jsonData and Parse files information from jsondata : // ----------------------------------------------------------- - try (StringReader rdr = new StringReader(jsonData)) { - JsonReader dbJsonReader = Json.createReader(rdr); - filesJson = dbJsonReader.readArray(); - dbJsonReader.close(); + try { + filesJson = JsonUtil.getJsonArray(jsonData); if (filesJson != null) { totalNumberofFiles = filesJson.getValuesAs(JsonObject.class).size(); - + workingVersion = dataset.getEditVersion(); + clone = workingVersion.cloneDatasetVersion(); for (JsonObject fileJson : filesJson.getValuesAs(JsonObject.class)) { OptionalFileParams optionalFileParams = null; @@ -2131,7 +2133,7 @@ public Response addFiles(String jsonData, Dataset dataset, User authUser) { } try { - Command cmd = new UpdateDatasetVersionCommand(dataset, dvRequest); + Command cmd = new UpdateDatasetVersionCommand(dataset, dvRequest, clone); ((UpdateDatasetVersionCommand) cmd).setValidateLenient(true); commandEngine.submit(cmd); } catch (CommandException ex) { @@ -2140,9 +2142,6 @@ public Response addFiles(String jsonData, Dataset dataset, User authUser) { dataset = datasetService.find(dataset.getId()); - List s = dataset.getFiles(); - for (DataFile dataFile : s) { - } //ingest job ingestService.startIngestJobsForDataset(dataset, (AuthenticatedUser) authUser); @@ -2166,6 +2165,184 @@ public Response addFiles(String jsonData, Dataset dataset, User authUser) { .add("status", STATUS_OK) .add("data", Json.createObjectBuilder().add("Files", jarr).add("Result", result)).build() ).build(); } + + /** + * Replace multiple files with prepositioned replacements as listed in the + * jsonData. Works with direct upload, Globus, and other out-of-band methods. 
+ * + * @param jsonData - must include fileToReplaceId key with file ID and may include forceReplace key with true/false(default) + * @param dataset + * @param authUser + * @return + */ + + public Response replaceFiles(String jsonData, Dataset dataset, User authUser) { + msgt("(replaceFilesInDataset) jsonData: " + jsonData.toString()); + + JsonArrayBuilder jarr = Json.createArrayBuilder(); + + JsonArray filesJson = null; + + int totalNumberofFiles = 0; + int successNumberofFiles = 0; + // ----------------------------------------------------------- + // Read jsonData and Parse files information from jsondata : + // ----------------------------------------------------------- + try { + filesJson = JsonUtil.getJsonArray(jsonData); + + + if (filesJson != null) { + totalNumberofFiles = filesJson.getValuesAs(JsonObject.class).size(); + workingVersion = dataset.getEditVersion(); + clone = workingVersion.cloneDatasetVersion(); + for (JsonObject fileJson : filesJson.getValuesAs(JsonObject.class)) { + boolean forceReplace = false; + // (2a) Check for optional "forceReplace" + if ((fileJson.containsKey("forceReplace"))) { + forceReplace = fileJson.getBoolean("forceReplace", false); + } + long fileToReplaceId = -1; + JsonNumber ftri = fileJson.getJsonNumber("fileToReplaceId"); + if(ftri !=null) { + fileToReplaceId = ftri.longValueExact(); + } + + OptionalFileParams optionalFileParams = null; + try { + // (2b) Load up optional params via JSON + // - Will skip extra attributes which includes fileToReplaceId and forceReplace + optionalFileParams = new OptionalFileParams(fileJson.toString()); + + String newFilename = null; + String newFileContentType = null; + String newStorageIdentifier = null; + if ((fileToReplaceId !=-1) && optionalFileParams.hasStorageIdentifier()) { + newStorageIdentifier = optionalFileParams.getStorageIdentifier(); + newStorageIdentifier = DataAccess.expandStorageIdentifierIfNeeded(newStorageIdentifier); + if(!DataAccess.uploadToDatasetAllowed(dataset, newStorageIdentifier)) { + addErrorSevere("Dataset store configuration does not allow provided storageIdentifier."); + } + if (optionalFileParams.hasFileName()) { + newFilename = optionalFileParams.getFileName(); + if (optionalFileParams.hasMimetype()) { + newFileContentType = optionalFileParams.getMimeType(); + } + } + + msgt("REPLACE! 
= " + newFilename); + if (!hasError()) { + if (forceReplace){ + runForceReplaceFile(fileToReplaceId, + newFilename, + newFileContentType, + newStorageIdentifier, + null, + optionalFileParams, true); + }else{ + runReplaceFile(fileToReplaceId, + newFilename, + newFileContentType, + newStorageIdentifier, + null, + optionalFileParams, true); + } + } + if (hasError()) { + JsonObjectBuilder fileoutput = Json.createObjectBuilder() + .add("storageIdentifier", newStorageIdentifier) + .add("errorMessage", getHttpErrorCode().toString() +":"+ getErrorMessagesAsString("\n")) + .add("fileDetails", fileJson); + jarr.add(fileoutput); + } else { + JsonObject successresult = getSuccessResultAsJsonObjectBuilder().build(); + String duplicateWarning = getDuplicateFileWarning(); + + if (duplicateWarning != null && !duplicateWarning.isEmpty()) { + JsonObjectBuilder fileoutput = Json.createObjectBuilder() + .add("storageIdentifier", newStorageIdentifier) + .add("warningMessage", getDuplicateFileWarning()) + .add("fileDetails", successresult.getJsonArray("files").getJsonObject(0)); + jarr.add(fileoutput); + } else { + JsonObjectBuilder fileoutput = Json.createObjectBuilder() + .add("storageIdentifier", newStorageIdentifier) + .add("successMessage", "Replaced successfully in the dataset") + .add("fileDetails", successresult.getJsonArray("files").getJsonObject(0)); + jarr.add(fileoutput); + } + successNumberofFiles = successNumberofFiles + 1; + } + } else { + JsonObjectBuilder fileoutput = Json.createObjectBuilder() + .add("errorMessage", "You must provide a fileToReplaceId, storageidentifier, filename, and mimetype.") + .add("fileDetails", fileJson); + + jarr.add(fileoutput); + } + + } catch (DataFileTagException ex) { + Logger.getLogger(Files.class.getName()).log(Level.SEVERE, null, ex); + JsonObjectBuilder fileoutput = Json.createObjectBuilder() + .add("errorCode", Response.Status.BAD_REQUEST.getStatusCode()) + .add("message", ex.getMessage()) + .add("fileDetails", fileJson); + jarr.add(fileoutput); + + } + catch (NoFilesException ex) { + Logger.getLogger(Files.class.getName()).log(Level.SEVERE, null, ex); + JsonObjectBuilder fileoutput = Json.createObjectBuilder() + .add("errorCode", Response.Status.BAD_REQUEST.getStatusCode()) + .add("message", BundleUtil.getStringFromBundle("NoFileException! Serious Error! 
See administrator!")) + .add("fileDetails", fileJson); + jarr.add(fileoutput); + } + + }// End of adding files + + DatasetLock eipLock = dataset.getLockFor(DatasetLock.Reason.EditInProgress); + if (eipLock == null) { + logger.warning("Dataset not locked for EditInProgress "); + } else { + datasetService.removeDatasetLocks(dataset, DatasetLock.Reason.EditInProgress); + logger.info("Removed EditInProgress lock "); + } + + try { + Command cmd = new UpdateDatasetVersionCommand(dataset, dvRequest, filesToDelete, clone); + ((UpdateDatasetVersionCommand) cmd).setValidateLenient(true); + commandEngine.submit(cmd); + } catch (CommandException ex) { + return error(Response.Status.INTERNAL_SERVER_ERROR, "CommandException updating DatasetVersion from addFiles job: " + ex.getMessage()); + } + + fileService.finalizeFileDeletes(deleteFileStorageLocations); + + dataset = datasetService.find(dataset.getId()); + + //ingest job + ingestService.startIngestJobsForDataset(dataset, (AuthenticatedUser) authUser); + + } + } + catch ( javax.json.stream.JsonParsingException ex) { + ex.printStackTrace(); + return error(BAD_REQUEST, "Json Parsing Exception :" + ex.getMessage()); + } + catch (Exception e) { + e.printStackTrace(); + return error(BAD_REQUEST, e.getMessage()); + } + + JsonObjectBuilder result = Json.createObjectBuilder() + .add("Total number of files", totalNumberofFiles) + .add("Number of files successfully replaced", successNumberofFiles); + + return Response.ok().entity(Json.createObjectBuilder() + .add("status", STATUS_OK) + .add("data", Json.createObjectBuilder().add("Files", jarr).add("Result", result)).build() ).build(); + } protected static Response error(Response.Status sts, String msg ) { return Response.status(sts) From e6bd5b3d63f4655a48080cdcda284e7507f9fd3f Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 30 Sep 2022 10:27:54 -0400 Subject: [PATCH 078/173] docs --- .../developers/s3-direct-upload-api.rst | 38 ++++++++++++++++++- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/doc/sphinx-guides/source/developers/s3-direct-upload-api.rst b/doc/sphinx-guides/source/developers/s3-direct-upload-api.rst index 3dc73ce6a0c..b29b3421900 100644 --- a/doc/sphinx-guides/source/developers/s3-direct-upload-api.rst +++ b/doc/sphinx-guides/source/developers/s3-direct-upload-api.rst @@ -122,7 +122,7 @@ To add multiple Uploaded Files to the Dataset --------------------------------------------- Once the files exists in the s3 bucket, a final API call is needed to add all the files to the Dataset. In this API call, additional metadata is added using the "jsonData" parameter. -jsonData normally includes information such as a file description, tags, provenance, whether the file is restricted, etc. For direct uploads, the jsonData object must also include values for: +jsonData for this call is an array of objects that normally include information such as a file description, tags, provenance, whether the file is restricted, etc. For direct uploads, the jsonData object must also include values for: * "description" - A description of the file * "directoryLabel" - The "File Path" of the file, indicating which folder the file should be uploaded to within the dataset @@ -154,7 +154,7 @@ Replacing an existing file in the Dataset ----------------------------------------- Once the file exists in the s3 bucket, a final API call is needed to register it as a replacement of an existing file. 
This call is the same call used to replace a file to a Dataverse installation but, rather than sending the file bytes, additional metadata is added using the "jsonData" parameter. -jsonData normally includes information such as a file description, tags, provenance, whether the file is restricted, whether to allow the mimetype to change (forceReplace=true), etc. For direct uploads, the jsonData object must also include values for: +jsonData normally includes information such as a file description, tags, provenance, whether the file is restricted, whether to allow the mimetype to change (forceReplace=true), etc. For direct uploads, the jsonData object must include values for: * "storageIdentifier" - String, as specified in prior calls * "fileName" - String @@ -178,3 +178,37 @@ Note that the API call does not validate that the file matches the hash value su Note that this API call can be used independently of the others, e.g. supporting use cases in which the file already exists in S3/has been uploaded via some out-of-band method. With current S3 stores the object identifier must be in the correct bucket for the store, include the PID authority/identifier of the parent dataset, and be guaranteed unique, and the supplied storage identifer must be prefaced with the store identifier used in the Dataverse installation, as with the internally generated examples above. + +Replacing multiple existing files in the Dataset +------------------------------------------------ + +Once the replacement files exist in the s3 bucket, a final API call is needed to register them as replacements for existing files. In this API call, additional metadata is added using the "jsonData" parameter. +jsonData for this call is array of objects that normally include information such as a file description, tags, provenance, whether the file is restricted, etc. For direct uploads, the jsonData object must include some additional values: + +* "fileToReplaceId" - the id of the file being replaced +* "forceReplace" - whether to replace a file with one of a different mimetype (optional, default is false) +* "description" - A description of the file +* "directoryLabel" - The "File Path" of the file, indicating which folder the file should be uploaded to within the dataset +* "storageIdentifier" - String +* "fileName" - String +* "mimeType" - String +* "fixity/checksum" either: + + * "md5Hash" - String with MD5 hash value, or + * "checksum" - Json Object with "@type" field specifying the algorithm used and "@value" field with the value from that algorithm, both Strings + + +The allowed checksum algorithms are defined by the edu.harvard.iq.dataverse.DataFile.CheckSumType class and currently include MD5, SHA-1, SHA-256, and SHA-512 + +.. 
code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/7U7YBV + export JSON_DATA="[{'fileToReplaceId': 10, 'description':'My description.','directoryLabel':'data/subdir1','categories':['Data'], 'restrict':'false', 'storageIdentifier':'s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42', 'fileName':'file1.txt', 'mimeType':'text/plain', 'checksum': {'@type': 'SHA-1', '@value': '123456'}}, \ + {'fileToReplaceId': 10, 'forceReplace': true, 'description':'My description.','directoryLabel':'data/subdir1','categories':['Data'], 'restrict':'false', 'storageIdentifier':'s3://demo-dataverse-bucket:176e28068b0-1c3f80357d53', 'fileName':'file2.txt', 'mimeType':'text/plain', 'checksum': {'@type': 'SHA-1', '@value': '123789'}}]" + + curl -X POST -H "X-Dataverse-key: $API_TOKEN" "$SERVER_URL/api/datasets/:persistentId/replaceFiles?persistentId=$PERSISTENT_IDENTIFIER" -F "jsonData=$JSON_DATA" + +Note that this API call can be used independently of the others, e.g. supporting use cases in which the files already exists in S3/has been uploaded via some out-of-band method. +With current S3 stores the object identifier must be in the correct bucket for the store, include the PID authority/identifier of the parent dataset, and be guaranteed unique, and the supplied storage identifer must be prefaced with the store identifier used in the Dataverse installation, as with the internally generated examples above. From 088cf8ac0248466b03bc2ae07e6c1d1439154f62 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 30 Sep 2022 10:31:24 -0400 Subject: [PATCH 079/173] release note --- doc/release-notes/9005-replaceFiles-api-call | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 doc/release-notes/9005-replaceFiles-api-call diff --git a/doc/release-notes/9005-replaceFiles-api-call b/doc/release-notes/9005-replaceFiles-api-call new file mode 100644 index 00000000000..b1df500251e --- /dev/null +++ b/doc/release-notes/9005-replaceFiles-api-call @@ -0,0 +1,3 @@ +9005 + +DIrect upload and out-of-band uploads can now be used to replace multiple files with one API call (complementing the prior ability to add multiple new files) \ No newline at end of file From 4ffccdb08675f92b3f6e2c46059b9d75ba97b077 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 30 Sep 2022 13:43:33 -0400 Subject: [PATCH 080/173] fix replaceFiles and remove hasError checks that block further changes hasError is not cleared where it was being used causing one error to skip all further add/replace calls and report that error for all subsequent files --- .../datasetutility/AddReplaceFileHelper.java | 32 +++++++------------ 1 file changed, 11 insertions(+), 21 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java index 207f1e309be..efb05558b40 100644 --- a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java +++ b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java @@ -2067,10 +2067,9 @@ public Response addFiles(String jsonData, Dataset dataset, User authUser) { } msgt("ADD! 
= " + newFilename); - if (!hasError()) { - runAddFileByDataset(dataset, newFilename, newFileContentType, newStorageIdentifier, - null, optionalFileParams, true); - } + + runAddFileByDataset(dataset, newFilename, newFileContentType, newStorageIdentifier, null, + optionalFileParams, true); if (hasError()) { JsonObjectBuilder fileoutput = Json.createObjectBuilder() .add("storageIdentifier", newStorageIdentifier) @@ -2176,9 +2175,10 @@ public Response addFiles(String jsonData, Dataset dataset, User authUser) { * @return */ - public Response replaceFiles(String jsonData, Dataset dataset, User authUser) { + public Response replaceFiles(String jsonData, Dataset ds, User authUser) { msgt("(replaceFilesInDataset) jsonData: " + jsonData.toString()); + this.dataset = ds; JsonArrayBuilder jarr = Json.createArrayBuilder(); JsonArray filesJson = null; @@ -2231,22 +2231,12 @@ public Response replaceFiles(String jsonData, Dataset dataset, User authUser) { } msgt("REPLACE! = " + newFilename); - if (!hasError()) { - if (forceReplace){ - runForceReplaceFile(fileToReplaceId, - newFilename, - newFileContentType, - newStorageIdentifier, - null, - optionalFileParams, true); - }else{ - runReplaceFile(fileToReplaceId, - newFilename, - newFileContentType, - newStorageIdentifier, - null, - optionalFileParams, true); - } + if (forceReplace) { + runForceReplaceFile(fileToReplaceId, newFilename, newFileContentType, + newStorageIdentifier, null, optionalFileParams, true); + } else { + runReplaceFile(fileToReplaceId, newFilename, newFileContentType, newStorageIdentifier, + null, optionalFileParams, true); } if (hasError()) { JsonObjectBuilder fileoutput = Json.createObjectBuilder() From 9d2fc0585c136c21109fb624002438d562246c75 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 30 Sep 2022 13:45:07 -0400 Subject: [PATCH 081/173] relocate/rename entry for the /addFiles, /replaceFiles in native-api the title Add File Metadata has been misunderstood to mean the call can change the metadata for existing files which it can't. The entry was also in the File section when it is a dataset-level call --- doc/sphinx-guides/source/api/native-api.rst | 49 +++------------------ 1 file changed, 7 insertions(+), 42 deletions(-) diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 93e1c36f179..e634bee37c9 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -1511,6 +1511,13 @@ The fully expanded example above (without environment variables) looks like this curl -H X-Dataverse-key: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx -X POST https://demo.dataverse.org/api/datasets/:persistentId/add?persistentId=doi:10.5072/FK2/J8SJZB -F 'jsonData={"description":"A remote image.","storageIdentifier":"trsa://themes/custom/qdr/images/CoreTrustSeal-logo-transparent.png","checksumType":"MD5","md5Hash":"509ef88afa907eaf2c17c1c8d8fde77e","label":"testlogo.png","fileName":"testlogo.png","mimeType":"image/png"}' +Adding Files To a Dataset via Other Tools +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In some circumstances, it may be useful to move or copy files into Dataverse's storage manually or via external tools and then add then to a dataset (i.e. without involving Dataverse in the file transfer itself). +Two API calls are available for this use case to add files to a dataset or to replace files that were already in the dataset. 
+These calls were developed as part of Dataverse's direct upload mechanism and are detailed in :doc:`/developers/s3-direct-upload-api`. + Report the data (file) size of a Dataset ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -2348,48 +2355,6 @@ The fully expanded example above (without environment variables) looks like this Note: The ``id`` returned in the json response is the id of the file metadata version. - -Adding File Metadata -~~~~~~~~~~~~~~~~~~~~ - -This API call requires a ``jsonString`` expressing the metadata of multiple files. It adds file metadata to the database table where the file has already been copied to the storage. - -The jsonData object includes values for: - -* "description" - A description of the file -* "directoryLabel" - The "File Path" of the file, indicating which folder the file should be uploaded to within the dataset -* "storageIdentifier" - String -* "fileName" - String -* "mimeType" - String -* "fixity/checksum" either: - - * "md5Hash" - String with MD5 hash value, or - * "checksum" - Json Object with "@type" field specifying the algorithm used and "@value" field with the value from that algorithm, both Strings - -.. note:: See :ref:`curl-examples-and-environment-variables` if you are unfamiliar with the use of ``export`` below. - -A curl example using an ``PERSISTENT_ID`` - -* ``SERVER_URL`` - e.g. https://demo.dataverse.org -* ``API_TOKEN`` - API endpoints require an API token that can be passed as the X-Dataverse-key HTTP header. For more details, see the :doc:`auth` section. -* ``PERSISTENT_IDENTIFIER`` - Example: ``doi:10.5072/FK2/7U7YBV`` - -.. code-block:: bash - - export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx - export SERVER_URL=https://demo.dataverse.org - export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/7U7YBV - export JSON_DATA="[{'description':'My description.','directoryLabel':'data/subdir1','categories':['Data'], 'restrict':'false', 'storageIdentifier':'s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42', 'fileName':'file1.txt', 'mimeType':'text/plain', 'checksum': {'@type': 'SHA-1', '@value': '123456'}}, \ - {'description':'My description.','directoryLabel':'data/subdir1','categories':['Data'], 'restrict':'false', 'storageIdentifier':'s3://demo-dataverse-bucket:176e28068b0-1c3f80357d53', 'fileName':'file2.txt', 'mimeType':'text/plain', 'checksum': {'@type': 'SHA-1', '@value': '123789'}}]" - - curl -X POST -H "X-Dataverse-key: $API_TOKEN" "$SERVER_URL/api/datasets/:persistentId/addFiles?persistentId=$PERSISTENT_IDENTIFIER" -F "jsonData=$JSON_DATA" - -The fully expanded example above (without environment variables) looks like this: - -.. 
code-block:: bash - - curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST https://demo.dataverse.org/api/datasets/:persistentId/addFiles?persistentId=doi:10.5072/FK2/7U7YBV -F jsonData='[{"description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "storageIdentifier":"s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42", "fileName":"file1.txt", "mimeType":"text/plain", "checksum": {"@type": "SHA-1", "@value": "123456"}}, {"description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "storageIdentifier":"s3://demo-dataverse-bucket:176e28068b0-1c3f80357d53", "fileName":"file2.txt", "mimeType":"text/plain", "checksum": {"@type": "SHA-1", "@value": "123789"}}]' - Updating File Metadata ~~~~~~~~~~~~~~~~~~~~~~ From fcf107279dcc0c7b208c68d5b2fd9deb3d0d11cd Mon Sep 17 00:00:00 2001 From: j-n-c Date: Tue, 18 Oct 2022 16:32:44 +0100 Subject: [PATCH 082/173] #9074 - Added support for building sphynx docs using python 3.10+ --- doc/sphinx-guides/requirements.txt | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/doc/sphinx-guides/requirements.txt b/doc/sphinx-guides/requirements.txt index 4488c54cd5e..eb9f952d013 100755 --- a/doc/sphinx-guides/requirements.txt +++ b/doc/sphinx-guides/requirements.txt @@ -1,5 +1,7 @@ -# current version as of this writing -Sphinx==3.5.4 +# Necessary workaround for building Sphynx guides with Python 3.10+ versions +Sphinx==3.5.4 ; python_version < '3.10' +Sphinx==5.3.0 ; python_version >= '3.10' + # Necessary workaround for ReadTheDocs for Sphinx 3.x - unnecessary as of Sphinx 4.5+ Jinja2>=3.0.2,<3.1 From 26e9861fe586503b4bd485cdb1c5d1b00fd7662b Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 21 Oct 2022 16:48:11 -0400 Subject: [PATCH 083/173] Add dvwebloader as upload option --- .../edu/harvard/iq/dataverse/DatasetPage.java | 27 ++++++++++++++++++- .../harvard/iq/dataverse/SettingsWrapper.java | 9 +++++++ .../settings/SettingsServiceBean.java | 4 ++- .../iq/dataverse/util/SystemConfig.java | 12 ++++++++- src/main/java/propertyFiles/Bundle.properties | 4 +++ src/main/webapp/editFilesFragment.xhtml | 6 ++++- src/main/webapp/resources/css/structure.css | 1 + 7 files changed, 59 insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 0a8db69bf5b..750636fec45 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -56,6 +56,7 @@ import edu.harvard.iq.dataverse.util.StringUtil; import edu.harvard.iq.dataverse.util.SystemConfig; +import edu.harvard.iq.dataverse.util.URLTokenUtil; import edu.harvard.iq.dataverse.validation.URLValidator; import edu.harvard.iq.dataverse.workflows.WorkflowComment; @@ -1845,7 +1846,9 @@ public boolean globusUploadSupported() { return settingsWrapper.isGlobusUpload() && settingsWrapper.isGlobusEnabledStorageDriver(dataset.getEffectiveStorageDriverId()); } - + public boolean webloaderUploadSupported() { + return settingsWrapper.isWebloaderUpload() && StorageIO.isDirectUploadEnabled(dataset.getEffectiveStorageDriverId()); + } private String init(boolean initFull) { @@ -6062,4 +6065,26 @@ public void startGlobusTransfer() { } PrimeFaces.current().executeScript(globusService.getGlobusDownloadScript(dataset, apiToken)); } + + public String getWebloaderUrlForDataset(Dataset d) { + String localeCode = session.getLocaleCode(); + ApiToken 
apiToken = null; + User user = session.getUser(); + + if (user instanceof AuthenticatedUser) { + apiToken = authService.findApiTokenByUser((AuthenticatedUser) user); + + if ((apiToken == null) || (apiToken.getExpireTime().before(new Date()))) { + logger.fine("Created apiToken for user: " + user.getIdentifier()); + apiToken = authService.generateApiTokenForUser((AuthenticatedUser) user); + } + } + // Use URLTokenUtil for params currently in common with external tools. + URLTokenUtil tokenUtil = new URLTokenUtil(d, apiToken, localeCode); + String appUrl; + appUrl = settingsService.getValueForKey(SettingsServiceBean.Key.WebloaderUrl) + + "?datasetPid={datasetPid}&siteUrl={siteUrl}&key={apiToken}&datasetId={datasetId}&datasetVersion={datasetVersion}&dvLocale={localeCode}"; + return tokenUtil.replaceTokensWithValues(appUrl); + } + } diff --git a/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java b/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java index aa40423000d..bf36f265743 100644 --- a/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java +++ b/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java @@ -107,6 +107,8 @@ public class SettingsWrapper implements java.io.Serializable { private Boolean rsyncOnly = null; + private Boolean webloaderUpload = null; + private String metricsUrl = null; private Boolean dataFilePIDSequentialDependent = null; @@ -338,6 +340,13 @@ public String getGlobusAppUrl() { } + public boolean isWebloaderUpload() { + if (webloaderUpload == null) { + webloaderUpload = systemConfig.isWebloaderUpload(); + } + return webloaderUpload; + } + public boolean isRsyncOnly() { if (rsyncOnly == null) { String downloadMethods = getValueForKey(SettingsServiceBean.Key.DownloadMethods); diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java index 50e29d2a333..371463fb215 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java @@ -563,7 +563,9 @@ Whether Harvesting (OAI) service is enabled /* * Allow a custom JavaScript to control values of specific fields. */ - ControlledVocabularyCustomJavaScript + ControlledVocabularyCustomJavaScript, + + WebloaderUrl ; @Override diff --git a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java index 7abd0d02065..62dcbfc8ab0 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java @@ -862,7 +862,13 @@ public enum FileUploadMethods { * Upload through Globus of large files */ - GLOBUS("globus") + GLOBUS("globus"), + + /** + * Upload folders of files through dvwebloader app + */ + + WEBLOADER("dvwebloader"); ; @@ -999,6 +1005,10 @@ public boolean isRsyncUpload(){ public boolean isGlobusUpload(){ return getMethodAvailable(FileUploadMethods.GLOBUS.toString(), true); } + + public boolean isWebloaderUpload(){ + return getMethodAvailable(FileUploadMethods.WEBLOADER.toString(), true); + } // Controls if HTTP upload is enabled for both GUI and API. 
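// Editor's note: the isWebloaderUpload()/getMethodAvailable() pattern above gates each
// upload method on whether its identifier appears in the comma-separated upload-methods
// setting (the new WebloaderUrl setting then tells the UI where a DVWebloader deployment
// lives). The following is only a self-contained sketch of that lookup idea, with
// illustrative names rather than the actual Dataverse classes:

import java.util.Arrays;
import java.util.List;
import java.util.Locale;

class UploadMethodsSketch {

    // True if the given method identifier occurs in the comma-separated setting value.
    static boolean isMethodAvailable(String uploadMethodsSetting, String method) {
        if (uploadMethodsSetting == null || method == null) {
            return false;
        }
        List<String> enabled = Arrays.asList(
                uploadMethodsSetting.toLowerCase(Locale.ROOT).split("\\s*,\\s*"));
        return enabled.contains(method.toLowerCase(Locale.ROOT));
    }

    public static void main(String[] args) {
        String setting = "native/http, dvwebloader";
        System.out.println(isMethodAvailable(setting, "dvwebloader")); // true
        System.out.println(isMethodAvailable(setting, "globus"));      // false
    }
}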
public boolean isHTTPUpload(){ diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index 8a4fdeb9e28..1019ec5d3e8 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -1671,6 +1671,10 @@ file.finishGlobus=Globus Transfer has finished file.downloadFromGlobus=Download through Globus file.globus.transfer=Globus Transfer file.globus.of=of: +file.fromWebloader.tip=Upload a folder of files. This method retains the relative path structure on from your local machine. (Using it will cancel any other types of uploads in progress on this page.) +file.fromWebloaderAfterCreate.tip=This option will be enabled after this dataset is created. +file.fromWebloader=Upload a Folder + file.api.httpDisabled=File upload via HTTP is not available for this installation of Dataverse. file.api.alreadyHasPackageFile=File upload via HTTP disabled since this dataset already contains a package file. file.replace.original=Original File diff --git a/src/main/webapp/editFilesFragment.xhtml b/src/main/webapp/editFilesFragment.xhtml index 0fd5bf48fb7..40937008ab8 100644 --- a/src/main/webapp/editFilesFragment.xhtml +++ b/src/main/webapp/editFilesFragment.xhtml @@ -158,7 +158,11 @@ widgetVar="fileUploadWidget"> - +
        [XHTML element markup was lost in this extract. Recoverable content of the hunk: it adds a folder-upload (DVWebloader) option referencing #{bundle['file.webloader.tip']} and #{bundle['file.webloaderAfterCreate.tip']}, placed alongside the existing Dropbox upload option, which references #{bundle['file.fromDropbox.tip']}.]
diff --git a/src/main/webapp/resources/css/structure.css b/src/main/webapp/resources/css/structure.css index c184c46cee9..65489431d65 100644 --- a/src/main/webapp/resources/css/structure.css +++ b/src/main/webapp/resources/css/structure.css @@ -883,6 +883,7 @@ div.panel-body.read-terms{max-height:220px; overflow-y:scroll; width:100%; backg #dragdropMsg {padding:20px;font-size:1.3em;color:#808080;text-align:center;} .dropin-btn-status.ui-icon {background: url("https://www.dropbox.com/static/images/widgets/dbx-saver-status.png") no-repeat;} .globus-btn.ui-icon {background: url("https://docs.globus.org/images/home/transfer.png") no-repeat;background-size:contain;display:inline-block;} +.webloader-btn.ui-icon {background: url("resources/images/folders.png") no-repeat;background-size:contain;display:inline-block;} /* VERSIONS */ From 85fc67d0ee870b938108347109a3719bb550d000 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 25 Oct 2022 14:38:12 -0400 Subject: [PATCH 084/173] bug fixes, refactor, styling --- .../edu/harvard/iq/dataverse/DatasetPage.java | 22 ++++------- .../iq/dataverse/EditDatafilesPage.java | 22 +++++++++++ .../AuthenticationServiceBean.java | 10 +++++ .../iq/dataverse/util/WebloaderUtil.java | 36 ++++++++++++++++++ src/main/webapp/dataset.xhtml | 1 + src/main/webapp/editFilesFragment.xhtml | 15 ++++---- src/main/webapp/editdatafiles.xhtml | 1 + src/main/webapp/resources/css/structure.css | 2 +- src/main/webapp/resources/images/folders.png | Bin 0 -> 787 bytes 9 files changed, 87 insertions(+), 22 deletions(-) create mode 100644 src/main/java/edu/harvard/iq/dataverse/util/WebloaderUtil.java create mode 100644 src/main/webapp/resources/images/folders.png diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 750636fec45..05069d34c67 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -57,6 +57,7 @@ import edu.harvard.iq.dataverse.util.StringUtil; import edu.harvard.iq.dataverse.util.SystemConfig; import edu.harvard.iq.dataverse.util.URLTokenUtil; +import edu.harvard.iq.dataverse.util.WebloaderUtil; import edu.harvard.iq.dataverse.validation.URLValidator; import edu.harvard.iq.dataverse.workflows.WorkflowComment; @@ -6068,23 +6069,16 @@ public void startGlobusTransfer() { public String getWebloaderUrlForDataset(Dataset d) { String localeCode = session.getLocaleCode(); - ApiToken apiToken = null; User user = session.getUser(); - if (user instanceof AuthenticatedUser) { - apiToken = authService.findApiTokenByUser((AuthenticatedUser) user); - - if ((apiToken == null) || (apiToken.getExpireTime().before(new Date()))) { - logger.fine("Created apiToken for user: " + user.getIdentifier()); - apiToken = authService.generateApiTokenForUser((AuthenticatedUser) user); - } + ApiToken apiToken = authService.getValidApiTokenForUser((AuthenticatedUser) user); + return WebloaderUtil.getWebloaderUrl(d, apiToken, localeCode, + settingsService.getValueForKey(SettingsServiceBean.Key.WebloaderUrl)); + } else { + // Shouldn't normally happen (seesion timeout? bug?) + logger.warning("getWebloaderUrlForDataset called for non-Authenticated user"); + return null; } - // Use URLTokenUtil for params currently in common with external tools. 
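// Editor's note: in this commit the inline "look up the token, regenerate it if missing
// or expired" logic shown below is replaced by a single call to
// authService.getValidApiTokenForUser(...) (added to AuthenticationServiceBean later in
// the same patch). A self-contained sketch of that behavior, using simplified stand-in
// types rather than the real Dataverse classes, looks roughly like this:

import java.time.Instant;

class ApiTokenSketch {

    static final class Token {
        final String value;
        final Instant expires;
        Token(String value, Instant expires) { this.value = value; this.expires = expires; }
        boolean isExpired() { return expires.isBefore(Instant.now()); }
    }

    interface TokenStore {
        Token findTokenForUser(String userId);      // may return null
        Token generateTokenForUser(String userId);  // always returns a fresh token
    }

    // Reuse the stored token unless it is missing or expired; otherwise mint a new one.
    static Token getValidTokenForUser(TokenStore store, String userId) {
        Token token = store.findTokenForUser(userId);
        if (token == null || token.isExpired()) {
            token = store.generateTokenForUser(userId);
        }
        return token;
    }

    public static void main(String[] args) {
        TokenStore store = new TokenStore() {
            private Token current = new Token("expired-token", Instant.now().minusSeconds(60));
            public Token findTokenForUser(String userId) { return current; }
            public Token generateTokenForUser(String userId) {
                current = new Token("fresh-token", Instant.now().plusSeconds(3600));
                return current;
            }
        };
        System.out.println(getValidTokenForUser(store, "dataverseAdmin").value); // fresh-token
    }
}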
- URLTokenUtil tokenUtil = new URLTokenUtil(d, apiToken, localeCode); - String appUrl; - appUrl = settingsService.getValueForKey(SettingsServiceBean.Key.WebloaderUrl) - + "?datasetPid={datasetPid}&siteUrl={siteUrl}&key={apiToken}&datasetId={datasetId}&datasetVersion={datasetVersion}&dvLocale={localeCode}"; - return tokenUtil.replaceTokensWithValues(appUrl); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java index 6cf294ffd6d..9845fa16526 100644 --- a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java @@ -5,7 +5,9 @@ import edu.harvard.iq.dataverse.api.AbstractApiBean; import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; import edu.harvard.iq.dataverse.authorization.Permission; +import edu.harvard.iq.dataverse.authorization.users.ApiToken; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; +import edu.harvard.iq.dataverse.authorization.users.User; import edu.harvard.iq.dataverse.branding.BrandingUtil; import edu.harvard.iq.dataverse.datasetutility.AddReplaceFileHelper; import edu.harvard.iq.dataverse.datasetutility.FileSizeChecker; @@ -36,6 +38,8 @@ import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.JsfHelper; import edu.harvard.iq.dataverse.util.SystemConfig; +import edu.harvard.iq.dataverse.util.URLTokenUtil; +import edu.harvard.iq.dataverse.util.WebloaderUtil; import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.EjbUtil; import edu.harvard.iq.dataverse.util.FileMetadataUtil; @@ -3067,6 +3071,10 @@ public boolean globusUploadSupported() { return settingsWrapper.isGlobusUpload() && settingsWrapper.isGlobusEnabledStorageDriver(dataset.getEffectiveStorageDriverId()); } + + public boolean webloaderUploadSupported() { + return settingsWrapper.isWebloaderUpload() && StorageIO.isDirectUploadEnabled(dataset.getEffectiveStorageDriverId()); + } private void populateFileMetadatas() { fileMetadatas = new ArrayList<>(); @@ -3106,4 +3114,18 @@ public void setFileAccessRequest(boolean fileAccessRequest) { public boolean isHasPublicStore() { return settingsWrapper.isTrueForKey(SettingsServiceBean.Key.PublicInstall, StorageIO.isPublicStore(dataset.getEffectiveStorageDriverId())); } + + public String getWebloaderUrlForDataset(Dataset d) { + String localeCode = session.getLocaleCode(); + User user = session.getUser(); + if (user instanceof AuthenticatedUser) { + ApiToken apiToken = authService.getValidApiTokenForUser((AuthenticatedUser) user); + return WebloaderUtil.getWebloaderUrl(d, apiToken, localeCode, + settingsService.getValueForKey(SettingsServiceBean.Key.WebloaderUrl)); + } else { + // Shouldn't normally happen (seesion timeout? bug?) 
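// Editor's note: the webloader link itself is assembled by substituting {placeholder}
// tokens in a URL template, the job that moves into WebloaderUtil.getWebloaderUrl and
// URLTokenUtil below. This is a minimal, self-contained sketch of that substitution step
// only; the base URL, keys and values are examples, not the actual URLTokenUtil code:

import java.util.LinkedHashMap;
import java.util.Map;

class WebloaderUrlSketch {

    // Replace every "{key}" occurrence in the template with its mapped value.
    static String replaceTokens(String template, Map<String, String> values) {
        String result = template;
        for (Map.Entry<String, String> entry : values.entrySet()) {
            result = result.replace("{" + entry.getKey() + "}", entry.getValue());
        }
        return result;
    }

    public static void main(String[] args) {
        // The WebloaderUrl setting would supply the base URL of a DVWebloader deployment.
        String template = "https://webloader.example.org/dvwebloader.html"
                + "?datasetPid={datasetPid}&siteUrl={siteUrl}&key={apiToken}&dvLocale={localeCode}";
        Map<String, String> values = new LinkedHashMap<>();
        values.put("datasetPid", "doi:10.5072/FK2/EXAMPLE");
        values.put("siteUrl", "https://demo.dataverse.org");
        values.put("apiToken", "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx");
        values.put("localeCode", "en");
        System.out.println(replaceTokens(template, values));
    }
}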
+ logger.warning("getWebloaderUrlForDataset called for non-Authenticated user"); + return null; + } + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/authorization/AuthenticationServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/authorization/AuthenticationServiceBean.java index b242cd2936f..f7b88147c05 100644 --- a/src/main/java/edu/harvard/iq/dataverse/authorization/AuthenticationServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/authorization/AuthenticationServiceBean.java @@ -938,4 +938,14 @@ public List getWorkflowCommentsByAuthenticatedUser(Authenticat return query.getResultList(); } + public ApiToken getValidApiTokenForUser(AuthenticatedUser user) { + ApiToken apiToken = null; + apiToken = findApiTokenByUser(user); + if ((apiToken == null) || (apiToken.getExpireTime().before(new Date()))) { + logger.fine("Created apiToken for user: " + user.getIdentifier()); + apiToken = generateApiTokenForUser(user); + } + return apiToken; + } + } diff --git a/src/main/java/edu/harvard/iq/dataverse/util/WebloaderUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/WebloaderUtil.java new file mode 100644 index 00000000000..266d55eceb3 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/util/WebloaderUtil.java @@ -0,0 +1,36 @@ +package edu.harvard.iq.dataverse.util; + +import java.util.Date; +import java.util.Enumeration; +import java.util.HashMap; +import java.util.Locale; +import java.util.Map.Entry; +import java.util.logging.Logger; + +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpSession; + +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.DatasetPage; +import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; +import edu.harvard.iq.dataverse.authorization.users.ApiToken; +import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; +import edu.harvard.iq.dataverse.authorization.users.User; +import edu.harvard.iq.dataverse.settings.SettingsServiceBean; + +public class WebloaderUtil { + + private static final Logger logger = Logger.getLogger(WebloaderUtil.class.getCanonicalName()); + + /** + * Create the URL required to launch https://github.com/gdcc/dvweloader + */ + public static String getWebloaderUrl(Dataset d, ApiToken apiToken, String localeCode, String baseUrl) { + // Use URLTokenUtil for params currently in common with external tools. + URLTokenUtil tokenUtil = new URLTokenUtil(d, apiToken, localeCode); + String appUrl; + appUrl = baseUrl + + "?datasetPid={datasetPid}&siteUrl={siteUrl}&key={apiToken}&datasetId={datasetId}&datasetVersion={datasetVersion}&dvLocale={localeCode}"; + return tokenUtil.replaceTokensWithValues(appUrl); + } +} diff --git a/src/main/webapp/dataset.xhtml b/src/main/webapp/dataset.xhtml index 1bb862721a5..35753374dbb 100644 --- a/src/main/webapp/dataset.xhtml +++ b/src/main/webapp/dataset.xhtml @@ -846,6 +846,7 @@ + diff --git a/src/main/webapp/editFilesFragment.xhtml b/src/main/webapp/editFilesFragment.xhtml index 40937008ab8..a8d2bde1059 100644 --- a/src/main/webapp/editFilesFragment.xhtml +++ b/src/main/webapp/editFilesFragment.xhtml @@ -158,12 +158,13 @@ widgetVar="fileUploadWidget"> -
        [XHTML element markup was lost in this extract. Recoverable content of the hunk: it removes the elements added in the previous commit that referenced #{bundle['file.webloader.tip']} and #{bundle['file.webloaderAfterCreate.tip']} and adds reworked folder-upload markup referencing #{bundle['file.fromWebloader.tip']} and #{bundle['file.fromWebloaderAfterCreate.tip']}, adjusting the neighboring Dropbox (#{bundle['file.fromDropbox.tip']}) and Globus (#{bundle['file.fromGlobus.tip']}, #{bundle['file.fromGlobusAfterCreate.tip']}) upload blocks in the process.]
diff --git a/src/main/webapp/editdatafiles.xhtml b/src/main/webapp/editdatafiles.xhtml index 6c4f07f51da..02acb224827 100644 --- a/src/main/webapp/editdatafiles.xhtml +++ b/src/main/webapp/editdatafiles.xhtml @@ -63,6 +63,7 @@ +
diff --git a/src/main/webapp/resources/css/structure.css b/src/main/webapp/resources/css/structure.css index 65489431d65..6ef6dfb29e5 100644 --- a/src/main/webapp/resources/css/structure.css +++ b/src/main/webapp/resources/css/structure.css @@ -883,7 +883,7 @@ div.panel-body.read-terms{max-height:220px; overflow-y:scroll; width:100%; backg #dragdropMsg {padding:20px;font-size:1.3em;color:#808080;text-align:center;} .dropin-btn-status.ui-icon {background: url("https://www.dropbox.com/static/images/widgets/dbx-saver-status.png") no-repeat;} .globus-btn.ui-icon {background: url("https://docs.globus.org/images/home/transfer.png") no-repeat;background-size:contain;display:inline-block;} -.webloader-btn.ui-icon {background: url("resources/images/folders.png") no-repeat;background-size:contain;display:inline-block;} +.webloader-btn.ui-icon {background: url("/resources/images/folders.png") no-repeat;background-size:contain;display:inline-block;} /* VERSIONS */ diff --git a/src/main/webapp/resources/images/folders.png b/src/main/webapp/resources/images/folders.png new file mode 100644 index 0000000000000000000000000000000000000000..a3dc36372803a113a1d6e562731cb96ef42d416a GIT binary patch literal 787 zcmV+u1MK{XP)00001b5ch_0Itp) z=>Px#1ZP1_K>z@;j|==^1poj532;bRa{vGi!vFvd!vV){sAK>D0;owuK~!i%?U=uc z>Od66<3+6PZ4^NyrT74XrSBjJ_JS`Uf{ng_jaA;kPPxI#%ErP!pavETJ4;za@3(qb z!ZjIZVwg#czrvlfA!NRs`OeHqY_r*DVdfA04J{u5s(EVxyWQ^F20}pB0t2fGVY;18 zhXPHrEUVdURw@;WgMq7ncDr4x)k>uj#hPZCCVuDhxvuN?|JX4Mqt$9rAQ4vqSTp&2 zo&pZm>vcRHr$8dk0`U4LlL-af3>t0hh~#MjXHc zO15DTdJnKpqu32-#DS6$1Xuz>f$$Q-IgducAoL#i2=K&Di){l{7|=CA6bOKerqe0f zyaGMkBYArECGP zEEYWle82?O3{?`gybW{*~haj=i%KCoJ?Xe18aCA%z&;>81oVN|PCiaD@* z1|C37~XB9(eKf3xOrXew=~~2z~E(YI+4{ zA^2EmH(;w(Ai!TDY2rZf5#WiRKJo#&CWsmn<22Rl^(eT}@AtiZ8dHz~^uC9~;jfK! zI^FGdDG^T4olYpo!0~u26bcmFK+i=ZCr+mmBRR4KU|X%%>v-i9b6}e0VzF@35c(>l zAOqQKmI7`tNw))q0s-Z6nF2m=3!|rzy@Z~gF8_l)dl1Q;7 Date: Tue, 25 Oct 2022 15:04:52 -0400 Subject: [PATCH 085/173] simplify - just use plus icon --- src/main/webapp/editFilesFragment.xhtml | 2 +- src/main/webapp/resources/css/structure.css | 2 -- src/main/webapp/resources/images/folders.png | Bin 787 -> 0 bytes 3 files changed, 1 insertion(+), 3 deletions(-) delete mode 100644 src/main/webapp/resources/images/folders.png diff --git a/src/main/webapp/editFilesFragment.xhtml b/src/main/webapp/editFilesFragment.xhtml index a8d2bde1059..09ee7f50024 100644 --- a/src/main/webapp/editFilesFragment.xhtml +++ b/src/main/webapp/editFilesFragment.xhtml @@ -162,7 +162,7 @@

        [XHTML element markup was lost in this extract. Recoverable content of the hunk: the folder-upload button keeps its #{bundle['file.fromWebloader.tip']} and #{bundle['file.fromWebloaderAfterCreate.tip']} text but, per the commit message, drops the custom webloader icon in favor of the standard plus icon.]
diff --git a/src/main/webapp/resources/css/structure.css b/src/main/webapp/resources/css/structure.css index 6ef6dfb29e5..0dff334833e 100644 --- a/src/main/webapp/resources/css/structure.css +++ b/src/main/webapp/resources/css/structure.css @@ -883,8 +883,6 @@ div.panel-body.read-terms{max-height:220px; overflow-y:scroll; width:100%; backg #dragdropMsg {padding:20px;font-size:1.3em;color:#808080;text-align:center;} .dropin-btn-status.ui-icon {background: url("https://www.dropbox.com/static/images/widgets/dbx-saver-status.png") no-repeat;} .globus-btn.ui-icon {background: url("https://docs.globus.org/images/home/transfer.png") no-repeat;background-size:contain;display:inline-block;} -.webloader-btn.ui-icon {background: url("/resources/images/folders.png") no-repeat;background-size:contain;display:inline-block;} - /* VERSIONS */ div[id$="versionsTable"] th.col-select-width * {display:none;} diff --git a/src/main/webapp/resources/images/folders.png b/src/main/webapp/resources/images/folders.png deleted file mode 100644 index a3dc36372803a113a1d6e562731cb96ef42d416a..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 787 zcmV+u1MK{XP)00001b5ch_0Itp) z=>Px#1ZP1_K>z@;j|==^1poj532;bRa{vGi!vFvd!vV){sAK>D0;owuK~!i%?U=uc z>Od66<3+6PZ4^NyrT74XrSBjJ_JS`Uf{ng_jaA;kPPxI#%ErP!pavETJ4;za@3(qb z!ZjIZVwg#czrvlfA!NRs`OeHqY_r*DVdfA04J{u5s(EVxyWQ^F20}pB0t2fGVY;18 zhXPHrEUVdURw@;WgMq7ncDr4x)k>uj#hPZCCVuDhxvuN?|JX4Mqt$9rAQ4vqSTp&2 zo&pZm>vcRHr$8dk0`U4LlL-af3>t0hh~#MjXHc zO15DTdJnKpqu32-#DS6$1Xuz>f$$Q-IgducAoL#i2=K&Di){l{7|=CA6bOKerqe0f zyaGMkBYArECGP zEEYWle82?O3{?`gybW{*~haj=i%KCoJ?Xe18aCA%z&;>81oVN|PCiaD@* z1|C37~XB9(eKf3xOrXew=~~2z~E(YI+4{ zA^2EmH(;w(Ai!TDY2rZf5#WiRKJo#&CWsmn<22Rl^(eT}@AtiZ8dHz~^uC9~;jfK! zI^FGdDG^T4olYpo!0~u26bcmFK+i=ZCr+mmBRR4KU|X%%>v-i9b6}e0VzF@35c(>l zAOqQKmI7`tNw))q0s-Z6nF2m=3!|rzy@Z~gF8_l)dl1Q;7 Date: Thu, 3 Nov 2022 17:59:06 +0100 Subject: [PATCH 086/173] build(ct-base): switch to Payara 5.2022.4 The upgrade to 5.2022.3 made Dataverse deployments fail because the postboot script deployment method was broken. This has been fixed with 5.2022.4, which is why we use this version now. 
--- modules/dataverse-parent/pom.xml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/modules/dataverse-parent/pom.xml b/modules/dataverse-parent/pom.xml index 4ffc5941278..fe50601d583 100644 --- a/modules/dataverse-parent/pom.xml +++ b/modules/dataverse-parent/pom.xml @@ -337,7 +337,11 @@ ct - + + 5.2022.4 From 7d9327edbf194049c1233b12fed6c0ade8dc518d Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 3 Nov 2022 17:39:55 -0400 Subject: [PATCH 087/173] Refactored permissions checks and fixed workflow token access --- .../edu/harvard/iq/dataverse/api/Access.java | 334 +++++------------- 1 file changed, 96 insertions(+), 238 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Access.java b/src/main/java/edu/harvard/iq/dataverse/api/Access.java index abeedf23b59..321b3ebfab6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Access.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Access.java @@ -187,9 +187,6 @@ public class Access extends AbstractApiBean { @Inject MakeDataCountLoggingServiceBean mdcLogService; - - private static final String API_KEY_HEADER = "X-Dataverse-key"; - //@EJB // TODO: @@ -197,23 +194,19 @@ public class Access extends AbstractApiBean { @Path("datafile/bundle/{fileId}") @GET @Produces({"application/zip"}) - public BundleDownloadInstance datafileBundle(@PathParam("fileId") String fileId, @QueryParam("fileMetadataId") Long fileMetadataId,@QueryParam("gbrecs") boolean gbrecs, @QueryParam("key") String apiToken, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) /*throws NotFoundException, ServiceUnavailableException, PermissionDeniedException, AuthorizationRequiredException*/ { + public BundleDownloadInstance datafileBundle(@PathParam("fileId") String fileId, @QueryParam("fileMetadataId") Long fileMetadataId,@QueryParam("gbrecs") boolean gbrecs, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) /*throws NotFoundException, ServiceUnavailableException, PermissionDeniedException, AuthorizationRequiredException*/ { GuestbookResponse gbr = null; DataFile df = findDataFileOrDieWrapper(fileId); - if (apiToken == null || apiToken.equals("")) { - apiToken = headers.getHeaderString(API_KEY_HEADER); - } - // This will throw a ForbiddenException if access isn't authorized: - checkAuthorization(df, apiToken); + checkAuthorization(df); if (gbrecs != true && df.isReleased()){ // Write Guestbook record if not done previously and file is released - User apiTokenUser = findAPITokenUser(apiToken); + User apiTokenUser = findAPITokenUser(); gbr = guestbookResponseService.initAPIGuestbookResponse(df.getOwner(), df, session, apiTokenUser); guestbookResponseService.save(gbr); MakeDataCountEntry entry = new MakeDataCountEntry(uriInfo, headers, dvRequestService, df); @@ -278,7 +271,7 @@ private DataFile findDataFileOrDieWrapper(String fileId){ @Path("datafile/{fileId:.+}") @GET @Produces({"application/xml"}) - public Response datafile(@PathParam("fileId") String fileId, @QueryParam("gbrecs") boolean gbrecs, @QueryParam("key") String apiToken, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) /*throws NotFoundException, ServiceUnavailableException, PermissionDeniedException, AuthorizationRequiredException*/ { + public Response datafile(@PathParam("fileId") String fileId, @QueryParam("gbrecs") boolean gbrecs, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) /*throws 
NotFoundException, ServiceUnavailableException, PermissionDeniedException, AuthorizationRequiredException*/ { // check first if there's a trailing slash, and chop it: while (fileId.lastIndexOf('/') == fileId.length() - 1) { @@ -303,20 +296,16 @@ public Response datafile(@PathParam("fileId") String fileId, @QueryParam("gbrecs throw new NotFoundException(errorMessage); // (nobody should ever be using this API on a harvested DataFile)! } - - if (apiToken == null || apiToken.equals("")) { - apiToken = headers.getHeaderString(API_KEY_HEADER); - } - + + // This will throw a ForbiddenException if access isn't authorized: + checkAuthorization(df); + if (gbrecs != true && df.isReleased()){ // Write Guestbook record if not done previously and file is released - User apiTokenUser = findAPITokenUser(apiToken); + User apiTokenUser = findAPITokenUser(); gbr = guestbookResponseService.initAPIGuestbookResponse(df.getOwner(), df, session, apiTokenUser); } - - // This will throw a ForbiddenException if access isn't authorized: - checkAuthorization(df, apiToken); - + DownloadInfo dInfo = new DownloadInfo(df); logger.fine("checking if thumbnails are supported on this file."); @@ -532,11 +521,10 @@ public String tabularDatafileMetadataDDI(@PathParam("fileId") String fileId, @Q @Path("datafile/{fileId}/auxiliary") @GET public Response listDatafileMetadataAux(@PathParam("fileId") String fileId, - @QueryParam("key") String apiToken, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws ServiceUnavailableException { - return listAuxiliaryFiles(fileId, null, apiToken, uriInfo, headers, response); + return listAuxiliaryFiles(fileId, null, uriInfo, headers, response); } /* * GET method for retrieving a list auxiliary files associated with @@ -547,26 +535,21 @@ public Response listDatafileMetadataAux(@PathParam("fileId") String fileId, @GET public Response listDatafileMetadataAuxByOrigin(@PathParam("fileId") String fileId, @PathParam("origin") String origin, - @QueryParam("key") String apiToken, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws ServiceUnavailableException { - return listAuxiliaryFiles(fileId, origin, apiToken, uriInfo, headers, response); + return listAuxiliaryFiles(fileId, origin, uriInfo, headers, response); } - private Response listAuxiliaryFiles(String fileId, String origin, String apiToken, UriInfo uriInfo, HttpHeaders headers, HttpServletResponse response) { + private Response listAuxiliaryFiles(String fileId, String origin, UriInfo uriInfo, HttpHeaders headers, HttpServletResponse response) { DataFile df = findDataFileOrDieWrapper(fileId); - if (apiToken == null || apiToken.equals("")) { - apiToken = headers.getHeaderString(API_KEY_HEADER); - } - List auxFileList = auxiliaryFileService.findAuxiliaryFiles(df, origin); if (auxFileList == null || auxFileList.isEmpty()) { throw new NotFoundException("No Auxiliary files exist for datafile " + fileId + (origin==null ? 
"": " and the specified origin")); } - boolean isAccessAllowed = isAccessAuthorized(df, apiToken); + boolean isAccessAllowed = isAccessAuthorized(df); JsonArrayBuilder jab = Json.createArrayBuilder(); auxFileList.forEach(auxFile -> { if (isAccessAllowed || auxFile.getIsPublic()) { @@ -594,17 +577,12 @@ private Response listAuxiliaryFiles(String fileId, String origin, String apiToke public DownloadInstance downloadAuxiliaryFile(@PathParam("fileId") String fileId, @PathParam("formatTag") String formatTag, @PathParam("formatVersion") String formatVersion, - @QueryParam("key") String apiToken, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws ServiceUnavailableException { DataFile df = findDataFileOrDieWrapper(fileId); - if (apiToken == null || apiToken.equals("")) { - apiToken = headers.getHeaderString(API_KEY_HEADER); - } - DownloadInfo dInfo = new DownloadInfo(df); boolean publiclyAvailable = false; @@ -654,7 +632,7 @@ public DownloadInstance downloadAuxiliaryFile(@PathParam("fileId") String fileId // as defined for the DataFile itself), and will throw a ForbiddenException // if access is denied: if (!publiclyAvailable) { - checkAuthorization(df, apiToken); + checkAuthorization(df); } return downloadInstance; @@ -670,16 +648,16 @@ public DownloadInstance downloadAuxiliaryFile(@PathParam("fileId") String fileId @POST @Consumes("text/plain") @Produces({ "application/zip" }) - public Response postDownloadDatafiles(String fileIds, @QueryParam("gbrecs") boolean gbrecs, @QueryParam("key") String apiTokenParam, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws WebApplicationException { + public Response postDownloadDatafiles(String fileIds, @QueryParam("gbrecs") boolean gbrecs, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws WebApplicationException { - return downloadDatafiles(fileIds, gbrecs, apiTokenParam, uriInfo, headers, response); + return downloadDatafiles(fileIds, gbrecs, uriInfo, headers, response); } @Path("dataset/{id}") @GET @Produces({"application/zip"}) - public Response downloadAllFromLatest(@PathParam("id") String datasetIdOrPersistentId, @QueryParam("gbrecs") boolean gbrecs, @QueryParam("key") String apiTokenParam, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws WebApplicationException { + public Response downloadAllFromLatest(@PathParam("id") String datasetIdOrPersistentId, @QueryParam("gbrecs") boolean gbrecs, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws WebApplicationException { try { User user = findUserOrDie(); DataverseRequest req = createDataverseRequest(user); @@ -693,7 +671,7 @@ public Response downloadAllFromLatest(@PathParam("id") String datasetIdOrPersist // We don't want downloads from Draft versions to be counted, // so we are setting the gbrecs (aka "do not write guestbook response") // variable accordingly: - return downloadDatafiles(fileIds, true, apiTokenParam, uriInfo, headers, response); + return downloadDatafiles(fileIds, true, uriInfo, headers, response); } } @@ -714,7 +692,7 @@ public Response downloadAllFromLatest(@PathParam("id") String datasetIdOrPersist } String fileIds = getFileIdsAsCommaSeparated(latest.getFileMetadatas()); - return downloadDatafiles(fileIds, gbrecs, apiTokenParam, uriInfo, headers, response); + return downloadDatafiles(fileIds, gbrecs, uriInfo, headers, response); } 
catch (WrappedResponse wr) { return wr.getResponse(); } @@ -763,7 +741,7 @@ public Command handleLatestPublished() { if (dsv.isDraft()) { gbrecs = true; } - return downloadDatafiles(fileIds, gbrecs, apiTokenParam, uriInfo, headers, response); + return downloadDatafiles(fileIds, gbrecs, uriInfo, headers, response); } catch (WrappedResponse wr) { return wr.getResponse(); } @@ -784,11 +762,11 @@ private static String getFileIdsAsCommaSeparated(List fileMetadata @Path("datafiles/{fileIds}") @GET @Produces({"application/zip"}) - public Response datafiles(@PathParam("fileIds") String fileIds, @QueryParam("gbrecs") boolean gbrecs, @QueryParam("key") String apiTokenParam, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws WebApplicationException { - return downloadDatafiles(fileIds, gbrecs, apiTokenParam, uriInfo, headers, response); + public Response datafiles(@PathParam("fileIds") String fileIds, @QueryParam("gbrecs") boolean gbrecs, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws WebApplicationException { + return downloadDatafiles(fileIds, gbrecs, uriInfo, headers, response); } - private Response downloadDatafiles(String rawFileIds, boolean donotwriteGBResponse, String apiTokenParam, UriInfo uriInfo, HttpHeaders headers, HttpServletResponse response) throws WebApplicationException /* throws NotFoundException, ServiceUnavailableException, PermissionDeniedException, AuthorizationRequiredException*/ { + private Response downloadDatafiles(String rawFileIds, boolean donotwriteGBResponse, UriInfo uriInfo, HttpHeaders headers, HttpServletResponse response) throws WebApplicationException /* throws NotFoundException, ServiceUnavailableException, PermissionDeniedException, AuthorizationRequiredException*/ { final long zipDownloadSizeLimit = systemConfig.getZipDownloadLimit(); logger.fine("setting zip download size limit to " + zipDownloadSizeLimit + " bytes."); @@ -810,11 +788,7 @@ private Response downloadDatafiles(String rawFileIds, boolean donotwriteGBRespon String customZipServiceUrl = settingsService.getValueForKey(SettingsServiceBean.Key.CustomZipDownloadServiceUrl); boolean useCustomZipService = customZipServiceUrl != null; - String apiToken = (apiTokenParam == null || apiTokenParam.equals("")) - ? 
headers.getHeaderString(API_KEY_HEADER) - : apiTokenParam; - - User apiTokenUser = findAPITokenUser(apiToken); //for use in adding gb records if necessary + User apiTokenUser = findAPITokenUser(); //for use in adding gb records if necessary Boolean getOrig = false; for (String key : uriInfo.getQueryParameters().keySet()) { @@ -827,7 +801,7 @@ private Response downloadDatafiles(String rawFileIds, boolean donotwriteGBRespon if (useCustomZipService) { URI redirect_uri = null; try { - redirect_uri = handleCustomZipDownload(customZipServiceUrl, fileIds, apiToken, apiTokenUser, uriInfo, headers, donotwriteGBResponse, true); + redirect_uri = handleCustomZipDownload(customZipServiceUrl, fileIds, apiTokenUser, uriInfo, headers, donotwriteGBResponse, true); } catch (WebApplicationException wae) { throw wae; } @@ -859,7 +833,7 @@ public void write(OutputStream os) throws IOException, logger.fine("token: " + fileIdParams[i]); Long fileId = null; try { - fileId = new Long(fileIdParams[i]); + fileId = Long.parseLong(fileIdParams[i]); } catch (NumberFormatException nfe) { fileId = null; } @@ -867,7 +841,7 @@ public void write(OutputStream os) throws IOException, logger.fine("attempting to look up file id " + fileId); DataFile file = dataFileService.find(fileId); if (file != null) { - if (isAccessAuthorized(file, apiToken)) { + if (isAccessAuthorized(file)) { logger.fine("adding datafile (id=" + file.getId() + ") to the download list of the ZippedDownloadInstance."); //downloadInstance.addDataFile(file); @@ -1436,8 +1410,8 @@ public Response requestFileAccess(@PathParam("id") String fileToRequestAccessId, List args = Arrays.asList(wr.getLocalizedMessage()); return error(BAD_REQUEST, BundleUtil.getStringFromBundle("access.api.fileAccess.failure.noUser", args)); } - - if (isAccessAuthorized(dataFile, getRequestApiKey())) { + //Already have access + if (isAccessAuthorized(dataFile)) { return error(BAD_REQUEST, BundleUtil.getStringFromBundle("access.api.requestAccess.failure.invalidRequest")); } @@ -1708,15 +1682,15 @@ public Response rejectFileAccess(@PathParam("id") String fileToRequestAccessId, // checkAuthorization is a convenience method; it calls the boolean method // isAccessAuthorized(), the actual workhorse, tand throws a 403 exception if not. - private void checkAuthorization(DataFile df, String apiToken) throws WebApplicationException { + private void checkAuthorization(DataFile df) throws WebApplicationException { - if (!isAccessAuthorized(df, apiToken)) { + if (!isAccessAuthorized(df)) { throw new ForbiddenException(); } } - private boolean isAccessAuthorized(DataFile df, String apiToken) { + private boolean isAccessAuthorized(DataFile df) { // First, check if the file belongs to a released Dataset version: boolean published = false; @@ -1787,37 +1761,41 @@ private boolean isAccessAuthorized(DataFile df, String apiToken) { } } - if (!restricted && !embargoed) { - // And if they are not published, they can still be downloaded, if the user + + + //The one case where we don't need to check permissions + if (!restricted && !embargoed && published) { + // If they are not published, they can still be downloaded, if the user // has the permission to view unpublished versions! 
(this case will // be handled below) - if (published) { - return true; - } + return true; } + //For permissions check decide if we havce a session user, or an API user User user = null; /** * Authentication/authorization: - * - * note that the fragment below - that retrieves the session object - * and tries to find the user associated with the session - is really - * for logging/debugging purposes only; for practical purposes, it - * would be enough to just call "permissionService.on(df).has(Permission.DownloadFile)" - * and the method does just that, tries to authorize for the user in - * the current session (or guest user, if no session user is available): */ - if (session != null) { + User apiTokenUser = null; + //If we get a non-GuestUser from findUserOrDie, use it. Otherwise, check the session + try { + logger.fine("calling apiTokenUser = findUserOrDie()..."); + apiTokenUser = findUserOrDie(); + } catch (WrappedResponse wr) { + logger.log(Level.FINE, "Message from findUserOrDie(): {0}", wr.getMessage()); + } + + if ((apiTokenUser instanceof GuestUser) && session != null) { if (session.getUser() != null) { - if (session.getUser().isAuthenticated()) { - user = session.getUser(); - } else { + user = session.getUser(); + apiTokenUser=null; + //Fine logging + if (!session.getUser().isAuthenticated()) { logger.fine("User associated with the session is not an authenticated user."); if (session.getUser() instanceof PrivateUrlUser) { logger.fine("User associated with the session is a PrivateUrlUser user."); - user = session.getUser(); } if (session.getUser() instanceof GuestUser) { logger.fine("User associated with the session is indeed a guest user."); @@ -1829,154 +1807,41 @@ private boolean isAccessAuthorized(DataFile df, String apiToken) { } else { logger.fine("Session is null."); } - - User apiTokenUser = null; - - if ((apiToken != null)&&(apiToken.length()!=64)) { - // We'll also try to obtain the user information from the API token, - // if supplied: - - try { - logger.fine("calling apiTokenUser = findUserOrDie()..."); - apiTokenUser = findUserOrDie(); - } catch (WrappedResponse wr) { - logger.log(Level.FINE, "Message from findUserOrDie(): {0}", wr.getMessage()); - } - - if (apiTokenUser == null) { - logger.warning("API token-based auth: Unable to find a user with the API token provided."); - } + //If we don't have a user, nothing more to do. 
(Note session could have returned GuestUser) + if (user == null && apiTokenUser == null) { + logger.warning("Unable to find a user via session or with a token."); + return false; } - - // OK, let's revisit the case of non-restricted files, this time in - // an unpublished version: + + // OK, let's revisit the case of non-restricted files, this time in + // an unpublished version: // (if (published) was already addressed above) - - if (!restricted && !embargoed) { + + DataverseRequest dvr = null; + if (apiTokenUser != null) { + dvr = createDataverseRequest(apiTokenUser); + } else { + // used in JSF context, user may be Guest + dvr = dvRequestService.getDataverseRequest(); + } + if (!published) { // and restricted or embargoed (implied by earlier processing) // If the file is not published, they can still download the file, if the user // has the permission to view unpublished versions: - - if ( user != null ) { - // used in JSF context - if (permissionService.requestOn(dvRequestService.getDataverseRequest(), df.getOwner()).has(Permission.ViewUnpublishedDataset)) { - // it's not unthinkable, that a null user (i.e., guest user) could be given - // the ViewUnpublished permission! - logger.log(Level.FINE, "Session-based auth: user {0} has access rights on the non-restricted, unpublished datafile.", user.getIdentifier()); - return true; - } - } - - if (apiTokenUser != null) { - // used in an API context - if (permissionService.requestOn( createDataverseRequest(apiTokenUser), df.getOwner()).has(Permission.ViewUnpublishedDataset)) { - logger.log(Level.FINE, "Token-based auth: user {0} has access rights on the non-restricted, unpublished datafile.", apiTokenUser.getIdentifier()); - return true; - } - } - // last option - guest user in either contexts - // Guset user is impled by the code above. - if ( permissionService.requestOn(dvRequestService.getDataverseRequest(), df.getOwner()).has(Permission.ViewUnpublishedDataset) ) { + if (permissionService.requestOn(dvr, df.getOwner()).has(Permission.ViewUnpublishedDataset)) { + // it's not unthinkable, that a GuestUser could be given + // the ViewUnpublished permission! + logger.log(Level.FINE, + "Session-based auth: user {0} has access rights on the non-restricted, unpublished datafile.", + dvr.getUser().getIdentifier()); return true; } - - } else { - - // OK, this is a restricted and/or embargoed file. - - boolean hasAccessToRestrictedBySession = false; - boolean hasAccessToRestrictedByToken = false; - - if (permissionService.on(df).has(Permission.DownloadFile)) { - // Note: PermissionServiceBean.on(Datafile df) will obtain the - // User from the Session object, just like in the code fragment - // above. That's why it's not passed along as an argument. 
- hasAccessToRestrictedBySession = true; - } else if (apiTokenUser != null && permissionService.requestOn(createDataverseRequest(apiTokenUser), df).has(Permission.DownloadFile)) { - hasAccessToRestrictedByToken = true; - } - - if (hasAccessToRestrictedBySession || hasAccessToRestrictedByToken) { - if (published) { - if (hasAccessToRestrictedBySession) { - if (user != null) { - logger.log(Level.FINE, "Session-based auth: user {0} is granted access to the restricted, published datafile.", user.getIdentifier()); - } else { - logger.fine("Session-based auth: guest user is granted access to the restricted, published datafile."); - } - } else { - logger.log(Level.FINE, "Token-based auth: user {0} is granted access to the restricted, published datafile.", apiTokenUser.getIdentifier()); - } - return true; - } else { - // if the file is NOT published, we will let them download the - // file ONLY if they also have the permission to view - // unpublished versions: - // Note that the code below does not allow a case where it is the - // session user that has the permission on the file, and the API token - // user with the ViewUnpublished permission, or vice versa! - if (hasAccessToRestrictedBySession) { - if (permissionService.on(df.getOwner()).has(Permission.ViewUnpublishedDataset)) { - if (user != null) { - logger.log(Level.FINE, "Session-based auth: user {0} is granted access to the restricted, unpublished datafile.", user.getIdentifier()); - } else { - logger.fine("Session-based auth: guest user is granted access to the restricted, unpublished datafile."); - } - return true; - } - } else { - if (apiTokenUser != null && permissionService.requestOn(createDataverseRequest(apiTokenUser), df.getOwner()).has(Permission.ViewUnpublishedDataset)) { - logger.log(Level.FINE, "Token-based auth: user {0} is granted access to the restricted, unpublished datafile.", apiTokenUser.getIdentifier()); - return true; - } - } - } - } - } + } else { // published and restricted and/or embargoed - - if ((apiToken != null)) { - // Will try to obtain the user information from the API token, - // if supplied: - - try { - logger.fine("calling user = findUserOrDie()..."); - user = findUserOrDie(); - } catch (WrappedResponse wr) { - logger.log(Level.FINE, "Message from findUserOrDie(): {0}", wr.getMessage()); - } - - if (user == null) { - logger.warning("API token-based auth: Unable to find a user with the API token provided."); - return false; - } - - - //Doesn't this ~duplicate logic above - if so, if there's a way to get here, I think it still works for embargoed files (you only get access if you have download permissions, and, if not published, also view unpublished) - if (permissionService.requestOn(createDataverseRequest(user), df).has(Permission.DownloadFile)) { - if (published) { - logger.log(Level.FINE, "API token-based auth: User {0} has rights to access the datafile.", user.getIdentifier()); - //Same case as line 1809 (and part of 1708 though when published you don't need the DownloadFile permission) - return true; - } else { - // if the file is NOT published, we will let them download the - // file ONLY if they also have the permission to view - // unpublished versions: - if (permissionService.requestOn(createDataverseRequest(user), df.getOwner()).has(Permission.ViewUnpublishedDataset)) { - logger.log(Level.FINE, "API token-based auth: User {0} has rights to access the (unpublished) datafile.", user.getIdentifier()); - //Same case as line 1843? 
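// Editor's note: the branching removed here collapses, in the refactored
// isAccessAuthorized(), into a much smaller decision table. The sketch below is a
// condensed reading of that table using plain booleans in place of the real
// permission/service calls; treat it as an illustration, not the authoritative rules.

class AccessDecisionSketch {

    static boolean isAccessAuthorized(boolean published, boolean restricted, boolean embargoed,
                                      boolean requesterResolved,
                                      boolean canViewUnpublished, boolean canDownloadFile) {
        if (published && !restricted && !embargoed) {
            return true;                  // released, unrestricted file: always downloadable
        }
        if (!requesterResolved) {
            return false;                 // nobody (session or API token) to check permissions for
        }
        if (!published) {
            return canViewUnpublished;    // draft versions require ViewUnpublishedDataset
        }
        return canDownloadFile;           // released but restricted/embargoed requires DownloadFile
    }

    public static void main(String[] args) {
        System.out.println(isAccessAuthorized(true,  false, false, false, false, false)); // true
        System.out.println(isAccessAuthorized(false, false, false, true,  false, false)); // false
        System.out.println(isAccessAuthorized(true,  true,  false, true,  false, true));  // true
    }
}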
- return true; - } else { - logger.log(Level.FINE, "API token-based auth: User {0} is not authorized to access the (unpublished) datafile.", user.getIdentifier()); - } - } - } else { - logger.log(Level.FINE, "API token-based auth: User {0} is not authorized to access the datafile.", user.getIdentifier()); + if (permissionService.requestOn(dvr, df).has(Permission.DownloadFile)) { + return true; } - - return false; - } - + } if (user != null) { logger.log(Level.FINE, "Session-based auth: user {0} has NO access rights on the requested datafile.", user.getIdentifier()); } @@ -1984,37 +1849,30 @@ private boolean isAccessAuthorized(DataFile df, String apiToken) { if (apiTokenUser != null) { logger.log(Level.FINE, "Token-based auth: user {0} has NO access rights on the requested datafile.", apiTokenUser.getIdentifier()); } - - if (user == null && apiTokenUser == null) { - logger.fine("Unauthenticated access: No guest access to the datafile."); - } - return false; } - private User findAPITokenUser(String apiToken) { + private User findAPITokenUser() { User apiTokenUser = null; - - if ((apiToken != null) && (apiToken.length() != 64)) { - // We'll also try to obtain the user information from the API token, - // if supplied: - - try { - logger.fine("calling apiTokenUser = findUserOrDie()..."); - apiTokenUser = findUserOrDie(); - return apiTokenUser; - } catch (WrappedResponse wr) { - logger.log(Level.FINE, "Message from findUserOrDie(): {0}", wr.getMessage()); - return null; + try { + logger.fine("calling apiTokenUser = findUserOrDie()..."); + apiTokenUser = findUserOrDie(); + if(apiTokenUser instanceof GuestUser) { + if(session!=null && session.getUser()!=null) { + //The apiTokenUser, if set, will override the sessionUser in permissions calcs, so set it to null if we have a session user + apiTokenUser=null; + } } - + return apiTokenUser; + } catch (WrappedResponse wr) { + logger.log(Level.FINE, "Message from findUserOrDie(): {0}", wr.getMessage()); + return null; } - return apiTokenUser; } - private URI handleCustomZipDownload(String customZipServiceUrl, String fileIds, String apiToken, User apiTokenUser, UriInfo uriInfo, HttpHeaders headers, boolean donotwriteGBResponse, boolean orig) throws WebApplicationException { + private URI handleCustomZipDownload(String customZipServiceUrl, String fileIds, User apiTokenUser, UriInfo uriInfo, HttpHeaders headers, boolean donotwriteGBResponse, boolean orig) throws WebApplicationException { String zipServiceKey = null; Timestamp timestamp = null; @@ -2031,7 +1889,7 @@ private URI handleCustomZipDownload(String customZipServiceUrl, String fileIds, for (int i = 0; i < fileIdParams.length; i++) { Long fileId = null; try { - fileId = new Long(fileIdParams[i]); + fileId = Long.parseLong(fileIdParams[i]); validIdCount++; } catch (NumberFormatException nfe) { fileId = null; @@ -2040,7 +1898,7 @@ private URI handleCustomZipDownload(String customZipServiceUrl, String fileIds, DataFile file = dataFileService.find(fileId); if (file != null) { validFileCount++; - if (isAccessAuthorized(file, apiToken)) { + if (isAccessAuthorized(file)) { logger.fine("adding datafile (id=" + file.getId() + ") to the download list of the ZippedDownloadInstance."); if (donotwriteGBResponse != true && file.isReleased()) { GuestbookResponse gbr = guestbookResponseService.initAPIGuestbookResponse(file.getOwner(), file, session, apiTokenUser); From 05345ba39688291d028af40497b1ada4368a1418 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 4 Nov 2022 17:03:51 +0100 Subject: [PATCH 088/173] 
feat(ct-base): make buildx/BuildKit use a shared state for builds Should speed up recurring builds a bit. --- modules/container-base/pom.xml | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/container-base/pom.xml b/modules/container-base/pom.xml index 67e2c2f9911..f8b59dcecaa 100644 --- a/modules/container-base/pom.xml +++ b/modules/container-base/pom.xml @@ -97,6 +97,7 @@ linux/arm64 linux/amd64 + ${project.build.directory}/buildx-state Dockerfile From e261e3701b1af286d5901e1a82f84fff525dcd74 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 4 Nov 2022 17:09:27 +0100 Subject: [PATCH 089/173] feat(ct-base): switch /docroot to /dv and add volumes #8932 - Instead of a /docroot, add a more generic /dv which is owned by payara:payara and can be used to either store data in a single volume using subfolders or use subfolders with different backing volumes. Anyway, data is not written to overlay FS this way. (As long as an app image points to this location) - Also define /secrets and /dumps as volumes, so data flowing into these locations is again not added to the overlay FS (which might cause severe damage in case of heap dumps!) - Document the different locations in the base image guide. - Remove the /docroot workaround for uploaded files. This will be solved at application level (either by moving the workaround there) or https://github.com/IQSS/dataverse/pull/8983 --- .../source/container/base-image.rst | 48 +++++++++++++++---- .../container-base/src/main/docker/Dockerfile | 19 +++----- 2 files changed, 45 insertions(+), 22 deletions(-) diff --git a/doc/sphinx-guides/source/container/base-image.rst b/doc/sphinx-guides/source/container/base-image.rst index 197f4175538..8cf6af1f904 100644 --- a/doc/sphinx-guides/source/container/base-image.rst +++ b/doc/sphinx-guides/source/container/base-image.rst @@ -218,7 +218,16 @@ Locations +++++++++ This environment variables represent certain locations and might be reused in your scripts etc. -These variables aren't meant to be reconfigurable and reflect state in the filesystem layout! +All of these variables aren't meant to be reconfigurable and reflect state in the filesystem layout! + +**Writeable at build time:** + +The overlay filesystem of Docker and other container technologies is not meant to be used for any performance IO. +You should avoid *writing* data anywhere in the file tree at runtime, except for well known locations with mounted +volumes backing them (see below). + +The locations below are meant to be written to when you build a container image, either this base or anything +building upon it. You can also use these for references in scripts, etc. .. list-table:: :align: left @@ -245,10 +254,35 @@ These variables aren't meant to be reconfigurable and reflect state in the files * - ``DEPLOY_DIR`` - ``${HOME_DIR}/deployments`` - Any EAR or WAR file, exploded WAR directory etc are autodeployed on start - * - ``DOCROOT_DIR`` - - ``/docroot`` - - Mount a volume here to store i18n language bundle files, sitemaps, images for Dataverse collections, logos, - custom themes and stylesheets, etc here. You might need to replicate this data or place on shared file storage. + * - ``DOMAIN_DIR`` + - ``${PAYARA_DIR}/glassfish`` ``/domains/${DOMAIN_NAME}`` + - Path to root of the Payara domain applications will be deployed into. Usually ``${DOMAIN_NAME}`` will be ``domain1``. + + +**Writeable at runtime:** + +The locations below are defined as `Docker volumes `_ by the base image. 
+They will by default get backed by an "anonymous volume", but you can (and should) bind-mount a host directory or +named Docker volume in these places to avoid data loss, gain performance and/or use a network file system. + +**Notes:** +1. On Kubernetes you still need to provide volume definitions for these places in your deployment objects! +2. You should not write data into these locations at build time - it will be shadowed by the mounted volumes! + +.. list-table:: + :align: left + :width: 100 + :widths: 10 10 50 + :header-rows: 1 + + * - Env. variable + - Value + - Description + * - ``STORAGE_DIR`` + - ``/dv`` + - This place is writeable by the Payara user, making it usable as a place to store research data, customizations + or other. Images inheriting the base image should create distinct folders here, backed by different + mounted volumes. * - ``SECRETS_DIR`` - ``/secrets`` - Mount secrets or other here, being picked up automatically by @@ -258,10 +292,6 @@ These variables aren't meant to be reconfigurable and reflect state in the files - ``/dumps`` - Default location where heap dumps will be stored (see above). You should mount some storage here (disk or ephemeral). - * - ``DOMAIN_DIR`` - - ``${PAYARA_DIR}/glassfish`` ``/domains/${DOMAIN_NAME}`` - - Path to root of the Payara domain applications will be deployed into. Usually ``${DOMAIN_NAME}`` will be ``domain1``. - Exposed Ports diff --git a/modules/container-base/src/main/docker/Dockerfile b/modules/container-base/src/main/docker/Dockerfile index cafeb2ffb59..07968e92359 100644 --- a/modules/container-base/src/main/docker/Dockerfile +++ b/modules/container-base/src/main/docker/Dockerfile @@ -1,4 +1,4 @@ -# Copyright 2019 Forschungszentrum Jülich GmbH +# Copyright 2022 Forschungszentrum Jülich GmbH # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -38,7 +38,7 @@ ENV PAYARA_DIR="${HOME_DIR}/appserver" \ SCRIPT_DIR="${HOME_DIR}/scripts" \ CONFIG_DIR="${HOME_DIR}/config" \ DEPLOY_DIR="${HOME_DIR}/deployments" \ - DOCROOT_DIR="/docroot" \ + STORAGE_DIR="/dv" \ SECRETS_DIR="/secrets" \ DUMPS_DIR="/dumps" \ PASSWORD_FILE="${HOME_DIR}/passwordFile" \ @@ -73,17 +73,19 @@ ARG GID=1000 USER root WORKDIR / SHELL ["/bin/bash", "-euo", "pipefail", "-c"] +# Mark these directories as mutuable data containers to avoid cluttering the images overlayfs at runtime. +VOLUME ${STORAGE_DIR} ${SECRETS_DIR} ${DUMPS_DIR} RUN < Date: Fri, 4 Nov 2022 17:10:47 +0100 Subject: [PATCH 090/173] ci(ct-base): switch some steps to run on push or schedule #8932 Instead of only running the steps to push images to Docker Hub on a Git push event, also make it possible to run them an anything not being a pull_request event. 
(Like a schedule) --- .github/workflows/container_base_push.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml index 82c7a376ae0..2520a7e9257 100644 --- a/.github/workflows/container_base_push.yml +++ b/.github/workflows/container_base_push.yml @@ -53,18 +53,18 @@ jobs: - name: Build base container image with local architecture run: mvn -f modules/container-base -Pct package - - if: ${{ github.event_name == 'push' }} # run only if this is a push - PRs have no access to secrets + - if: ${{ github.event_name != 'pull_request' }} # run only if this is not a pull request - PRs have no access to secrets name: Log in to the Container registry uses: docker/login-action@v1 with: username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_TOKEN }} - - if: ${{ github.event_name == 'push' }} # run only if this is a push - multi-arch makes no sense with PR + - if: ${{ github.event_name != 'pull_request' }} # run only if this is not a pull request - multi-arch makes no sense with PR name: Set up QEMU for multi-arch builds uses: docker/setup-qemu-action@v2 - name: Re-set image tag based on branch if: ${{ github.ref == 'master' }} run: echo "IMAGE_TAG=release" - - if: ${{ github.event_name == 'push' }} # run only if this is a push - tag push will only succeed in upstream + - if: ${{ github.event_name != 'pull_request' }} # run only if this is not a pull request - tag push will only succeed in upstream name: Deploy multi-arch base container image to Docker Hub run: mvn -f modules/container-base -Pct deploy -Dbase.image.tag=${{ env.IMAGE_TAG }} -Ddocker.registry=${{ env.REGISTRY }} From fbfcaa4c5fec93dc2e8ea434497a700e5a047463 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 8 Nov 2022 16:30:39 +0100 Subject: [PATCH 091/173] docs,ci(ct-base): add and push README description to Docker Hub #8932 When pushing to Docker Hub from development, we now also push a short description with disclaimers, links to docs and license hints. --- .github/workflows/container_base_push.yml | 21 +++++++-- modules/container-base/README.md | 56 +++++++++++++++++++++++ 2 files changed, 74 insertions(+), 3 deletions(-) create mode 100644 modules/container-base/README.md diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml index 2520a7e9257..1ef8ba94e78 100644 --- a/.github/workflows/container_base_push.yml +++ b/.github/workflows/container_base_push.yml @@ -9,6 +9,7 @@ on: paths: - 'modules/container-base/**' - 'modules/dataverse-parent/pom.xml' + - '.github/workflows/container_base_push.yml' pull_request: branches: - 'develop' @@ -16,6 +17,7 @@ on: paths: - 'modules/container-base/**' - 'modules/dataverse-parent/pom.xml' + - '.github/workflows/container_base_push.yml' env: IMAGE_TAG: develop @@ -53,18 +55,31 @@ jobs: - name: Build base container image with local architecture run: mvn -f modules/container-base -Pct package - - if: ${{ github.event_name != 'pull_request' }} # run only if this is not a pull request - PRs have no access to secrets + # Run anything below only if this is not a pull request. + # Accessing, pushing tags etc. to DockerHub will only succeed in upstream because secrets. 
+ + - if: ${{ github.event_name != 'pull_request' && github.ref == 'develop' }} + name: Push description to DockerHub + uses: peter-evans/dockerhub-description@v3 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + repository: gdcc/base + short-description: "Dataverse Base Container image providing Payara application server and optimized configuration" + readme-filepath: ./modules/container-base/README.md + + - if: ${{ github.event_name != 'pull_request' }} name: Log in to the Container registry uses: docker/login-action@v1 with: username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_TOKEN }} - - if: ${{ github.event_name != 'pull_request' }} # run only if this is not a pull request - multi-arch makes no sense with PR + - if: ${{ github.event_name != 'pull_request' }} name: Set up QEMU for multi-arch builds uses: docker/setup-qemu-action@v2 - name: Re-set image tag based on branch if: ${{ github.ref == 'master' }} run: echo "IMAGE_TAG=release" - - if: ${{ github.event_name != 'pull_request' }} # run only if this is not a pull request - tag push will only succeed in upstream + - if: ${{ github.event_name != 'pull_request' }} name: Deploy multi-arch base container image to Docker Hub run: mvn -f modules/container-base -Pct deploy -Dbase.image.tag=${{ env.IMAGE_TAG }} -Ddocker.registry=${{ env.REGISTRY }} diff --git a/modules/container-base/README.md b/modules/container-base/README.md new file mode 100644 index 00000000000..d6f93b14da7 --- /dev/null +++ b/modules/container-base/README.md @@ -0,0 +1,56 @@ +# Dataverse Base Container Image + +A "base image" offers you a pre-installed and pre-tuned application server to deploy Dataverse software to. +Adding basic functionality like executing scripts at container boot, monitoring, memory tweaks etc is all done +at this layer, to make the application image focus on the app itself. + +## Quick Reference + +**Maintained by:** + +This image is created, maintained and supported by the Dataverse community on a best-effort basis. + +**Where to find documentation:** + +The [Dataverse Container Guide - Base Image](https://guides.dataverse.org/en/latest/container/base-image.html) +provides in-depth information about content, building, tuning and so on for this image. + +**Where to get help and ask questions:** + +IQSS will not offer you support how to deploy or run it, please reach out to the community for help on using it. +You can join the Community Chat on Matrix at https://chat.dataverse.org or the Community Slack at +https://dataversecommunity.slack.com to ask for help and guidance. + +## Supported Image Tags + +This image is sourced within the main upstream code [repository of the Dataverse software](https://github.com/IQSS/dataverse). +Development and maintenance happens there (again, by the community). Community supported image tags are based on the two +most important branches: + +- `develop` representing the unstable state of affairs in Dataverse's development branch + ([`Dockerfile`](https://github.com/IQSS/dataverse/tree/develop/modules/container-base/src/main/docker/Dockerfile)) +- `release` representing the latest stable release in Dataverse's main branch + ([`Dockerfile`](https://github.com/IQSS/dataverse/tree/master/modules/container-base/src/main/docker/Dockerfile)) + +Within the main repository, you may find the base image's files at `/modules/container-base`. +This Maven module uses the `Maven Docker Plugin `_ to build and ship the image. 
+You may use, extend, or alter this image to your liking and/or host in some different registry if you want to. + +**Supported architectures:** This image is created as a "multi-arch image", supporting the most common architectures +Dataverse usually runs on: AMD64 (Windows/Linux/...) and ARM64 (Apple M1/M2). + +## License + +Image content created by the community is licensed under [Apache License, Version 2.0](https://www.apache.org/licenses/LICENSE-2.0), +like the [main Dataverse project](https://github.com/IQSS/dataverse/blob/develop/LICENSE.md). + +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and limitations under the License. + +As with all Docker images, all images likely also contain other software which may be under other licenses (such as +[Payara Server](https://github.com/payara/Payara/blob/master/LICENSE.txt), Bash, etc from the base +distribution, along with any direct or indirect (Java) dependencies contained). + +As for any pre-built image usage, it is the image user's responsibility to ensure that any use of this image complies +with any relevant licenses for all software contained within. From 1241591eb171609542df9e218388f6bb71e7ae71 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 8 Nov 2022 16:31:33 +0100 Subject: [PATCH 092/173] docs(ct-base): add short intro to base image docs page #8932 Explain a bit (short!) what this image is and what to expect. --- doc/sphinx-guides/source/container/base-image.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/doc/sphinx-guides/source/container/base-image.rst b/doc/sphinx-guides/source/container/base-image.rst index 8cf6af1f904..8016ce95f27 100644 --- a/doc/sphinx-guides/source/container/base-image.rst +++ b/doc/sphinx-guides/source/container/base-image.rst @@ -4,8 +4,13 @@ Application Base Image .. contents:: |toctitle| :local: +A "base image" offers you a pre-installed and pre-tuned application server to deploy Dataverse software to. +Adding basic functionality like executing scripts at container boot, monitoring, memory tweaks etc is all done +at this layer, to make the application image focus on the app itself. + Within the main repository, you may find the base image's files at ``/modules/container-base``. This Maven module uses the `Maven Docker Plugin `_ to build and ship the image. +You may use, extend, or alter this image to your liking and/or host in some different registry if you want to. **NOTE: This image is created, maintained and supported by the Dataverse community on a best-effort basis.** IQSS will not offer you support how to deploy or run it, please reach out to the community for help on using it. 
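For readers unfamiliar with the plugin referenced above, the "Maven Docker Plugin" is wired up in the module's pom.xml roughly as sketched below. This is an illustration only: the io.fabric8 docker-maven-plugin coordinates and the build configuration are assumptions, not copied from the module; the gdcc/base image name and the base.image.tag property are the ones used by the workflow steps in these patches.

    <plugin>
        <groupId>io.fabric8</groupId>
        <artifactId>docker-maven-plugin</artifactId>
        <configuration>
            <images>
                <image>
                    <!-- pushed as gdcc/base:develop or gdcc/base:release -->
                    <name>gdcc/base:${base.image.tag}</name>
                    <build>
                        <!-- Dockerfile shipped with the module, see src/main/docker -->
                        <dockerFile>Dockerfile</dockerFile>
                    </build>
                </image>
            </images>
        </configuration>
    </plugin>

The "mvn -f modules/container-base -Pct package" and "deploy" invocations in the workflow then build and push the image through this plugin, with the registry overridable via -Ddocker.registry as the deploy step above shows.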
From 22eb801f0a1dacaea2f34ea1a2864cf5d54f5365 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 8 Nov 2022 23:36:39 +0100 Subject: [PATCH 093/173] ci(ct-base): update action versions #8932 --- .github/workflows/container_base_push.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml index 1ef8ba94e78..519e135f944 100644 --- a/.github/workflows/container_base_push.yml +++ b/.github/workflows/container_base_push.yml @@ -38,15 +38,15 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Set up JDK ${{ matrix.jdk }} - uses: actions/setup-java@v2 + uses: actions/setup-java@v3 with: java-version: ${{ matrix.jdk }} distribution: 'adopt' - name: Cache Maven packages - uses: actions/cache@v2 + uses: actions/cache@v3 with: path: ~/.m2 key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }} @@ -70,7 +70,7 @@ jobs: - if: ${{ github.event_name != 'pull_request' }} name: Log in to the Container registry - uses: docker/login-action@v1 + uses: docker/login-action@v2 with: username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_TOKEN }} From 7d4388ed5022e64a1db721160169d93a2c565007 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 8 Nov 2022 23:42:20 +0100 Subject: [PATCH 094/173] ci(ct-base): fix step if-conditions for branch names #8932 Github context offers ".ref" but we need ".ref_name" to match *just* the branch name. --- .github/workflows/container_base_push.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml index 519e135f944..5a7280ce3b1 100644 --- a/.github/workflows/container_base_push.yml +++ b/.github/workflows/container_base_push.yml @@ -58,7 +58,7 @@ jobs: # Run anything below only if this is not a pull request. # Accessing, pushing tags etc. to DockerHub will only succeed in upstream because secrets. 
- - if: ${{ github.event_name != 'pull_request' && github.ref == 'develop' }} + - if: ${{ github.event_name == 'push' && github.ref_name == 'develop' }} name: Push description to DockerHub uses: peter-evans/dockerhub-description@v3 with: @@ -78,7 +78,7 @@ jobs: name: Set up QEMU for multi-arch builds uses: docker/setup-qemu-action@v2 - name: Re-set image tag based on branch - if: ${{ github.ref == 'master' }} + if: ${{ github.ref_name == 'master' }} run: echo "IMAGE_TAG=release" - if: ${{ github.event_name != 'pull_request' }} name: Deploy multi-arch base container image to Docker Hub From 3d790aacc7ffd4f44e8fb9a4880400960b52b48d Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 8 Nov 2022 23:50:31 +0100 Subject: [PATCH 095/173] ci(ct-base): fix failing image pushes #8932 The login to the registry needs to be explicit otherwise pushes will fail to acquire the correct token and pushes are rejected with "insufficient_scope: authorization failed" --- .github/workflows/container_base_push.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/container_base_push.yml b/.github/workflows/container_base_push.yml index 5a7280ce3b1..fc0a3564e50 100644 --- a/.github/workflows/container_base_push.yml +++ b/.github/workflows/container_base_push.yml @@ -72,6 +72,7 @@ jobs: name: Log in to the Container registry uses: docker/login-action@v2 with: + registry: ${{ env.REGISTRY }} username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_TOKEN }} - if: ${{ github.event_name != 'pull_request' }} From 609688092192e674686243096fcc45a9e4086826 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Wed, 9 Nov 2022 15:18:48 +0100 Subject: [PATCH 096/173] docs(ct-base): rephrase slightly to match wording in main index Co-authored-by: Benjamin Peuch --- doc/sphinx-guides/source/index.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/index.rst b/doc/sphinx-guides/source/index.rst index be32e94d80f..0cd01b8a5a7 100755 --- a/doc/sphinx-guides/source/index.rst +++ b/doc/sphinx-guides/source/index.rst @@ -31,7 +31,7 @@ The User Guide is further divided into primary activities: finding & using data, adding Datasets, administering dataverses or Datasets, and Dataset exploration/visualizations. Details on all of the above tasks can be found in the Users Guide. The Installation Guide is for people or organizations who want to host their -own Dataverse. The Container Guide adds to this information on container-based installations. +own Dataverse. The Container Guide gives information on how to deploy Dataverse with containers. The Developer Guide contains instructions for people who want to contribute to the Open Source Dataverse project or who want to modify the code to suit their own needs. 
Finally, the API Guide is for From 4a79dcbddde84251c4a975e3b858d00171ffef66 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Wed, 9 Nov 2022 15:25:33 +0100 Subject: [PATCH 097/173] docs(ct-base): apply some language tweaks to docs pages Co-authored-by: Benjamin Peuch --- doc/sphinx-guides/source/container/index.rst | 2 +- modules/container-base/README.md | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/doc/sphinx-guides/source/container/index.rst b/doc/sphinx-guides/source/container/index.rst index f6c99bfc19e..6d22318ad03 100644 --- a/doc/sphinx-guides/source/container/index.rst +++ b/doc/sphinx-guides/source/container/index.rst @@ -9,7 +9,7 @@ Container Guide Running Dataverse software in containers is quite different than in a :doc:`classic installation <../installation/prep>`. -Both approaches have pros and cons. These days (2022) containers are very often used for development and testing, +Both approaches have pros and cons. These days, containers are very often used for development and testing, but there is an ever rising move for running applications in the cloud using container technology. **NOTE:** diff --git a/modules/container-base/README.md b/modules/container-base/README.md index d6f93b14da7..ce48eae8a65 100644 --- a/modules/container-base/README.md +++ b/modules/container-base/README.md @@ -1,7 +1,7 @@ # Dataverse Base Container Image A "base image" offers you a pre-installed and pre-tuned application server to deploy Dataverse software to. -Adding basic functionality like executing scripts at container boot, monitoring, memory tweaks etc is all done +Adding basic functionality like executing scripts at container boot, monitoring, memory tweaks, etc., is all done at this layer, to make the application image focus on the app itself. ## Quick Reference @@ -17,14 +17,14 @@ provides in-depth information about content, building, tuning and so on for this **Where to get help and ask questions:** -IQSS will not offer you support how to deploy or run it, please reach out to the community for help on using it. +IQSS will not offer you support how to deploy or run it. Please reach out to the community for help on using it. You can join the Community Chat on Matrix at https://chat.dataverse.org or the Community Slack at https://dataversecommunity.slack.com to ask for help and guidance. ## Supported Image Tags This image is sourced within the main upstream code [repository of the Dataverse software](https://github.com/IQSS/dataverse). -Development and maintenance happens there (again, by the community). Community supported image tags are based on the two +Development and maintenance happens there (again, by the community). Community-supported image tags are based on the two most important branches: - `develop` representing the unstable state of affairs in Dataverse's development branch @@ -32,7 +32,7 @@ most important branches: - `release` representing the latest stable release in Dataverse's main branch ([`Dockerfile`](https://github.com/IQSS/dataverse/tree/master/modules/container-base/src/main/docker/Dockerfile)) -Within the main repository, you may find the base image's files at `/modules/container-base`. +Within the main repository, you may find the base image files at `/modules/container-base`. This Maven module uses the `Maven Docker Plugin `_ to build and ship the image. You may use, extend, or alter this image to your liking and/or host in some different registry if you want to. 
@@ -49,7 +49,7 @@ Unless required by applicable law or agreed to in writing, software distributed See the License for the specific language governing permissions and limitations under the License. As with all Docker images, all images likely also contain other software which may be under other licenses (such as -[Payara Server](https://github.com/payara/Payara/blob/master/LICENSE.txt), Bash, etc from the base +[Payara Server](https://github.com/payara/Payara/blob/master/LICENSE.txt), Bash, etc., from the base distribution, along with any direct or indirect (Java) dependencies contained). As for any pre-built image usage, it is the image user's responsibility to ensure that any use of this image complies From c4e5028928302b183530d23159ee5e0f807f08b0 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 10 Nov 2022 11:42:59 +0100 Subject: [PATCH 098/173] refactor(metadata): rename CodeMeta softwareVersion to codeVersion #7844 As the citation block already contains a compound field "software" with both "softwareName" and "softwareVersion", meant to describe software used to create the dataset, this name conflict must be resolved. By renaming to "codeVersion", the semantic is not changed, as this metadata block is about describing software deposits. As the termURI is explicitly set to "schema.org/softwareVersion" it remains compatible with OAI-ORE and other linked data usages. A future exporter for CodeMeta might require special attention for this field. --- scripts/api/data/metadatablocks/codemeta.tsv | 2 +- src/main/java/propertyFiles/codeMeta20.properties | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/api/data/metadatablocks/codemeta.tsv b/scripts/api/data/metadatablocks/codemeta.tsv index 029ca2355ec..3c872426387 100644 --- a/scripts/api/data/metadatablocks/codemeta.tsv +++ b/scripts/api/data/metadatablocks/codemeta.tsv @@ -1,7 +1,7 @@ #metadataBlock name dataverseAlias displayName blockURI codeMeta20 Software Metadata (CodeMeta v2.0) https://codemeta.github.io/terms/ #datasetField name title description watermark fieldType displayOrder displayFormat advancedSearchField allowControlledVocabulary allowmultiples facetable displayoncreate required parent metadatablock_id termURI - softwareVersion Software Version Version of the software instance, usually following some convention like SemVer etc. e.g. 0.2.1 or 1.3 or 2021.1 etc text 0 #VALUE TRUE FALSE FALSE TRUE TRUE FALSE codeMeta20 https://schema.org/softwareVersion + codeVersion Software Version Version of the software instance, usually following some convention like SemVer etc. e.g. 0.2.1 or 1.3 or 2021.1 etc text 0 #VALUE TRUE FALSE FALSE TRUE TRUE FALSE codeMeta20 https://schema.org/softwareVersion developmentStatus Development Status Description of development status, e.g. work in progress (wip), active, etc. See repostatus.org for more information. text 1 #VALUE TRUE TRUE FALSE TRUE FALSE FALSE codeMeta20 https://www.repostatus.org codeRepository Code Repository Link to the repository where the un-compiled, human readable code and related code is located (SVN, GitHub, CodePlex, institutional GitLab instance, Gitea, etc.). e.g. https://github.com/user/project url 2 #VALUE TRUE FALSE TRUE FALSE TRUE FALSE codeMeta20 https://schema.org/codeRepository applicationCategory Application Category Type of software application, e.g. Simulation, Analysis, Visualisation. 
text 3 #VALUE TRUE FALSE TRUE TRUE TRUE FALSE codeMeta20 https://schema.org/applicationCategory diff --git a/src/main/java/propertyFiles/codeMeta20.properties b/src/main/java/propertyFiles/codeMeta20.properties index e203c1e46e9..5f788df4e83 100644 --- a/src/main/java/propertyFiles/codeMeta20.properties +++ b/src/main/java/propertyFiles/codeMeta20.properties @@ -1,8 +1,8 @@ metadatablock.name=codeMeta20 metadatablock.displayName=Software Metadata (CodeMeta 2.0) -datasetfieldtype.softwareVersion.title=Software Version -datasetfieldtype.softwareVersion.description=Version of the software instance, usually following some convention like SemVer etc. -datasetfieldtype.softwareVersion.watermark=e.g. 0.2.1 or 1.3 or 2021.1 etc +datasetfieldtype.codeVersion.title=Software Version +datasetfieldtype.codeVersion.description=Version of the software instance, usually following some convention like SemVer etc. +datasetfieldtype.codeVersion.watermark=e.g. 0.2.1 or 1.3 or 2021.1 etc datasetfieldtype.developmentStatus.title=Development Status datasetfieldtype.developmentStatus.description=Description of development status, e.g. work in progress (wip), active, etc. See repostatus.org for more information. datasetfieldtype.developmentStatus.watermark= Development Status From d79b4aa3ad1f99ab61d0330462c41c36f478514c Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 10 Nov 2022 11:44:57 +0100 Subject: [PATCH 099/173] style(metadata): rephrase CodeMeta storage and memory requirements descriptions #7844 A slight rephrasing should make it easier to understand what is expected as content for these metadata fields. --- scripts/api/data/metadatablocks/codemeta.tsv | 4 ++-- src/main/java/propertyFiles/codeMeta20.properties | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/api/data/metadatablocks/codemeta.tsv b/scripts/api/data/metadatablocks/codemeta.tsv index 3c872426387..b65cf56b1af 100644 --- a/scripts/api/data/metadatablocks/codemeta.tsv +++ b/scripts/api/data/metadatablocks/codemeta.tsv @@ -18,8 +18,8 @@ softwareSuggestions Name & Version Name and version of the optional software/library dependency e.g. Sphinx 5.0.2 text 0 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE softwareSuggestionsItem codeMeta20 https://codemeta.github.io/terms/softwareSuggestions softwareSuggestionsInfoUrl Info URL Link to optional software/library homepage or documentation (ideally also versioned) e.g. https://www.sphinx-doc.org url 1 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE softwareSuggestionsItem codeMeta20 https://dataverse.org/schema/codeMeta20/softwareSuggestionsInfoUrl memoryRequirements Memory Requirements Minimum memory requirements. text 12 #VALUE TRUE FALSE FALSE FALSE FALSE FALSE codeMeta20 https://schema.org/memoryRequirements - processorRequirements Processor Requirements Processor architecture required to run the application (e.g. IA64). text 13 #VALUE TRUE FALSE TRUE FALSE FALSE FALSE codeMeta20 https://schema.org/processorRequirements - storageRequirements Storage Requirements Storage requirements (e.g. free space required). text 14 #VALUE TRUE FALSE FALSE FALSE FALSE FALSE codeMeta20 https://schema.org/storageRequirements + processorRequirements Processor Requirements Processor architecture or other CPU requirements to run the application (e.g. IA64). text 13 #VALUE TRUE FALSE TRUE FALSE FALSE FALSE codeMeta20 https://schema.org/processorRequirements + storageRequirements Storage Requirements Minimum storage requirements (e.g. free space required). 
text 14 #VALUE TRUE FALSE FALSE FALSE FALSE FALSE codeMeta20 https://schema.org/storageRequirements permissions Permissions Permission(s) required to run the code (for example, a mobile app may require full internet access or may run only on wifi). text 15 #VALUE TRUE FALSE TRUE FALSE FALSE FALSE codeMeta20 https://schema.org/permissions softwareHelp Software Help/Documentation Link to help texts or documentation e.g. https://user.github.io/project/docs url 16 #VALUE FALSE FALSE TRUE FALSE TRUE FALSE codeMeta20 https://schema.org/softwareHelp readme Readme Link to the README of the project e.g. https://github.com/user/project/blob/main/README.md url 17 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE codeMeta20 https://codemeta.github.io/terms/readme diff --git a/src/main/java/propertyFiles/codeMeta20.properties b/src/main/java/propertyFiles/codeMeta20.properties index 5f788df4e83..92153ccb10a 100644 --- a/src/main/java/propertyFiles/codeMeta20.properties +++ b/src/main/java/propertyFiles/codeMeta20.properties @@ -52,10 +52,10 @@ datasetfieldtype.memoryRequirements.title=Memory Requirements datasetfieldtype.memoryRequirements.description=Minimum memory requirements. datasetfieldtype.memoryRequirements.watermark= datasetfieldtype.processorRequirements.title=Processor Requirements -datasetfieldtype.processorRequirements.description=Processor architecture required to run the application (e.g. IA64). +datasetfieldtype.processorRequirements.description=Processor architecture or other CPU requirements to run the application (e.g. IA64). datasetfieldtype.processorRequirements.watermark= datasetfieldtype.storageRequirements.title=Storage Requirements -datasetfieldtype.storageRequirements.description=Storage requirements (e.g. free space required). +datasetfieldtype.storageRequirements.description=Minimum storage requirements (e.g. free space required). datasetfieldtype.storageRequirements.watermark= datasetfieldtype.permissions.title=Permissions datasetfieldtype.permissions.description=Permission(s) required to run the code (for example, a mobile app may require full internet access or may run only on wifi). From 8d5edf23a13631e878c413e55c320cb704a579b5 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 10 Nov 2022 12:35:50 +0100 Subject: [PATCH 100/173] feat(metadata): add CodeMeta fields to Solr schema #7844 Adding the fields of the CodeMeta block to the Solr schema to enable quick usage of the fields (despite being flagged experimental in the guides). 
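Each field of the new block needs a matching declaration in the Solr schema so it can be indexed and searched. The additions follow the same pattern used for the existing dataset fields: a field entry per datasetField name from the TSV, plus a copyField into the catch-all full-text field. A minimal sketch for two of the CodeMeta fields (field types and attribute values here are illustrative, not copied from the diff below):

    <field name="codeVersion" type="text_en" multiValued="false" stored="true" indexed="true"/>
    <field name="developmentStatus" type="text_en" multiValued="false" stored="true" indexed="true"/>

    <copyField source="codeVersion" dest="_text_" maxChars="3000"/>
    <copyField source="developmentStatus" dest="_text_" maxChars="3000"/>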
--- conf/solr/8.11.1/schema.xml | 48 ++++++++++++++++++++++++++++++++++++- 1 file changed, 47 insertions(+), 1 deletion(-) diff --git a/conf/solr/8.11.1/schema.xml b/conf/solr/8.11.1/schema.xml index 63312ab5d40..2656abf0dc5 100644 --- a/conf/solr/8.11.1/schema.xml +++ b/conf/solr/8.11.1/schema.xml @@ -405,9 +405,31 @@ + + + + + + + + + + + + + + + + + + + + + + + - @@ -645,6 +667,30 @@ + + + + + + + + + + + + + + + + + + + + + + + + - 5.0.0-RC1 + 5.0.0-RC2 1.15.0 From 3d1e98c5a9f5f755d8d78b6151b659fe2377f3ed Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Fri, 2 Dec 2022 13:27:40 -0500 Subject: [PATCH 107/173] this method was renamed in RC2 (#8843) --- .../harvest/server/xoai/DataverseXoaiItemRepository.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/server/xoai/DataverseXoaiItemRepository.java b/src/main/java/edu/harvard/iq/dataverse/harvest/server/xoai/DataverseXoaiItemRepository.java index faf3cf9ddc4..147d42648fa 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/server/xoai/DataverseXoaiItemRepository.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/server/xoai/DataverseXoaiItemRepository.java @@ -49,7 +49,7 @@ public DataverseXoaiItemRepository (OAIRecordServiceBean recordService, DatasetS } @Override - public ItemIdentifier getItem(String identifier) throws IdDoesNotExistException { + public ItemIdentifier getItemIdentifier(String identifier) throws IdDoesNotExistException { // This method is called when ListMetadataFormats request specifies // the identifier, requesting the formats available for this specific record. // In our case, under the current implementation, we need to simply look From aeffa3b6fc13a029b70630d856b5f0373a333903 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Mon, 5 Dec 2022 20:41:24 -0500 Subject: [PATCH 108/173] a few extra oai tests (#8843) --- .../iq/dataverse/api/HarvestingServerIT.java | 222 +++++++++++++----- .../edu/harvard/iq/dataverse/api/UtilIT.java | 10 + 2 files changed, 176 insertions(+), 56 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java index fdd034ab12e..5355b57490d 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java @@ -10,7 +10,12 @@ import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import com.jayway.restassured.response.Response; import com.jayway.restassured.path.json.JsonPath; +import com.jayway.restassured.path.xml.XmlPath; +import com.jayway.restassured.path.xml.element.Node; import static edu.harvard.iq.dataverse.api.UtilIT.API_TOKEN_HTTP_HEADER; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; import javax.json.Json; import javax.json.JsonArray; import static javax.ws.rs.core.Response.Status.FORBIDDEN; @@ -24,18 +29,32 @@ import static org.junit.Assert.assertTrue; /** - * extremely minimal API tests for creating OAI sets. + * Tests for the Harvesting Server functionality + * Note that these test BOTH the proprietary Dataverse rest APIs for creating + * and managing sets, AND the OAI-PMH functionality itself. 
*/ public class HarvestingServerIT { private static final Logger logger = Logger.getLogger(HarvestingServerIT.class.getCanonicalName()); + private static String normalUserAPIKey; + private static String adminUserAPIKey; + private static String singleSetDatasetIdentifier; + private static String singleSetDatasetPersistentId; + @BeforeClass public static void setUpClass() { RestAssured.baseURI = UtilIT.getRestAssuredBaseUri(); // enable harvesting server // Gave some thought to storing the original response, and resetting afterwards - but that appears to be more complexity than it's worth Response enableHarvestingServerResponse = UtilIT.setSetting(SettingsServiceBean.Key.OAIServerEnabled,"true"); + + // Create users: + setupUsers(); + + // Create and publish some datasets: + setupDatasets(); + } @AfterClass @@ -44,7 +63,7 @@ public static void afterClass() { Response enableHarvestingServerResponse = UtilIT.setSetting(SettingsServiceBean.Key.OAIServerEnabled,"false"); } - private void setupUsers() { + private static void setupUsers() { Response cu0 = UtilIT.createRandomUser(); normalUserAPIKey = UtilIT.getApiTokenFromResponse(cu0); Response cu1 = UtilIT.createRandomUser(); @@ -52,6 +71,40 @@ private void setupUsers() { Response u1a = UtilIT.makeSuperUser(un1); adminUserAPIKey = UtilIT.getApiTokenFromResponse(cu1); } + + private static void setupDatasets() { + // create dataverse: + Response createDataverseResponse = UtilIT.createRandomDataverse(adminUserAPIKey); + createDataverseResponse.prettyPrint(); + String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse); + + // publish dataverse: + Response publishDataverse = UtilIT.publishDataverseViaNativeApi(dataverseAlias, adminUserAPIKey); + assertEquals(OK.getStatusCode(), publishDataverse.getStatusCode()); + + // create dataset: + Response createDatasetResponse = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, adminUserAPIKey); + createDatasetResponse.prettyPrint(); + Integer datasetId = UtilIT.getDatasetIdFromResponse(createDatasetResponse); + + // retrieve the global id: + singleSetDatasetPersistentId = UtilIT.getDatasetPersistentIdFromResponse(createDatasetResponse); + + // publish dataset: + Response publishDataset = UtilIT.publishDatasetViaNativeApi(singleSetDatasetPersistentId, "major", adminUserAPIKey); + assertEquals(200, publishDataset.getStatusCode()); + + singleSetDatasetIdentifier = singleSetDatasetPersistentId.substring(singleSetDatasetPersistentId.lastIndexOf('/') + 1); + + logger.info("identifier: " + singleSetDatasetIdentifier); + + // Publish command is executed asynchronously, i.e. it may + // still be running after we received the OK from the publish API. + // The oaiExport step also requires the metadata exports to be done and this + // takes longer than just publish/reindex. + // So wait for all of this to finish. 
+ UtilIT.sleepForReexport(singleSetDatasetPersistentId, adminUserAPIKey, 10); + } private String jsonForTestSpec(String name, String def) { String r = String.format("{\"name\":\"%s\",\"definition\":\"%s\"}", name, def);//description is optional @@ -63,20 +116,84 @@ private String jsonForEditSpec(String name, String def, String desc) { return r; } - private String normalUserAPIKey; - private String adminUserAPIKey; + private XmlPath validateOaiVerbResponse(Response oaiResponse, String verb) { + // confirm that the response is in fact XML: + XmlPath responseXmlPath = oaiResponse.getBody().xmlPath(); + assertNotNull(responseXmlPath); + + String dateString = responseXmlPath.getString("OAI-PMH.responseDate"); + assertNotNull(dateString); // TODO: validate that it's well-formatted! + logger.info("date string from the OAI output:"+dateString); + assertEquals("http://localhost:8080/oai", responseXmlPath.getString("OAI-PMH.request")); + assertEquals(verb, responseXmlPath.getString("OAI-PMH.request.@verb")); + return responseXmlPath; + } + + @Test + public void testOaiIdentify() { + // Run Identify: + Response identifyResponse = UtilIT.getOaiIdentify(); + assertEquals(OK.getStatusCode(), identifyResponse.getStatusCode()); + //logger.info("Identify response: "+identifyResponse.prettyPrint()); + + // Validate the response: + + XmlPath responseXmlPath = validateOaiVerbResponse(identifyResponse, "Identify"); + assertEquals("http://localhost:8080/oai", responseXmlPath.getString("OAI-PMH.Identify.baseURL")); + // Confirm that the server is reporting the correct parameters that + // our server implementation should be using: + assertEquals("2.0", responseXmlPath.getString("OAI-PMH.Identify.protocolVersion")); + assertEquals("transient", responseXmlPath.getString("OAI-PMH.Identify.deletedRecord")); + assertEquals("YYYY-MM-DDThh:mm:ssZ", responseXmlPath.getString("OAI-PMH.Identify.granularity")); + } + + @Test + public void testOaiListMetadataFormats() { + // Run ListMeatadataFormats: + Response listFormatsResponse = UtilIT.getOaiListMetadataFormats(); + assertEquals(OK.getStatusCode(), listFormatsResponse.getStatusCode()); + //logger.info("ListMetadataFormats response: "+listFormatsResponse.prettyPrint()); + + // Validate the response: + + XmlPath responseXmlPath = validateOaiVerbResponse(listFormatsResponse, "ListMetadataFormats"); + + // Check the payload of the response atgainst the list of metadata formats + // we are currently offering under OAI; will need to be explicitly + // modified if/when we add more harvestable formats. + + List listFormats = responseXmlPath.getList("OAI-PMH.ListMetadataFormats.metadataFormat"); + + assertNotNull(listFormats); + assertEquals(5, listFormats.size()); + + // The metadata formats are reported in an unpredictable ordder. 
We + // want to sort the prefix names for comparison purposes, and for that + // they need to be saved in a modifiable list: + List metadataPrefixes = new ArrayList<>(); + + for (int i = 0; i < listFormats.size(); i++) { + metadataPrefixes.add(responseXmlPath.getString("OAI-PMH.ListMetadataFormats.metadataFormat["+i+"].metadataPrefix")); + } + Collections.sort(metadataPrefixes); + + assertEquals("[Datacite, dataverse_json, oai_datacite, oai_dc, oai_ddi]", metadataPrefixes.toString()); + + } + + @Test - public void testSetCreation() { - setupUsers(); + public void testSetCreateAPIandOAIlistIdentifiers() { + // Create the set with Dataverse /api/harvest/server API: String setName = UtilIT.getRandomString(6); String def = "*"; // make sure the set does not exist - String u0 = String.format("/api/harvest/server/oaisets/%s", setName); + String setPath = String.format("/api/harvest/server/oaisets/%s", setName); String createPath ="/api/harvest/server/oaisets/add"; Response r0 = given() - .get(u0); + .get(setPath); assertEquals(404, r0.getStatusCode()); // try to create set as normal user, should fail @@ -94,7 +211,7 @@ public void testSetCreation() { assertEquals(201, r2.getStatusCode()); Response getSet = given() - .get(u0); + .get(setPath); logger.info("getSet.getStatusCode(): " + getSet.getStatusCode()); logger.info("getSet printresponse: " + getSet.prettyPrint()); @@ -118,17 +235,19 @@ public void testSetCreation() { Response r4 = UtilIT.exportOaiSet(setName); assertEquals(200, r4.getStatusCode()); - // try to delete as normal user should fail + + + // try to delete as normal user, should fail Response r5 = given() .header(UtilIT.API_TOKEN_HTTP_HEADER, normalUserAPIKey) - .delete(u0); + .delete(setPath); logger.info("r5.getStatusCode(): " + r5.getStatusCode()); assertEquals(400, r5.getStatusCode()); - // try to delete as admin user should work + // try to delete as admin user, should work Response r6 = given() .header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey) - .delete(u0); + .delete(setPath); logger.info("r6.getStatusCode(): " + r6.getStatusCode()); assertEquals(200, r6.getStatusCode()); @@ -136,7 +255,7 @@ public void testSetCreation() { @Test public void testSetEdit() { - setupUsers(); + //setupUsers(); String setName = UtilIT.getRandomString(6); String def = "*"; @@ -195,46 +314,17 @@ public void testSetEdit() { // OAI set with that one dataset, and attempt to retrieve the OAI record // with GetRecord. 
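For orientation, the GetRecord response that the test below navigates with XmlPath has roughly the following shape. The envelope is the standard OAI-PMH 2.0 one; the identifier, datestamp and setSpec values are placeholders, not values taken from this patch:

    <OAI-PMH xmlns="http://www.openarchives.org/OAI/2.0/">
      <responseDate>2022-11-08T12:00:00Z</responseDate>
      <request verb="GetRecord" metadataPrefix="oai_dc">http://localhost:8080/oai</request>
      <GetRecord>
        <record>
          <header>
            <identifier>doi:10.5072/FK2/EXAMPLE</identifier>
            <datestamp>2022-11-08T12:00:00Z</datestamp>
            <setSpec>EXAMPLE</setSpec>
          </header>
          <metadata>
            <oai_dc:dc xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/"
                       xmlns:dc="http://purl.org/dc/elements/1.1/">
              <dc:title>...</dc:title>
            </oai_dc:dc>
          </metadata>
        </record>
      </GetRecord>
    </OAI-PMH>

The header identifier carries the dataset persistent ID, which is what the assertion on "OAI-PMH.GetRecord.record.header.identifier" compares against.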
@Test - public void testOaiFunctionality() throws InterruptedException { + public void testSingleRecordOaiSet() throws InterruptedException { - setupUsers(); - - // create dataverse: - Response createDataverseResponse = UtilIT.createRandomDataverse(adminUserAPIKey); - createDataverseResponse.prettyPrint(); - String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse); + //setupUsers(); - // publish dataverse: - Response publishDataverse = UtilIT.publishDataverseViaNativeApi(dataverseAlias, adminUserAPIKey); - assertEquals(OK.getStatusCode(), publishDataverse.getStatusCode()); - - // create dataset: - Response createDatasetResponse = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, adminUserAPIKey); - createDatasetResponse.prettyPrint(); - Integer datasetId = UtilIT.getDatasetIdFromResponse(createDatasetResponse); - - // retrieve the global id: - String datasetPersistentId = UtilIT.getDatasetPersistentIdFromResponse(createDatasetResponse); - - // publish dataset: - Response publishDataset = UtilIT.publishDatasetViaNativeApi(datasetPersistentId, "major", adminUserAPIKey); - assertEquals(200, publishDataset.getStatusCode()); - - String identifier = datasetPersistentId.substring(datasetPersistentId.lastIndexOf('/') + 1); - - logger.info("identifier: " + identifier); + - // Let's try and create an OAI set with the dataset we have just - // created and published: - // - however, publish command is executed asynchronously, i.e. it may - // still be running after we received the OK from the publish API. - // The oaiExport step also requires the metadata exports to be done and this - // takes longer than just publish/reindex. - // So wait for all of this to finish. - UtilIT.sleepForReexport(datasetPersistentId, adminUserAPIKey, 10); + // Let's try and create an OAI set with the "single set dataset" that + // was created as part of the initial setup: - String setName = identifier; - String setQuery = "dsPersistentId:" + identifier; + String setName = singleSetDatasetIdentifier; + String setQuery = "dsPersistentId:" + singleSetDatasetIdentifier; String apiPath = String.format("/api/harvest/server/oaisets/%s", setName); String createPath ="/api/harvest/server/oaisets/add"; Response createSetResponse = given() @@ -277,12 +367,18 @@ public void testOaiFunctionality() throws InterruptedException { // There should be 1 and only 1 record in the response: assertEquals(1, ret.size()); // And the record should be the dataset we have just created: - assertEquals(datasetPersistentId, listIdentifiersResponse.getBody().xmlPath() + assertEquals(singleSetDatasetPersistentId, listIdentifiersResponse.getBody().xmlPath() .getString("OAI-PMH.ListIdentifiers.header.identifier")); break; } Thread.sleep(1000L); - } while (i")); // And now run GetRecord on the OAI record for the dataset: - Response getRecordResponse = UtilIT.getOaiRecord(datasetPersistentId, "oai_dc"); - - assertEquals(datasetPersistentId, getRecordResponse.getBody().xmlPath().getString("OAI-PMH.GetRecord.record.header.identifier")); + Response getRecordResponse = UtilIT.getOaiRecord(singleSetDatasetPersistentId, "oai_dc"); + + System.out.println("GetRecord response in its entirety: "+getRecordResponse.getBody().prettyPrint()); + System.out.println("one more time:"); + getRecordResponse.prettyPrint(); + + assertEquals(singleSetDatasetPersistentId, getRecordResponse.getBody().xmlPath().getString("OAI-PMH.GetRecord.record.header.identifier")); // TODO: // check the actual metadata payload of the OAI record more carefully? 
} + + // This test will attempt to create a set with multiple records (enough + // to trigger a paged response with a continuation token) and test its + // performance. + + + @Test + public void testMultiRecordOaiSet() throws InterruptedException { + + } } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index 550d4ed1264..9fa47db167b 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -2620,6 +2620,16 @@ static Response exportOaiSet(String setName) { return given().put(apiPath); } + static Response getOaiIdentify() { + String oaiVerbPath = "/oai?verb=Identify"; + return given().get(oaiVerbPath); + } + + static Response getOaiListMetadataFormats() { + String oaiVerbPath = "/oai?verb=ListMetadataFormats"; + return given().get(oaiVerbPath); + } + static Response getOaiRecord(String datasetPersistentId, String metadataFormat) { String apiPath = String.format("/oai?verb=GetRecord&identifier=%s&metadataPrefix=%s", datasetPersistentId, metadataFormat); return given().get(apiPath); From 4b60983e360b3ee4b5a50535b769852fc9ea67ef Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 6 Dec 2022 09:25:42 +0100 Subject: [PATCH 109/173] refactor(settings): remove unused Config var in SystemConfig #7000 --- src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java index fe95f53d293..fc7fd7beb06 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java @@ -10,8 +10,6 @@ import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.validation.PasswordValidatorUtil; -import org.eclipse.microprofile.config.Config; -import org.eclipse.microprofile.config.ConfigProvider; import org.passay.CharacterRule; import javax.ejb.EJB; @@ -46,7 +44,6 @@ public class SystemConfig { private static final Logger logger = Logger.getLogger(SystemConfig.class.getCanonicalName()); - private static final Config config = ConfigProvider.getConfig(); @EJB SettingsServiceBean settingsService; @@ -133,7 +130,6 @@ public String getVersion(boolean withBuildNumber) { // It will default to read from microprofile-config.properties source, // which contains in the source a Maven property reference to ${project.version}. // When packaging the app to deploy it, Maven will replace this, rendering it a static entry. - // NOTE: MicroProfile Config will cache the entry for us in internal maps. 
String appVersion = JvmSettings.VERSION.lookup(); if (withBuildNumber) { From 711dc6362dc629269d7db5840eb13821fc978682 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Fri, 9 Dec 2022 10:39:44 -0500 Subject: [PATCH 110/173] extra metadata from NetCDF and HDF5 files in NcML format #9153 --- doc/release-notes/9153-extract-metadata.md | 1 + .../source/user/dataset-management.rst | 7 ++ .../edu/harvard/iq/dataverse/DatasetPage.java | 1 + .../iq/dataverse/EditDatafilesPage.java | 1 + .../datadeposit/MediaResourceManagerImpl.java | 1 + .../datasetutility/AddReplaceFileHelper.java | 2 + .../dataverse/ingest/IngestServiceBean.java | 64 ++++++++++++++++++- .../harvard/iq/dataverse/api/NetcdfIT.java | 57 +++++++++++++++++ 8 files changed, 133 insertions(+), 1 deletion(-) create mode 100644 doc/release-notes/9153-extract-metadata.md create mode 100644 src/test/java/edu/harvard/iq/dataverse/api/NetcdfIT.java diff --git a/doc/release-notes/9153-extract-metadata.md b/doc/release-notes/9153-extract-metadata.md new file mode 100644 index 00000000000..ce4cc714805 --- /dev/null +++ b/doc/release-notes/9153-extract-metadata.md @@ -0,0 +1 @@ +For NetCDF and HDF5 files, an attempt will be made to extract metadata in NcML (XML) format and save it as an auxiliary file. diff --git a/doc/sphinx-guides/source/user/dataset-management.rst b/doc/sphinx-guides/source/user/dataset-management.rst index ec3bb392ce5..e891ca72880 100755 --- a/doc/sphinx-guides/source/user/dataset-management.rst +++ b/doc/sphinx-guides/source/user/dataset-management.rst @@ -299,6 +299,13 @@ Astronomy (FITS) Metadata found in the header section of `Flexible Image Transport System (FITS) files `_ are automatically extracted by the Dataverse Software, aggregated and displayed in the Astronomy Domain-Specific Metadata of the Dataset that the file belongs to. This FITS file metadata, is therefore searchable and browsable (facets) at the Dataset-level. +NetCDF and HDF5 +--------------- + +For NetCDF and HDF5 files, an attempt will be made to extract metadata in NcML_ (XML) format and save it as an auxiliary file. (See also :doc:`/developers/aux-file-support` in the Developer Guide.) + +.. 
_NcML: https://docs.unidata.ucar.edu/netcdf-java/current/userguide/ncml_overview.html + Compressed Files ---------------- diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 6e71f6c5042..b538aaca2c6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -3733,6 +3733,7 @@ public String save() { // Call Ingest Service one more time, to // queue the data ingest jobs for asynchronous execution: ingestService.startIngestJobsForDataset(dataset, (AuthenticatedUser) session.getUser()); + ingestService.extractMetadata(dataset, (AuthenticatedUser) session.getUser()); //After dataset saved, then persist prov json data if(systemConfig.isProvCollectionEnabled()) { diff --git a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java index fc8df8681af..d045126a3aa 100644 --- a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java @@ -1225,6 +1225,7 @@ public String save() { // queue the data ingest jobs for asynchronous execution: if (mode == FileEditMode.UPLOAD) { ingestService.startIngestJobsForDataset(dataset, (AuthenticatedUser) session.getUser()); + ingestService.extractMetadata(dataset, (AuthenticatedUser) session.getUser()); } if (FileEditMode.EDIT == mode && Referrer.FILE == referrer && fileMetadatas.size() > 0) { diff --git a/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/MediaResourceManagerImpl.java b/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/MediaResourceManagerImpl.java index 5491024c73c..e8d25bb4148 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/MediaResourceManagerImpl.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/MediaResourceManagerImpl.java @@ -373,6 +373,7 @@ DepositReceipt replaceOrAddFiles(String uri, Deposit deposit, AuthCredentials au } ingestService.startIngestJobsForDataset(dataset, user); + ingestService.extractMetadata(dataset, user); ReceiptGenerator receiptGenerator = new ReceiptGenerator(); String baseUrl = urlManager.getHostnamePlusBaseUrlPath(uri); diff --git a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java index febbb249a91..5277d014430 100644 --- a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java +++ b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java @@ -1932,6 +1932,7 @@ private boolean step_100_startIngestJobs(){ // start the ingest! 
ingestService.startIngestJobsForDataset(dataset, dvRequest.getAuthenticatedUser()); msg("post ingest start"); + ingestService.extractMetadata(dataset, dvRequest.getAuthenticatedUser()); } return true; } @@ -2145,6 +2146,7 @@ public Response addFiles(String jsonData, Dataset dataset, User authUser) { } //ingest job ingestService.startIngestJobsForDataset(dataset, (AuthenticatedUser) authUser); + ingestService.extractMetadata(dataset, (AuthenticatedUser) authUser); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java index b03bae618a4..e261efce642 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java @@ -20,6 +20,8 @@ package edu.harvard.iq.dataverse.ingest; +import edu.harvard.iq.dataverse.AuxiliaryFile; +import edu.harvard.iq.dataverse.AuxiliaryFileServiceBean; import edu.harvard.iq.dataverse.ControlledVocabularyValue; import edu.harvard.iq.dataverse.datavariable.VariableCategory; import edu.harvard.iq.dataverse.datavariable.VariableServiceBean; @@ -72,6 +74,7 @@ //import edu.harvard.iq.dvn.unf.*; import org.dataverse.unf.*; import java.io.BufferedInputStream; +import java.io.ByteArrayInputStream; import java.io.File; import java.io.FileInputStream; import java.io.IOException; @@ -81,6 +84,7 @@ import java.nio.channels.FileChannel; import java.nio.channels.ReadableByteChannel; import java.nio.channels.WritableByteChannel; +import java.nio.charset.StandardCharsets; import java.nio.file.DirectoryStream; import java.nio.file.Files; import java.nio.file.Path; @@ -113,6 +117,9 @@ import javax.jms.QueueSession; import javax.jms.Message; import javax.faces.application.FacesMessage; +import javax.ws.rs.core.MediaType; +import ucar.nc2.NetcdfFile; +import ucar.nc2.NetcdfFiles; /** * @@ -134,6 +141,8 @@ public class IngestServiceBean { @EJB DataFileServiceBean fileService; @EJB + AuxiliaryFileServiceBean auxiliaryFileService; + @EJB SystemConfig systemConfig; @Resource(lookup = "java:app/jms/queue/ingest") @@ -343,6 +352,7 @@ public List saveAndAddFilesToDataset(DatasetVersion version, try { // FITS is the only type supported for metadata // extraction, as of now. -- L.A. 4.0 + // Consider adding other formats such as NetCDF/HDF5. dataFile.setContentType("application/fits"); metadataExtracted = extractMetadata(tempFileLocation, dataFile, version); } catch (IOException mex) { @@ -565,7 +575,58 @@ public int compare(DataFile d1, DataFile d2) { return sb.toString(); } - + // Note: There is another method called extractMetadata for FITS files. + public void extractMetadata(Dataset dataset, AuthenticatedUser user) { + for (DataFile dataFile : dataset.getFiles()) { + Path pathToLocalDataFile = null; + try { + pathToLocalDataFile = dataFile.getStorageIO().getFileSystemPath(); + } catch (IOException ex) { + logger.info("Exception calling dataAccess.getFileSystemPath: " + ex); + } + InputStream inputStream = null; + if (pathToLocalDataFile != null) { + try ( NetcdfFile netcdfFile = NetcdfFiles.open(pathToLocalDataFile.toString())) { + if (netcdfFile != null) { + // TODO: What should we pass as a URL to toNcml()? 
+ String ncml = netcdfFile.toNcml("FIXME_URL"); + inputStream = new ByteArrayInputStream(ncml.getBytes(StandardCharsets.UTF_8)); + } else { + logger.info("NetcdfFiles.open() could open file id " + dataFile.getId() + " (null returned)."); + } + } catch (IOException ex) { + logger.info("NetcdfFiles.open() could open file id " + dataFile.getId() + ". Exception caught: " + ex); + } + } else { + logger.info("pathToLocalDataFile is null! Are you on S3? Metadata extraction from NetCDF/HDF5 is not yet available."); + // As a tabular file, we'll probably need to download the NetCDF/HDF5 files from S3 and then try to extra the metadata, + // unless we can get some sort of S3 interface working: + // https://docs.unidata.ucar.edu/netcdf-java/current/userguide/dataset_urls.html#object-stores + // If we need to download the file and extract only some of the bytes (hopefully the first bytes) here's the spec for NetCDF: + // https://docs.unidata.ucar.edu/netcdf-c/current/file_format_specifications.html + } + if (inputStream != null) { + // TODO: What should the tag be? + String formatTag = "ncml"; + // TODO: What should the version be? + String formatVersion = "0.1"; + // TODO: What should the origin be? + String origin = "myOrigin"; + boolean isPublic = true; + // TODO: What should the type be? + String type = "myType"; + // TODO: Does NcML have its own content type? (MIME type) + MediaType mediaType = new MediaType("text", "xml"); + try { + AuxiliaryFile auxFile = auxiliaryFileService.processAuxiliaryFile(inputStream, dataFile, formatTag, formatVersion, origin, isPublic, type, mediaType); + logger.info("Aux file extracted from NetCDF/HDF5 file saved: " + auxFile); + } catch (Exception ex) { + logger.info("exception throw calling processAuxiliaryFile: " + ex); + } + } + } + } + public void produceSummaryStatistics(DataFile dataFile, File generatedTabularFile) throws IOException { /* logger.info("Skipping summary statistics and UNF."); @@ -1159,6 +1220,7 @@ public boolean fileMetadataExtractable(DataFile dataFile) { * extractMetadata: * framework for extracting metadata from uploaded files. The results will * be used to populate the metadata of the Dataset to which the file belongs. + * Note that another method called extractMetadata creates aux files from data files. 
*/ public boolean extractMetadata(String tempFileLocation, DataFile dataFile, DatasetVersion editVersion) throws IOException { boolean ingestSuccessful = false; diff --git a/src/test/java/edu/harvard/iq/dataverse/api/NetcdfIT.java b/src/test/java/edu/harvard/iq/dataverse/api/NetcdfIT.java new file mode 100644 index 00000000000..a83af514935 --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/api/NetcdfIT.java @@ -0,0 +1,57 @@ +package edu.harvard.iq.dataverse.api; + +import com.jayway.restassured.RestAssured; +import com.jayway.restassured.path.json.JsonPath; +import com.jayway.restassured.response.Response; +import java.io.IOException; +import static javax.ws.rs.core.Response.Status.CREATED; +import static javax.ws.rs.core.Response.Status.OK; +import org.junit.BeforeClass; +import org.junit.Test; + +public class NetcdfIT { + + @BeforeClass + public static void setUp() { + RestAssured.baseURI = UtilIT.getRestAssuredBaseUri(); + } + + @Test + public void testNmclFromNetcdf() throws IOException { + Response createUser = UtilIT.createRandomUser(); + createUser.then().assertThat().statusCode(OK.getStatusCode()); + String apiToken = UtilIT.getApiTokenFromResponse(createUser); + String username = UtilIT.getUsernameFromResponse(createUser); + + Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken); + createDataverseResponse.prettyPrint(); + createDataverseResponse.then().assertThat() + .statusCode(CREATED.getStatusCode()); + + String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse); + + Response createDataset = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, apiToken); + createDataset.prettyPrint(); + createDataset.then().assertThat() + .statusCode(CREATED.getStatusCode()); + + Integer datasetId = UtilIT.getDatasetIdFromResponse(createDataset); + String datasetPid = UtilIT.getDatasetPersistentIdFromResponse(createDataset); + + String pathToFile = "src/test/resources/netcdf/madis-raob"; + + Response uploadFile = UtilIT.uploadFileViaNative(datasetId.toString(), pathToFile, apiToken); + uploadFile.prettyPrint(); + uploadFile.then().assertThat().statusCode(OK.getStatusCode()); + + long fileId = JsonPath.from(uploadFile.body().asString()).getLong("data.files[0].dataFile.id"); + String tag = "ncml"; + String version = "0.1"; + + Response downloadNcml = UtilIT.downloadAuxFile(fileId, tag, version, apiToken); + //downloadNcml.prettyPrint(); // long output + downloadNcml.then().assertThat() + .statusCode(OK.getStatusCode()) + .contentType("text/xml; name=\"madis-raob.ncml_0.1.xml\";charset=UTF-8"); + } +} From c4f07f91446eedeee611a75537b3b90872817d0b Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Fri, 9 Dec 2022 17:57:29 -0500 Subject: [PATCH 111/173] more tests for the OAI server functionality (#8843) --- .../iq/dataverse/api/HarvestingServerIT.java | 349 ++++++++++++------ .../edu/harvard/iq/dataverse/api/UtilIT.java | 5 + 2 files changed, 243 insertions(+), 111 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java index 5355b57490d..d25ffd225d9 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java @@ -9,24 +9,18 @@ import org.junit.Test; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import com.jayway.restassured.response.Response; -import com.jayway.restassured.path.json.JsonPath; import 
com.jayway.restassured.path.xml.XmlPath; import com.jayway.restassured.path.xml.element.Node; -import static edu.harvard.iq.dataverse.api.UtilIT.API_TOKEN_HTTP_HEADER; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collections; -import javax.json.Json; -import javax.json.JsonArray; -import static javax.ws.rs.core.Response.Status.FORBIDDEN; import static javax.ws.rs.core.Response.Status.OK; import static org.hamcrest.CoreMatchers.equalTo; -import org.junit.Ignore; import java.util.List; -import static junit.framework.Assert.assertEquals; +//import static junit.framework.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; +import static org.junit.Assert.assertEquals; /** * Tests for the Harvesting Server functionality @@ -184,142 +178,204 @@ public void testOaiListMetadataFormats() { @Test - public void testSetCreateAPIandOAIlistIdentifiers() { - // Create the set with Dataverse /api/harvest/server API: + public void testNativeSetAPI() { String setName = UtilIT.getRandomString(6); String def = "*"; - - // make sure the set does not exist + + // This test focuses on the Create/List/Edit functionality of the + // Dataverse OAI Sets API (/api/harvest/server): + + // API Test 1. Make sure the set does not exist yet String setPath = String.format("/api/harvest/server/oaisets/%s", setName); String createPath ="/api/harvest/server/oaisets/add"; - Response r0 = given() + Response getSetResponse = given() .get(setPath); - assertEquals(404, r0.getStatusCode()); + assertEquals(404, getSetResponse.getStatusCode()); - // try to create set as normal user, should fail - Response r1 = given() + // API Test 2. Try to create set as normal user, should fail + Response createSetResponse = given() .header(UtilIT.API_TOKEN_HTTP_HEADER, normalUserAPIKey) .body(jsonForTestSpec(setName, def)) .post(createPath); - assertEquals(400, r1.getStatusCode()); + assertEquals(400, createSetResponse.getStatusCode()); - // try to create set as admin user, should succeed - Response r2 = given() + // API Test 3. Try to create set as admin user, should succeed + createSetResponse = given() .header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey) .body(jsonForTestSpec(setName, def)) .post(createPath); - assertEquals(201, r2.getStatusCode()); + assertEquals(201, createSetResponse.getStatusCode()); - Response getSet = given() - .get(setPath); + // API Test 4. Retrieve the set we've just created, validate the response + getSetResponse = given().get(setPath); - logger.info("getSet.getStatusCode(): " + getSet.getStatusCode()); - logger.info("getSet printresponse: " + getSet.prettyPrint()); - assertEquals(200, getSet.getStatusCode()); + System.out.println("getSetResponse.getStatusCode(): " + getSetResponse.getStatusCode()); + System.out.println("getSetResponse, full: " + getSetResponse.prettyPrint()); + assertEquals(200, getSetResponse.getStatusCode()); + + getSetResponse.then().assertThat() + .body("status", equalTo(AbstractApiBean.STATUS_OK)) + .body("data.definition", equalTo("*")) + .body("data.description", equalTo("")) + .body("data.name", equalTo(setName)); + + // API Test 5. 
Retrieve all sets, check that our new set is listed Response responseAll = given() .get("/api/harvest/server/oaisets"); - logger.info("responseAll.getStatusCode(): " + responseAll.getStatusCode()); - logger.info("responseAll printresponse: " + responseAll.prettyPrint()); + System.out.println("responseAll.getStatusCode(): " + responseAll.getStatusCode()); + System.out.println("responseAll full: " + responseAll.prettyPrint()); assertEquals(200, responseAll.getStatusCode()); - - // try to create set with same name as admin user, should fail - Response r3 = given() + assertTrue(responseAll.body().jsonPath().getList("data.oaisets").size() > 0); + assertTrue(responseAll.body().jsonPath().getList("data.oaisets.name").toString().contains(setName)); // todo: simplify + + // API Test 6. Try to create a set with the same name, should fail + createSetResponse = given() .header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey) .body(jsonForTestSpec(setName, def)) .post(createPath); - assertEquals(400, r3.getStatusCode()); + assertEquals(400, createSetResponse.getStatusCode()); - // try to export set as admin user, should succeed (under admin API, not checking that normal user will fail) + // API Test 7. Try to export set as admin user, should succeed. Set export + // is under /api/admin, no need to try to access it as a non-admin user Response r4 = UtilIT.exportOaiSet(setName); assertEquals(200, r4.getStatusCode()); - - - - // try to delete as normal user, should fail - Response r5 = given() + + // API TEST 8. Try to delete the set as normal user, should fail + Response deleteResponse = given() .header(UtilIT.API_TOKEN_HTTP_HEADER, normalUserAPIKey) .delete(setPath); - logger.info("r5.getStatusCode(): " + r5.getStatusCode()); - assertEquals(400, r5.getStatusCode()); + logger.info("deleteResponse.getStatusCode(): " + deleteResponse.getStatusCode()); + assertEquals(400, deleteResponse.getStatusCode()); - // try to delete as admin user, should work - Response r6 = given() + // API TEST 9. Delete as admin user, should work + deleteResponse = given() .header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey) .delete(setPath); - logger.info("r6.getStatusCode(): " + r6.getStatusCode()); - assertEquals(200, r6.getStatusCode()); + logger.info("deleteResponse.getStatusCode(): " + deleteResponse.getStatusCode()); + assertEquals(200, deleteResponse.getStatusCode()); } @Test - public void testSetEdit() { - //setupUsers(); + public void testSetEditAPIandOAIlistSets() { + // This test focuses on testing the Edit functionality of the Dataverse + // OAI Set API and the ListSets method of the Dataverse OAI server. + + // Initial setup: crete a test set. + // Since the Create and List (POST and GET) functionality of the API + // is tested extensively in the previous test, we will not be paying + // as much attention to these methods, aside from confirming the + // expected HTTP result codes. 
+ String setName = UtilIT.getRandomString(6); - String def = "*"; + String setDef = "*"; - // make sure the set does not exist - String u0 = String.format("/api/harvest/server/oaisets/%s", setName); + // Make sure the set does not exist + String setPath = String.format("/api/harvest/server/oaisets/%s", setName); String createPath ="/api/harvest/server/oaisets/add"; - Response r0 = given() - .get(u0); - assertEquals(404, r0.getStatusCode()); + Response getSetResponse = given() + .get(setPath); + assertEquals(404, getSetResponse.getStatusCode()); - // try to create set as admin user, should succeed - Response r1 = given() + // Create the set as admin user + Response createSetResponse = given() .header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey) - .body(jsonForTestSpec(setName, def)) + .body(jsonForTestSpec(setName, setDef)) .post(createPath); - assertEquals(201, r1.getStatusCode()); + assertEquals(201, createSetResponse.getStatusCode()); + // I. Test the Modify/Edit (POST method) functionality of the + // Dataverse OAI Sets API - // try to edit as normal user should fail - Response r2 = given() + String newDefinition = "title:New"; + String newDescription = "updated"; + + // API Test 1. Try to modify the set as normal user, should fail + Response editSetResponse = given() .header(UtilIT.API_TOKEN_HTTP_HEADER, normalUserAPIKey) - .body(jsonForEditSpec(setName, def,"")) - .put(u0); - logger.info("r2.getStatusCode(): " + r2.getStatusCode()); - assertEquals(400, r2.getStatusCode()); + .body(jsonForEditSpec(setName, setDef, "")) + .put(setPath); + logger.info("non-admin user editSetResponse.getStatusCode(): " + editSetResponse.getStatusCode()); + assertEquals(400, editSetResponse.getStatusCode()); - // try to edit as with blanks should fail - Response r3 = given() + // API Test 2. Try to modify as admin, but with invalid (empty) values, + // should fail + editSetResponse = given() .header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey) .body(jsonForEditSpec(setName, "","")) - .put(u0); - logger.info("r3.getStatusCode(): " + r3.getStatusCode()); - assertEquals(400, r3.getStatusCode()); + .put(setPath); + logger.info("invalid values editSetResponse.getStatusCode(): " + editSetResponse.getStatusCode()); + assertEquals(400, editSetResponse.getStatusCode()); - // try to edit as with something should pass - Response r4 = given() + // API Test 3. Try to modify as admin, with sensible values + editSetResponse = given() .header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey) - .body(jsonForEditSpec(setName, "newDef","newDesc")) - .put(u0); - logger.info("r4 Status code: " + r4.getStatusCode()); - logger.info("r4.prettyPrint(): " + r4.prettyPrint()); - assertEquals(OK.getStatusCode(), r4.getStatusCode()); - - logger.info("u0: " + u0); - // now delete it... - Response r6 = given() + .body(jsonForEditSpec(setName, newDefinition, newDescription)) + .put(setPath); + logger.info("admin user editSetResponse status code: " + editSetResponse.getStatusCode()); + logger.info("admin user editSetResponse.prettyPrint(): " + editSetResponse.prettyPrint()); + assertEquals(OK.getStatusCode(), editSetResponse.getStatusCode()); + + // API Test 4. 
List the set, confirm that the new values are shown + getSetResponse = given().get(setPath); + + System.out.println("getSetResponse.getStatusCode(): " + getSetResponse.getStatusCode()); + System.out.println("getSetResponse, full: " + getSetResponse.prettyPrint()); + assertEquals(200, getSetResponse.getStatusCode()); + + getSetResponse.then().assertThat() + .body("status", equalTo(AbstractApiBean.STATUS_OK)) + .body("data.definition", equalTo(newDefinition)) + .body("data.description", equalTo(newDescription)) + .body("data.name", equalTo(setName)); + + // II. Test the ListSets functionality of the OAI server + + Response listSetsResponse = UtilIT.getOaiListSets(); + + // 1. Validate the service section of the OAI response: + + XmlPath responseXmlPath = validateOaiVerbResponse(listSetsResponse, "ListSets"); + + // 2. Validate the payload of the response, by confirming that the set + // we created and modified, above, is being listed by the OAI server + // and its xml record is properly formatted + + List listSets = responseXmlPath.getList("OAI-PMH.ListSets.set.list()"); // TODO - maybe try it with findAll()? + assertNotNull(listSets); + assertTrue(listSets.size() > 0); + + Node foundSetNode = null; + for (Node setNode : listSets) { + + if (setName.equals(setNode.get("setName").toString())) { + foundSetNode = setNode; + break; + } + } + + assertNotNull("Newly-created set is not listed by the OAI server", foundSetNode); + assertEquals("Incorrect description in the ListSets entry", newDescription, foundSetNode.getPath("setDescription.metadata.element.field", String.class)); + + // ok, the xml record looks good! + + // Cleanup. Delete the set with the DELETE API + Response deleteSetResponse = given() .header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey) - .delete(u0); - logger.info("r6.getStatusCode(): " + r6.getStatusCode()); - assertEquals(200, r6.getStatusCode()); + .delete(setPath); + assertEquals(200, deleteSetResponse.getStatusCode()); } - // A more elaborate test - we'll create and publish a dataset, then create an - // OAI set with that one dataset, and attempt to retrieve the OAI record - // with GetRecord. + // A more elaborate test - we will create and export an + // OAI set with a single dataset, and attempt to retrieve + // it and validate the OAI server responses of the corresponding + // ListIdentifiers, ListRecords and GetRecord methods. @Test public void testSingleRecordOaiSet() throws InterruptedException { - - //setupUsers(); - - - // Let's try and create an OAI set with the "single set dataset" that // was created as part of the initial setup: @@ -333,12 +389,18 @@ public void testSingleRecordOaiSet() throws InterruptedException { .post(createPath); assertEquals(201, createSetResponse.getStatusCode()); - // TODO: a) look up the set via native harvest/server api; - // b) look up the set via the OAI ListSets; - // export set: - // (this is asynchronous - so we should probably wait a little) + // The GET method of the oai set API, as well as the OAI ListSets + // method are tested extensively in another method in this class, so + // we'll skip checking those here. + + // Let's export the set. This is asynchronous - so we will try to + // wait a little - but in practice, everything potentially time-consuming + // must have been done when the dataset was exported, in the setup method. 
+ Response exportSetResponse = UtilIT.exportOaiSet(setName); assertEquals(200, exportSetResponse.getStatusCode()); + Thread.sleep(1000L); + Response getSet = given() .get(apiPath); @@ -350,25 +412,38 @@ public void testSingleRecordOaiSet() throws InterruptedException { do { - // Run ListIdentifiers on this newly-created set: + // OAI Test 1. Run ListIdentifiers on this newly-created set: Response listIdentifiersResponse = UtilIT.getOaiListIdentifiers(setName, "oai_dc"); - List ret = listIdentifiersResponse.getBody().xmlPath().getList("OAI-PMH.ListIdentifiers.header"); - assertEquals(OK.getStatusCode(), listIdentifiersResponse.getStatusCode()); + + // Validate the service section of the OAI response: + XmlPath responseXmlPath = validateOaiVerbResponse(listIdentifiersResponse, "ListIdentifiers"); + + List ret = responseXmlPath.getList("OAI-PMH.ListIdentifiers.header"); assertNotNull(ret); - logger.info("setName: " + setName); + if (logger.isLoggable(Level.FINE)) { logger.info("listIdentifiersResponse.prettyPrint:..... "); listIdentifiersResponse.prettyPrint(); } - if (ret.size() != 1) { + if (ret.isEmpty()) { + // OK, we'll sleep for another second - provided it's been less + // than 10 sec. total. i++; } else { - // There should be 1 and only 1 record in the response: + // Validate the payload of the ListRecords response: + // a) There should be 1 and only 1 record in the response: assertEquals(1, ret.size()); - // And the record should be the dataset we have just created: - assertEquals(singleSetDatasetPersistentId, listIdentifiersResponse.getBody().xmlPath() + // b) The one record in it should be the dataset we have just created: + assertEquals(singleSetDatasetPersistentId, responseXmlPath .getString("OAI-PMH.ListIdentifiers.header.identifier")); + assertEquals(setName, responseXmlPath + .getString("OAI-PMH.ListIdentifiers.header.setSpec")); + assertNotNull(responseXmlPath.getString("OAI-PMH.ListIdentifiers.header.dateStamp")); + // TODO: validate the formatting of the date string in the record + // header, above! + + // ok, ListIdentifiers response looks valid. break; } Thread.sleep(1000L); @@ -379,34 +454,86 @@ public void testSingleRecordOaiSet() throws InterruptedException { // already happened during its publishing (we made sure to wait there). // Exporting the set should not take any time - but I'll keep that code // in place since it's not going to hurt. - L.A. + System.out.println("Waited " + i + " seconds for OIA export."); //Fail if we didn't find the exported record before the timeout assertTrue(i < maxWait); + + + // OAI Test 2. 
Run ListRecords, request oai_dc: Response listRecordsResponse = UtilIT.getOaiListRecords(setName, "oai_dc"); assertEquals(OK.getStatusCode(), listRecordsResponse.getStatusCode()); - List listRecords = listRecordsResponse.getBody().xmlPath().getList("OAI-PMH.ListRecords.record"); + + // Validate the service section of the OAI response: + + XmlPath responseXmlPath = validateOaiVerbResponse(listRecordsResponse, "ListRecords"); + + // Validate the payload of the response: + // (the header portion must be identical to that of ListIdentifiers above, + // plus the response must contain a metadata section with a valid oai_dc + // record) + + List listRecords = responseXmlPath.getList("OAI-PMH.ListRecords.record"); + // Same deal, there must be 1 record only in the set: assertNotNull(listRecords); assertEquals(1, listRecords.size()); - assertEquals(singleSetDatasetPersistentId, listRecordsResponse.getBody().xmlPath().getString("OAI-PMH.ListRecords.record[0].header.identifier")); - - // assert that Datacite format does not contain the XML prolog + // a) header section: + assertEquals(singleSetDatasetPersistentId, responseXmlPath.getString("OAI-PMH.ListRecords.record.header.identifier")); + assertEquals(setName, responseXmlPath + .getString("OAI-PMH.ListRecords.record.header.setSpec")); + assertNotNull(responseXmlPath.getString("OAI-PMH.ListRecords.record.header.dateStamp")); + // b) metadata section: + // in the metadata section we are showing the resolver url form of the doi: + String persistentIdUrl = singleSetDatasetPersistentId.replace("doi:", "https://doi.org/"); + assertEquals(persistentIdUrl, responseXmlPath.getString("OAI-PMH.ListRecords.record.metadata.dc.identifier")); + assertEquals("Darwin's Finches", responseXmlPath.getString("OAI-PMH.ListRecords.record.metadata.dc.title")); + assertEquals("Finch, Fiona", responseXmlPath.getString("OAI-PMH.ListRecords.record.metadata.dc.creator")); + assertEquals("Darwin's finches (also known as the Galápagos finches) are a group of about fifteen species of passerine birds.", + responseXmlPath.getString("OAI-PMH.ListRecords.record.metadata.dc.description")); + assertEquals("Medicine, Health and Life Sciences", + responseXmlPath.getString("OAI-PMH.ListRecords.record.metadata.dc.subject")); + // ok, looks legit! + + // OAI Test 3. + // Assert that Datacite format does not contain the XML prolog + // (this is a reference to a resolved issue; generally, harvestable XML + // exports must NOT contain the "")); - // And now run GetRecord on the OAI record for the dataset: - Response getRecordResponse = UtilIT.getOaiRecord(singleSetDatasetPersistentId, "oai_dc"); + // OAI Test 4. run and validate GetRecord response + Response getRecordResponse = UtilIT.getOaiRecord(singleSetDatasetPersistentId, "oai_dc"); System.out.println("GetRecord response in its entirety: "+getRecordResponse.getBody().prettyPrint()); - System.out.println("one more time:"); - getRecordResponse.prettyPrint(); + + // Validate the service section of the OAI response: + responseXmlPath = validateOaiVerbResponse(getRecordResponse, "GetRecord"); + + // Validate the payload of the response: + + // Note that for a set with a single record the output of ListRecrods is + // essentially identical to that of GetRecord! 
+ // (we'll test a multi-record set in a different method) + // a) header section: + assertEquals(singleSetDatasetPersistentId, responseXmlPath.getString("OAI-PMH.GetRecord.record.header.identifier")); + assertEquals(setName, responseXmlPath + .getString("OAI-PMH.GetRecord.record.header.setSpec")); + assertNotNull(responseXmlPath.getString("OAI-PMH.GetRecord.record.header.dateStamp")); + // b) metadata section: + assertEquals(persistentIdUrl, responseXmlPath.getString("OAI-PMH.GetRecord.record.metadata.dc.identifier")); + assertEquals("Darwin's Finches", responseXmlPath.getString("OAI-PMH.GetRecord.record.metadata.dc.title")); + assertEquals("Finch, Fiona", responseXmlPath.getString("OAI-PMH.GetRecord.record.metadata.dc.creator")); + assertEquals("Darwin's finches (also known as the Galápagos finches) are a group of about fifteen species of passerine birds.", + responseXmlPath.getString("OAI-PMH.GetRecord.record.metadata.dc.description")); + assertEquals("Medicine, Health and Life Sciences", responseXmlPath.getString("OAI-PMH.GetRecord.record.metadata.dc.subject")); - assertEquals(singleSetDatasetPersistentId, getRecordResponse.getBody().xmlPath().getString("OAI-PMH.GetRecord.record.header.identifier")); + // ok, looks legit! - // TODO: - // check the actual metadata payload of the OAI record more carefully? } // This test will attempt to create a set with multiple records (enough diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index 9fa47db167b..ac767279bd4 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -2630,6 +2630,11 @@ static Response getOaiListMetadataFormats() { return given().get(oaiVerbPath); } + static Response getOaiListSets() { + String oaiVerbPath = "/oai?verb=ListSets"; + return given().get(oaiVerbPath); + } + static Response getOaiRecord(String datasetPersistentId, String metadataFormat) { String apiPath = String.format("/oai?verb=GetRecord&identifier=%s&metadataPrefix=%s", datasetPersistentId, metadataFormat); return given().get(apiPath); From 9cbfa31d4489ed4ce6df6e37a0fecf92f3a77d18 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Mon, 12 Dec 2022 13:51:58 -0500 Subject: [PATCH 112/173] extra (extra tedious) server tests validating paging (resumptionToken) functionality of ListIdentifiers and ListRecords (#8843) --- .../iq/dataverse/api/HarvestingServerIT.java | 340 +++++++++++++++++- .../edu/harvard/iq/dataverse/api/UtilIT.java | 18 +- 2 files changed, 351 insertions(+), 7 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java index d25ffd225d9..3497c71e169 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java @@ -16,6 +16,8 @@ import static javax.ws.rs.core.Response.Status.OK; import static org.hamcrest.CoreMatchers.equalTo; import java.util.List; +import java.util.Set; +import java.util.HashSet; //import static junit.framework.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; @@ -35,6 +37,7 @@ public class HarvestingServerIT { private static String adminUserAPIKey; private static String singleSetDatasetIdentifier; private static String singleSetDatasetPersistentId; + private static List extraDatasetsIdentifiers = new ArrayList<>(); @BeforeClass 
public static void setUpClass() { @@ -98,6 +101,28 @@ private static void setupDatasets() { // takes longer than just publish/reindex. // So wait for all of this to finish. UtilIT.sleepForReexport(singleSetDatasetPersistentId, adminUserAPIKey, 10); + + // ... And let's create 4 more datasets for a multi-dataset experiment: + + for (int i = 0; i < 4; i++) { + // create dataset: + createDatasetResponse = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, adminUserAPIKey); + createDatasetResponse.prettyPrint(); + datasetId = UtilIT.getDatasetIdFromResponse(createDatasetResponse); + + // retrieve the global id: + String thisDatasetPersistentId = UtilIT.getDatasetPersistentIdFromResponse(createDatasetResponse); + + // publish dataset: + publishDataset = UtilIT.publishDatasetViaNativeApi(thisDatasetPersistentId, "major", adminUserAPIKey); + assertEquals(200, publishDataset.getStatusCode()); + + UtilIT.sleepForReexport(thisDatasetPersistentId, adminUserAPIKey, 10); + + extraDatasetsIdentifiers.add(thisDatasetPersistentId.substring(thisDatasetPersistentId.lastIndexOf('/') + 1)); + } + + } private String jsonForTestSpec(String name, String def) { @@ -423,16 +448,16 @@ public void testSingleRecordOaiSet() throws InterruptedException { assertNotNull(ret); if (logger.isLoggable(Level.FINE)) { - logger.info("listIdentifiersResponse.prettyPrint:..... "); - listIdentifiersResponse.prettyPrint(); + logger.info("listIdentifiersResponse.prettyPrint: " + + listIdentifiersResponse.prettyPrint()); } if (ret.isEmpty()) { // OK, we'll sleep for another second - provided it's been less // than 10 sec. total. i++; } else { - // Validate the payload of the ListRecords response: - // a) There should be 1 and only 1 record in the response: + // Validate the payload of the ListIdentifiers response: + // a) There should be 1 and only 1 item listed: assertEquals(1, ret.size()); + // b) The one record in it should be the dataset we have just created: assertEquals(singleSetDatasetPersistentId, responseXmlPath @@ -537,12 +562,315 @@ public void testSingleRecordOaiSet() throws InterruptedException { } // This test will attempt to create a set with multiple records (enough - // to trigger a paged response with a continuation token) and test its - // performance. + // to trigger a paged response) and test the resumption token functionality. + // Note that this test requires the OAI service to be configured with some + // non-default settings (the paging limits for ListIdentifiers and ListRecords + // must be set to something low, like 2).
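+    // For orientation, a rough sketch of the paged-response element that the
+    // assertions below rely on (values shown are illustrative only; the token
+    // itself is an opaque string chosen by the server, per the OAI-PMH 2.0 spec):
+    //   <resumptionToken completeListSize="5" cursor="0">some-opaque-token</resumptionToken>
+    // The final page of results is expected to carry an empty <resumptionToken/> element.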
@Test public void testMultiRecordOaiSet() throws InterruptedException { + // Setup: Let's create a control OAI set with the 5 datasets created + // in the class init: + + String setName = UtilIT.getRandomString(6); + String setQuery = "(dsPersistentId:" + singleSetDatasetIdentifier; + for (String persistentId : extraDatasetsIdentifiers) { + setQuery = setQuery.concat(" OR dsPersistentId:" + persistentId); + } + setQuery = setQuery.concat(")"); + + String createPath = "/api/harvest/server/oaisets/add"; + + Response createSetResponse = given() + .header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey) + .body(jsonForTestSpec(setName, setQuery)) + .post(createPath); + assertEquals(201, createSetResponse.getStatusCode()); + + // Dataverse OAI Sets API is tested extensively in other methods here, + // so no need to test in any more details than confirming the OK result + // above + Response exportSetResponse = UtilIT.exportOaiSet(setName); + assertEquals(200, exportSetResponse.getStatusCode()); + Thread.sleep(1000L); + + // OAI Test 1. Run ListIdentifiers on the set we've just created: + Response listIdentifiersResponse = UtilIT.getOaiListIdentifiers(setName, "oai_dc"); + assertEquals(OK.getStatusCode(), listIdentifiersResponse.getStatusCode()); + + // Validate the service section of the OAI response: + XmlPath responseXmlPath = validateOaiVerbResponse(listIdentifiersResponse, "ListIdentifiers"); + + List ret = responseXmlPath.getList("OAI-PMH.ListIdentifiers.header.identifier"); + assertNotNull(ret); + + if (logger.isLoggable(Level.FINE)) { + logger.info("listIdentifiersResponse.prettyPrint: "+listIdentifiersResponse.prettyPrint()); + } + + // Validate the payload of the ListIdentifiers response: + // 1a) There should be 2 items listed: + assertEquals("Wrong number of items on the first ListIdentifiers page", + 2, ret.size()); + + // 1b) The response contains a resumptionToken for the next page of items: + String resumptionToken = responseXmlPath.getString("OAI-PMH.ListIdentifiers.resumptionToken"); + assertNotNull("No resumption token in the ListIdentifiers response", resumptionToken); + + // 1c) The total number of items in the set (5) is listed correctly: + assertEquals(5, responseXmlPath.getInt("OAI-PMH.ListIdentifiers.resumptionToken.@completeListSize")); + + // 1d) ... and the offset (cursor) is at the right position (0): + assertEquals(0, responseXmlPath.getInt("OAI-PMH.ListIdentifiers.resumptionToken.@cursor")); + + // The formatting of individual item records in the ListIdentifiers response + // is tested extensively in the previous test method, so we are not + // looking at them in such detail here; but we should record the + // identifiers listed, so that we can confirm that all the set is + // served as expected. + + Set persistentIdsInListIdentifiers = new HashSet<>(); + + for (String persistentId : ret) { + persistentIdsInListIdentifiers.add(persistentId.substring(persistentId.lastIndexOf('/') + 1)); + } + + // ok, let's move on to the next ListIdentifiers page: + // (we repeat the exact same checks as the above; minus the different + // expected offset) + + // OAI Test 2. 
Run ListIdentifiers with the resumptionToken obtained + // in the previous step: + + listIdentifiersResponse = UtilIT.getOaiListIdentifiersWithResumptionToken(resumptionToken); + assertEquals(OK.getStatusCode(), listIdentifiersResponse.getStatusCode()); + + // Validate the service section of the OAI response: + responseXmlPath = validateOaiVerbResponse(listIdentifiersResponse, "ListIdentifiers"); + + ret = responseXmlPath.getList("OAI-PMH.ListIdentifiers.header.identifier"); + assertNotNull(ret); + + if (logger.isLoggable(Level.FINE)) { + logger.info("listIdentifiersResponse.prettyPrint: "+listIdentifiersResponse.prettyPrint()); + } + + // Validate the payload of the ListIdentifiers response: + // 2a) There should still be 2 items listed: + assertEquals("Wrong number of items on the second ListIdentifiers page", + 2, ret.size()); + + // 2b) The response should contain a resumptionToken for the next page of items: + resumptionToken = responseXmlPath.getString("OAI-PMH.ListIdentifiers.resumptionToken"); + assertNotNull("No resumption token in the ListIdentifiers response", resumptionToken); + + // 2c) The total number of items in the set (5) is listed correctly: + assertEquals(5, responseXmlPath.getInt("OAI-PMH.ListIdentifiers.resumptionToken.@completeListSize")); + + // 2d) ... and the offset (cursor) is at the right position (2): + assertEquals(2, responseXmlPath.getInt("OAI-PMH.ListIdentifiers.resumptionToken.@cursor")); + + // Record the identifiers listed on this results page: + + for (String persistentId : ret) { + persistentIdsInListIdentifiers.add(persistentId.substring(persistentId.lastIndexOf('/') + 1)); + } + + // And now the next and the final ListIdentifiers page. + // This time around we should get an *empty* resumptionToken (indicating + // that there are no more results): + + // OAI Test 3. Run ListIdentifiers with the final resumptionToken + + listIdentifiersResponse = UtilIT.getOaiListIdentifiersWithResumptionToken(resumptionToken); + assertEquals(OK.getStatusCode(), listIdentifiersResponse.getStatusCode()); + + // Validate the service section of the OAI response: + responseXmlPath = validateOaiVerbResponse(listIdentifiersResponse, "ListIdentifiers"); + + ret = responseXmlPath.getList("OAI-PMH.ListIdentifiers.header.identifier"); + assertNotNull(ret); + + if (logger.isLoggable(Level.FINE)) { + logger.info("listIdentifiersResponse.prettyPrint: "+listIdentifiersResponse.prettyPrint()); + } + + // Validate the payload of the ListIdentifiers response: + // 3a) There should be only 1 item listed: + assertEquals("Wrong number of items on the final ListIdentifiers page", + 1, ret.size()); + + // 3b) The response contains a resumptionToken for the next page of items: + resumptionToken = responseXmlPath.getString("OAI-PMH.ListIdentifiers.resumptionToken"); + assertNotNull("No resumption token in the final ListIdentifiers response", resumptionToken); + assertTrue("Non-empty resumption token in the final ListIdentifiers response", "".equals(resumptionToken)); + + // 3c) The total number of items in the set (5) is still listed correctly: + assertEquals(5, responseXmlPath.getInt("OAI-PMH.ListIdentifiers.resumptionToken.@completeListSize")); + + // 3d) ... 
and the offset (cursor) is at the right position (4): + assertEquals(4, responseXmlPath.getInt("OAI-PMH.ListIdentifiers.resumptionToken.@cursor")); + // Record the last identifier listed on this final page: + persistentIdsInListIdentifiers.add(ret.get(0).substring(ret.get(0).lastIndexOf('/') + 1)); + + // Finally, let's confirm that the expected 5 datasets have been listed + // as part of this Set: + + boolean allDatasetsListed = true; + + allDatasetsListed = persistentIdsInListIdentifiers.contains(singleSetDatasetIdentifier); + for (String persistentId : extraDatasetsIdentifiers) { + allDatasetsListed = persistentIdsInListIdentifiers.contains(persistentId); + } + + assertTrue("Control datasets not properly listed in the paged ListIdentifiers response", + allDatasetsListed); + + // OK, it is safe to assume ListIdentifiers works as it should in page mode. + + // We will now repeat the exact same tests for ListRecords (again, no + // need to pay close attention to the formatting of the individual records, + // since that's tested in the previous test method, since our focus is + // testing the paging/resumptionToken functionality) + + // OAI Test 4. Run ListRecords on the set we've just created: + Response listRecordsResponse = UtilIT.getOaiListRecords(setName, "oai_dc"); + assertEquals(OK.getStatusCode(), listRecordsResponse.getStatusCode()); + + // Validate the service section of the OAI response: + responseXmlPath = validateOaiVerbResponse(listRecordsResponse, "ListRecords"); + + ret = responseXmlPath.getList("OAI-PMH.ListRecords.record.header.identifier"); + assertNotNull(ret); + + if (logger.isLoggable(Level.FINE)) { + logger.info("listRecordsResponse.prettyPrint: "+listRecordsResponse.prettyPrint()); + } + + // Validate the payload of the ListRecords response: + // 4a) There should be 2 items listed: + assertEquals("Wrong number of items on the first ListRecords page", + 2, ret.size()); + + // 4b) The response contains a resumptionToken for the next page of items: + resumptionToken = responseXmlPath.getString("OAI-PMH.ListRecords.resumptionToken"); + assertNotNull("No resumption token in the ListRecords response", resumptionToken); + + // 4c) The total number of items in the set (5) is listed correctly: + assertEquals(5, responseXmlPath.getInt("OAI-PMH.ListRecords.resumptionToken.@completeListSize")); + + // 4d) ... and the offset (cursor) is at the right position (0): + assertEquals(0, responseXmlPath.getInt("OAI-PMH.ListRecords.resumptionToken.@cursor")); + + Set persistentIdsInListRecords = new HashSet<>(); + + for (String persistentId : ret) { + persistentIdsInListRecords.add(persistentId.substring(persistentId.lastIndexOf('/') + 1)); + } + + // ok, let's move on to the next ListRecords page: + // (we repeat the exact same checks as the above; minus the different + // expected offset) + + // OAI Test 5. 
Run ListRecords with the resumptionToken obtained + // in the previous step: + + listRecordsResponse = UtilIT.getOaiListRecordsWithResumptionToken(resumptionToken); + assertEquals(OK.getStatusCode(), listRecordsResponse.getStatusCode()); + + // Validate the service section of the OAI response: + responseXmlPath = validateOaiVerbResponse(listRecordsResponse, "ListRecords"); + + ret = responseXmlPath.getList("OAI-PMH.ListRecords.record.header.identifier"); + assertNotNull(ret); + + if (logger.isLoggable(Level.FINE)) { + logger.info("listRecordsResponse.prettyPrint: "+listRecordsResponse.prettyPrint()); + } + + // Validate the payload of the ListRecords response: + // 4a) There should still be 2 items listed: + assertEquals("Wrong number of items on the second ListRecords page", + 2, ret.size()); + + // 4b) The response should contain a resumptionToken for the next page of items: + resumptionToken = responseXmlPath.getString("OAI-PMH.ListRecords.resumptionToken"); + assertNotNull("No resumption token in the ListRecords response", resumptionToken); + + // 4c) The total number of items in the set (5) is listed correctly: + assertEquals(5, responseXmlPath.getInt("OAI-PMH.ListRecords.resumptionToken.@completeListSize")); + + // 4d) ... and the offset (cursor) is at the right position (2): + assertEquals(2, responseXmlPath.getInt("OAI-PMH.ListRecords.resumptionToken.@cursor")); + + // Record the identifiers listed on this results page: + + for (String persistentId : ret) { + persistentIdsInListRecords.add(persistentId.substring(persistentId.lastIndexOf('/') + 1)); + } + + // And now the next and the final ListRecords page. + // This time around we should get an *empty* resumptionToken (indicating + // that there are no more results): + + // OAI Test 6. Run ListRecords with the final resumptionToken + + listRecordsResponse = UtilIT.getOaiListRecordsWithResumptionToken(resumptionToken); + assertEquals(OK.getStatusCode(), listRecordsResponse.getStatusCode()); + + // Validate the service section of the OAI response: + responseXmlPath = validateOaiVerbResponse(listRecordsResponse, "ListRecords"); + + ret = responseXmlPath.getList("OAI-PMH.ListRecords.record.header.identifier"); + assertNotNull(ret); + + if (logger.isLoggable(Level.FINE)) { + logger.info("listRecordsResponse.prettyPrint: "+listRecordsResponse.prettyPrint()); + } + + // Validate the payload of the ListRecords response: + // 6a) There should be only 1 item listed: + assertEquals("Wrong number of items on the final ListRecords page", + 1, ret.size()); + + // 6b) The response contains a resumptionToken for the next page of items: + resumptionToken = responseXmlPath.getString("OAI-PMH.ListRecords.resumptionToken"); + assertNotNull("No resumption token in the final ListRecords response", resumptionToken); + assertTrue("Non-empty resumption token in the final ListRecords response", "".equals(resumptionToken)); + + // 6c) The total number of items in the set (5) is still listed correctly: + assertEquals(5, responseXmlPath.getInt("OAI-PMH.ListRecords.resumptionToken.@completeListSize")); + + // 6d) ... 
and the offset (cursor) is at the right position (4): + assertEquals(4, responseXmlPath.getInt("OAI-PMH.ListRecords.resumptionToken.@cursor")); + + // Record the last identifier listed on this final page: + persistentIdsInListRecords.add(ret.get(0).substring(ret.get(0).lastIndexOf('/') + 1)); + + // Finally, let's confirm that the expected 5 datasets have been listed + // as part of this Set: + + allDatasetsListed = true; + + allDatasetsListed = persistentIdsInListRecords.contains(singleSetDatasetIdentifier); + for (String persistentId : extraDatasetsIdentifiers) { + allDatasetsListed = persistentIdsInListRecords.contains(persistentId); + } + + assertTrue("Control datasets not properly listed in the paged ListRecords response", + allDatasetsListed); + + // OK, it is safe to assume ListRecords works as it should in page mode + // as well. + + // And finally, let's delete the set + String setPath = String.format("/api/harvest/server/oaisets/%s", setName); + Response deleteResponse = given() + .header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey) + .delete(setPath); + logger.info("deleteResponse.getStatusCode(): " + deleteResponse.getStatusCode()); + assertEquals("Failed to delete the control multi-record set", 200, deleteResponse.getStatusCode()); } } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index ac767279bd4..e669a268010 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -2641,7 +2641,18 @@ static Response getOaiRecord(String datasetPersistentId, String metadataFormat) } static Response getOaiListIdentifiers(String setName, String metadataFormat) { - String apiPath = String.format("/oai?verb=ListIdentifiers&set=%s&metadataPrefix=%s", setName, metadataFormat); + + String apiPath; + if (StringUtil.nonEmpty(setName)) { + apiPath = String.format("/oai?verb=ListIdentifiers&set=%s&metadataPrefix=%s", setName, metadataFormat); + } else { + apiPath = String.format("/oai?verb=ListIdentifiers&metadataPrefix=%s", metadataFormat); + } + return given().get(apiPath); + } + + static Response getOaiListIdentifiersWithResumptionToken(String resumptionToken) { + String apiPath = String.format("/oai?verb=ListIdentifiers&resumptionToken=%s", resumptionToken); return given().get(apiPath); } @@ -2649,6 +2660,11 @@ static Response getOaiListRecords(String setName, String metadataFormat) { String apiPath = String.format("/oai?verb=ListRecords&set=%s&metadataPrefix=%s", setName, metadataFormat); return given().get(apiPath); } + + static Response getOaiListRecordsWithResumptionToken(String resumptionToken) { + String apiPath = String.format("/oai?verb=ListRecords&resumptionToken=%s", resumptionToken); + return given().get(apiPath); + } static Response changeAuthenticatedUserIdentifier(String oldIdentifier, String newIdentifier, String apiToken) { Response response; From 395d605a8e156dd2ee295a8aa2a0892cad898617 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Mon, 12 Dec 2022 17:04:44 -0500 Subject: [PATCH 113/173] An automated test of an actual harvest (#8843) --- .../iq/dataverse/api/HarvestingClients.java | 31 +--- .../iq/dataverse/api/HarvestingClientsIT.java | 169 ++++++++++++++++-- .../iq/dataverse/api/HarvestingServerIT.java | 8 + 3 files changed, 164 insertions(+), 44 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/HarvestingClients.java b/src/main/java/edu/harvard/iq/dataverse/api/HarvestingClients.java index 
42534514b68..b75cb687c62 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/HarvestingClients.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/HarvestingClients.java @@ -373,13 +373,13 @@ public Response startHarvestingJob(@PathParam("nickName") String clientNickname, } if (authenticatedUser == null || !authenticatedUser.isSuperuser()) { - return error(Response.Status.FORBIDDEN, "Only the Dataverse Admin user can run harvesting jobs"); + return error(Response.Status.FORBIDDEN, "Only admin users can run harvesting jobs"); } HarvestingClient harvestingClient = harvestingClientService.findByNickname(clientNickname); if (harvestingClient == null) { - return error(Response.Status.NOT_FOUND, "No such dataverse: "+clientNickname); + return error(Response.Status.NOT_FOUND, "No such client: "+clientNickname); } DataverseRequest dataverseRequest = createDataverseRequest(authenticatedUser); @@ -391,35 +391,8 @@ public Response startHarvestingJob(@PathParam("nickName") String clientNickname, return this.accepted(); } - // This GET shows the status of the harvesting run in progress for this - // client, if present: - // @GET - // @Path("{nickName}/run") - // TODO: - - // This DELETE kills the harvesting run in progress for this client, - // if present: - // @DELETE - // @Path("{nickName}/run") - // TODO: - - - - - /* Auxiliary, helper methods: */ - /* - @Deprecated - public static JsonArrayBuilder harvestingConfigsAsJsonArray(List harvestingDataverses) { - JsonArrayBuilder hdArr = Json.createArrayBuilder(); - - for (Dataverse hd : harvestingDataverses) { - hdArr.add(harvestingConfigAsJson(hd.getHarvestingClientConfig())); - } - return hdArr; - }*/ - public static JsonObjectBuilder harvestingConfigAsJson(HarvestingClient harvestingConfig) { if (harvestingConfig == null) { return null; diff --git a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingClientsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingClientsIT.java index 9eac3545e54..8fef360c68b 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingClientsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingClientsIT.java @@ -1,34 +1,58 @@ package edu.harvard.iq.dataverse.api; import java.util.logging.Logger; +import java.util.logging.Level; import com.jayway.restassured.RestAssured; import static com.jayway.restassured.RestAssured.given; import org.junit.Test; import com.jayway.restassured.response.Response; +import static javax.ws.rs.core.Response.Status.CREATED; +import static javax.ws.rs.core.Response.Status.UNAUTHORIZED; +import static javax.ws.rs.core.Response.Status.ACCEPTED; +import static javax.ws.rs.core.Response.Status.OK; import static org.hamcrest.CoreMatchers.equalTo; -import static junit.framework.Assert.assertEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; import org.junit.BeforeClass; /** - * extremely minimal (for now) API tests for creating OAI clients. + * This class tests Harvesting Client functionality. + * Note that these methods test BOTH the proprietary Dataverse rest API for + * creating and managing harvesting clients, AND the underlining OAI-PMH + * harvesting functionality itself. I.e., we will use the Dataverse + * /api/harvest/clients/ api to run an actual harvest of a control set and + * then validate the resulting harvested content. 
*/ public class HarvestingClientsIT { private static final Logger logger = Logger.getLogger(HarvestingClientsIT.class.getCanonicalName()); private static final String harvestClientsApi = "/api/harvest/clients/"; - private static final String harvestCollection = "root"; + private static final String rootCollection = "root"; private static final String harvestUrl = "https://demo.dataverse.org/oai"; private static final String archiveUrl = "https://demo.dataverse.org"; private static final String harvestMetadataFormat = "oai_dc"; private static final String archiveDescription = "RestAssured harvesting client test"; + private static final String controlOaiSet = "controlTestSet"; + private static final int datasetsInControlSet = 7; + private static String normalUserAPIKey; + private static String adminUserAPIKey; + private static String harvestCollectionAlias; @BeforeClass public static void setUpClass() { RestAssured.baseURI = UtilIT.getRestAssuredBaseUri(); + + // Create the users, an admin and a non-admin: + setupUsers(); + + // Create a collection that we will use to harvest remote content into: + setupCollection(); + } - private void setupUsers() { + private static void setupUsers() { Response cu0 = UtilIT.createRandomUser(); normalUserAPIKey = UtilIT.getApiTokenFromResponse(cu0); Response cu1 = UtilIT.createRandomUser(); @@ -36,13 +60,22 @@ private void setupUsers() { Response u1a = UtilIT.makeSuperUser(un1); adminUserAPIKey = UtilIT.getApiTokenFromResponse(cu1); } + + private static void setupCollection() { + Response createDataverseResponse = UtilIT.createRandomDataverse(adminUserAPIKey); + createDataverseResponse.prettyPrint(); + assertEquals(CREATED.getStatusCode(), createDataverseResponse.getStatusCode()); + + harvestCollectionAlias = UtilIT.getAliasFromResponse(createDataverseResponse); - private String normalUserAPIKey; - private String adminUserAPIKey; + // publish dataverse: + Response publishDataverse = UtilIT.publishDataverseViaNativeApi(harvestCollectionAlias, adminUserAPIKey); + assertEquals(OK.getStatusCode(), publishDataverse.getStatusCode()); + } @Test public void testCreateEditDeleteClient() { - setupUsers(); + //setupUsers(); String nickName = UtilIT.getRandomString(6); @@ -52,7 +85,7 @@ public void testCreateEditDeleteClient() { + "\"harvestUrl\":\"%s\"," + "\"archiveUrl\":\"%s\"," + "\"metadataFormat\":\"%s\"}", - harvestCollection, harvestUrl, archiveUrl, harvestMetadataFormat); + rootCollection, harvestUrl, archiveUrl, harvestMetadataFormat); // Try to create a client as normal user, should fail: @@ -61,7 +94,7 @@ public void testCreateEditDeleteClient() { .header(UtilIT.API_TOKEN_HTTP_HEADER, normalUserAPIKey) .body(clientJson) .post(clientApiPath); - assertEquals(401, rCreate.getStatusCode()); + assertEquals(UNAUTHORIZED.getStatusCode(), rCreate.getStatusCode()); // Try to create the same as admin user, should succeed: @@ -70,7 +103,7 @@ public void testCreateEditDeleteClient() { .header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey) .body(clientJson) .post(clientApiPath); - assertEquals(201, rCreate.getStatusCode()); + assertEquals(CREATED.getStatusCode(), rCreate.getStatusCode()); // Try to update the client we have just created: @@ -80,7 +113,7 @@ public void testCreateEditDeleteClient() { .header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey) .body(updateJson) .put(clientApiPath); - assertEquals(200, rUpdate.getStatusCode()); + assertEquals(OK.getStatusCode(), rUpdate.getStatusCode()); // Now let's retrieve the client we've just created and edited: @@ -89,7 
+122,7 @@ public void testCreateEditDeleteClient() { logger.info("getClient.getStatusCode(): " + getClientResponse.getStatusCode()); logger.info("getClient printresponse: " + getClientResponse.prettyPrint()); - assertEquals(200, getClientResponse.getStatusCode()); + assertEquals(OK.getStatusCode(), getClientResponse.getStatusCode()); // ... and validate the values: @@ -98,7 +131,7 @@ public void testCreateEditDeleteClient() { .body("data.type", equalTo("oai")) .body("data.nickName", equalTo(nickName)) .body("data.archiveDescription", equalTo(archiveDescription)) - .body("data.dataverseAlias", equalTo(harvestCollection)) + .body("data.dataverseAlias", equalTo(rootCollection)) .body("data.harvestUrl", equalTo(harvestUrl)) .body("data.archiveUrl", equalTo(archiveUrl)) .body("data.metadataFormat", equalTo(harvestMetadataFormat)); @@ -109,7 +142,7 @@ public void testCreateEditDeleteClient() { .header(UtilIT.API_TOKEN_HTTP_HEADER, normalUserAPIKey) .delete(clientApiPath); logger.info("rDelete.getStatusCode(): " + rDelete.getStatusCode()); - assertEquals(401, rDelete.getStatusCode()); + assertEquals(UNAUTHORIZED.getStatusCode(), rDelete.getStatusCode()); // Try to delete as admin user should work: @@ -117,6 +150,112 @@ public void testCreateEditDeleteClient() { .header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey) .delete(clientApiPath); logger.info("rDelete.getStatusCode(): " + rDelete.getStatusCode()); - assertEquals(200, rDelete.getStatusCode()); + assertEquals(OK.getStatusCode(), rDelete.getStatusCode()); + } + + @Test + public void testHarvestingClientRun() throws InterruptedException { + // This test will create a client and attempt to perform an actual + // harvest and validate the resulting harvested content. + + // Setup: create the client via the API + // since this API is tested somewhat extensively in the previous + // method, we don't need to pay too much attention to this method, aside + // from confirming the expected HTTP status code. + + String nickName = UtilIT.getRandomString(6); + + String clientApiPath = String.format(harvestClientsApi+"%s", nickName); + String clientJson = String.format("{\"dataverseAlias\":\"%s\"," + + "\"type\":\"oai\"," + + "\"harvestUrl\":\"%s\"," + + "\"archiveUrl\":\"%s\"," + + "\"set\":\"%s\"," + + "\"metadataFormat\":\"%s\"}", + harvestCollectionAlias, harvestUrl, archiveUrl, controlOaiSet, harvestMetadataFormat); + + Response createResponse = given() + .header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey) + .body(clientJson) + .post(clientApiPath); + assertEquals(CREATED.getStatusCode(), createResponse.getStatusCode()); + + // API TEST 1. Run the harvest using the configuration (client) we have + // just created + + String runHarvestApiPath = String.format(harvestClientsApi+"%s/run", nickName); + + // TODO? - verify that a non-admin user cannot perform this operation (401) + + Response runResponse = given() + .header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey) + .post(runHarvestApiPath); + assertEquals(ACCEPTED.getStatusCode(), runResponse.getStatusCode()); + + // API TEST 2. As indicated by the ACCEPTED status code above, harvesting + // is an asynchronous operation that will be performed in the background. + // Verify that this "in progress" status is properly reported while it's + // running, and that it completes in some reasonable amount of time. 
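+        // For orientation, the client status JSON polled below is expected to look
+        // roughly like this (only the fields asserted in this test are shown; the
+        // values are purely illustrative):
+        //   { "status": "OK",
+        //     "data": { "status": "inProgress",   // flips to "inActive" once the job completes
+        //               "lastResult": "SUCCESS",
+        //               "lastHarvest": "<timestamp>",
+        //               "lastSuccessful": "<timestamp>",
+        //               "lastNonEmpty": "<timestamp>",
+        //               "lastDatasetsHarvested": 7 } }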
+ + int i = 0; + int maxWait=20; // a very conservative interval; this harvest has no business taking this long + do { + // keep checking the status of the client with the GET api: + Response getClientResponse = given() + .get(clientApiPath); + + assertEquals(OK.getStatusCode(), getClientResponse.getStatusCode()); + assertEquals(AbstractApiBean.STATUS_OK, getClientResponse.body().jsonPath().getString("status")); + + if (logger.isLoggable(Level.FINE)) { + logger.info("listIdentifiersResponse.prettyPrint: " + + getClientResponse.prettyPrint()); + } + + String clientStatus = getClientResponse.body().jsonPath().getString("data.status"); + assertNotNull(clientStatus); + + if ("inProgress".equals(clientStatus)) { + // we'll sleep for another second + i++; + } else { + // Check the values in the response: + // a) Confirm that the harvest has completed: + assertEquals("Unexpected client status: "+clientStatus, "inActive", clientStatus); + + // b) Confirm that it has actually succeeded: + assertEquals("Last harvest not reported a success", "SUCCESS", getClientResponse.body().jsonPath().getString("data.lastResult")); + String harvestTimeStamp = getClientResponse.body().jsonPath().getString("data.lastHarvest"); + assertNotNull(harvestTimeStamp); + + // c) Confirm that the other timestamps match: + assertEquals(harvestTimeStamp, getClientResponse.body().jsonPath().getString("data.lastSuccessful")); + assertEquals(harvestTimeStamp, getClientResponse.body().jsonPath().getString("data.lastNonEmpty")); + + // d) Confirm that the correct number of datasets have been harvested: + assertEquals(datasetsInControlSet, getClientResponse.body().jsonPath().getInt("data.lastDatasetsHarvested")); + + // ok, it looks like the harvest has completed successfully. + break; + } + Thread.sleep(1000L); + } while (i Date: Mon, 12 Dec 2022 17:10:35 -0500 Subject: [PATCH 114/173] comments (#8843) --- .../iq/dataverse/api/HarvestingClientsIT.java | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingClientsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingClientsIT.java index 8fef360c68b..448faa20b0b 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingClientsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingClientsIT.java @@ -75,7 +75,9 @@ private static void setupCollection() { @Test public void testCreateEditDeleteClient() { - //setupUsers(); + // This method focuses on testing the native Dataverse harvesting client + // API. + String nickName = UtilIT.getRandomString(6); @@ -158,7 +160,7 @@ public void testHarvestingClientRun() throws InterruptedException { // This test will create a client and attempt to perform an actual // harvest and validate the resulting harvested content. - // Setup: create the client via the API + // Setup: create the client via native API // since this API is tested somewhat extensively in the previous // method, we don't need to pay too much attention to this method, aside // from confirming the expected HTTP status code. @@ -246,8 +248,11 @@ public void testHarvestingClientRun() throws InterruptedException { // Fail if it hasn't completed in maxWait seconds assertTrue(i < maxWait); - // TODO: use the native Dataverses/Datasets apis to verify that the expected - // datasets have been harvested. + // TODO(?) use the native Dataverses/Datasets apis to verify that the expected + // datasets have been harvested. 
This may or may not be necessary, seeing + // how we have already confirmed the number of successfully harvested + // datasets from the control set; somewhat hard to imagine a practical + // situation where that would not be enough (?). // Cleanup: delete the client From 9dcbfa05de4316cc3c5560e5350a1f46ebf30d4a Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Tue, 13 Dec 2022 17:57:03 +0100 Subject: [PATCH 115/173] revert(metadata): remove CodeMeta fields from Solr schema #7844 This reverts commit 8d5edf23a13631e878c413e55c320cb704a579b5. @IQSS decided we will not include fields from experimental blocks in the schema. --- conf/solr/8.11.1/schema.xml | 48 +------------------------------------ 1 file changed, 1 insertion(+), 47 deletions(-) diff --git a/conf/solr/8.11.1/schema.xml b/conf/solr/8.11.1/schema.xml index 2656abf0dc5..63312ab5d40 100644 --- a/conf/solr/8.11.1/schema.xml +++ b/conf/solr/8.11.1/schema.xml @@ -405,31 +405,9 @@ - - - - - - - - - - - - - - - - - - - - - - - + @@ -667,30 +645,6 @@ - - - - - - - - - - - - - - - - - - - - - - - - - + SAML2 SAML1 From 230298902fbb7296c9623a355e66e72302f83174 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Thu, 19 Jan 2023 10:00:33 -0500 Subject: [PATCH 173/173] rename sql scripts #9153 "Use a version like '4.11.0.1' in the example above where the previously released version was 4.11" -- dev guide That is, these scripts should have been 5.12.1.whatever since the last release was 5.12.1. Fixing. (They were 5.13.whatever.) --- ...-sorting_licenses.sql => V5.12.1.1__8671-sorting_licenses.sql} | 0 ...ls-for-tools.sql => V5.12.1.2__7715-signed-urls-for-tools.sql} | 0 ...imates.sql => V5.12.1.3__8840-improve-guestbook-estimates.sql} | 0 ...-extract-metadata.sql => V5.12.1.4__9153-extract-metadata.sql} | 0 4 files changed, 0 insertions(+), 0 deletions(-) rename src/main/resources/db/migration/{V5.13.0.1__8671-sorting_licenses.sql => V5.12.1.1__8671-sorting_licenses.sql} (100%) rename src/main/resources/db/migration/{V5.13.0.2__7715-signed-urls-for-tools.sql => V5.12.1.2__7715-signed-urls-for-tools.sql} (100%) rename src/main/resources/db/migration/{V5.13.0.3__8840-improve-guestbook-estimates.sql => V5.12.1.3__8840-improve-guestbook-estimates.sql} (100%) rename src/main/resources/db/migration/{V5.13.0.3__9153-extract-metadata.sql => V5.12.1.4__9153-extract-metadata.sql} (100%) diff --git a/src/main/resources/db/migration/V5.13.0.1__8671-sorting_licenses.sql b/src/main/resources/db/migration/V5.12.1.1__8671-sorting_licenses.sql similarity index 100% rename from src/main/resources/db/migration/V5.13.0.1__8671-sorting_licenses.sql rename to src/main/resources/db/migration/V5.12.1.1__8671-sorting_licenses.sql diff --git a/src/main/resources/db/migration/V5.13.0.2__7715-signed-urls-for-tools.sql b/src/main/resources/db/migration/V5.12.1.2__7715-signed-urls-for-tools.sql similarity index 100% rename from src/main/resources/db/migration/V5.13.0.2__7715-signed-urls-for-tools.sql rename to src/main/resources/db/migration/V5.12.1.2__7715-signed-urls-for-tools.sql diff --git a/src/main/resources/db/migration/V5.13.0.3__8840-improve-guestbook-estimates.sql b/src/main/resources/db/migration/V5.12.1.3__8840-improve-guestbook-estimates.sql similarity index 100% rename from src/main/resources/db/migration/V5.13.0.3__8840-improve-guestbook-estimates.sql rename to src/main/resources/db/migration/V5.12.1.3__8840-improve-guestbook-estimates.sql diff --git a/src/main/resources/db/migration/V5.13.0.3__9153-extract-metadata.sql 
b/src/main/resources/db/migration/V5.12.1.4__9153-extract-metadata.sql similarity index 100% rename from src/main/resources/db/migration/V5.13.0.3__9153-extract-metadata.sql rename to src/main/resources/db/migration/V5.12.1.4__9153-extract-metadata.sql