diff --git a/.gitignore b/.gitignore
index 8e53b96a9..c7dc5aa84 100644
--- a/.gitignore
+++ b/.gitignore
@@ -17,6 +17,11 @@ src/_website
 *._pdf
 
 
+src/**/*.html
+src/**/*.tex
+!header.tex
+!json.html
+
 # eBook build output
 *.epub
 *.mobi
diff --git a/README.md b/README.md
index 649816e70..96e5ae1a2 100644
--- a/README.md
+++ b/README.md
@@ -26,9 +26,7 @@ previously, we used [bookdown](https://github.com/rstudio/bookdown).
 [Download quarto](https://quarto.org/docs/download/)
 To build the pdf version of the docs, you will need to [install LaTeX](https://www.latex-project.org/get/) as well.
 
-
-Quarto accepts both [Rmarkdown (`.Rmd`)](https://rmarkdown.rstudio.com)
-and [`.qmd`](https://quarto.org/docs/authoring/markdown-basics.html) source files
+Quarto accepts [`.qmd`](https://quarto.org/docs/authoring/markdown-basics.html) source files
 and uses the [Pandoc](https://pandoc.org) conversion engine.
 
 ## Scripts to build and maintain the docset
@@ -47,7 +45,7 @@ Requires Python 3.7 or higher, due to call to `subprocess.run`, kwarg `capture_o
 * `python build.py 2 35 website` - builds the docs website in `docs/2_42`.
 * `python build.py 2 35 pdf functions-reference` - builds only the pdf version of the Stan functions reference,  resulting document is `docs/2_35/functions-reference-2_35.pdf`
 * `python build.py 2 35 pdf all` - builds all pdfs from the Stan documentation set, resulting pdfs are in `docs/2_35`.
- 
+
 
 **Additional scripts**
 
diff --git a/extract_function_sigs.py b/extract_function_sigs.py
index 76ef60c76..ee9123934 100644
--- a/extract_function_sigs.py
+++ b/extract_function_sigs.py
@@ -35,7 +35,7 @@ def main():
     sigs = set()
     ref_dir = os.path.join('src', 'functions-reference')
     with pushd(ref_dir):
-        for file in glob.glob('*.Rmd'):
+        for file in glob.glob('*.qmd'):
             print(file)
             with open(file) as rmd_file:
                 lines = rmd_file.readlines()
@@ -50,7 +50,7 @@ def main():
                             sigs.add('{}; {}; {}'.format(parts[1], parts[2], parts[0]))
                         else:
                             print('not a function sig: {}'.format(line))
-    
+
     with open(outfile_name, 'w') as outfile:
         outfile.write('# This file is semicolon delimited\n')
         outfile.write('StanFunction; Arguments; ReturnType\n')
diff --git a/src/.gitignore b/src/.gitignore
index f2de5cb88..8ccecbdb7 100644
--- a/src/.gitignore
+++ b/src/.gitignore
@@ -6,9 +6,4 @@
 *.ind
 *.toc
 
-./**/*.html
-./**/*.tex
-!header.tex
-!json.html
-
 /.quarto/
diff --git a/src/_functions.R b/src/_functions.R
deleted file mode 100644
index 97f1d30e0..000000000
--- a/src/_functions.R
+++ /dev/null
@@ -1,10 +0,0 @@
-since <- function(x) {
-  x <- paste("Available since", x)
-  if (knitr::is_latex_output()) {
-    sprintf("\\newline\\mbox{\\small\\emph{%s}}", x)
-  } else if (knitr::is_html_output()) {
-    sprintf("<br/><small><i>%s</i></small>", x)
-  } else x
-}
-
-
diff --git a/src/_quarto.yml b/src/_quarto.yml
index 1010fd679..820d8d5ad 100644
--- a/src/_quarto.yml
+++ b/src/_quarto.yml
@@ -5,15 +5,15 @@ project:
   render:
     - "index.qmd"
     - "404.qmd"
-    - "cmdstan-guide/overview.qmd"
-    - "functions-reference/overview.qmd"
-    - "reference-manual/overview.qmd"
-    - "stan-users-guide/overview.qmd"
-    - "cmdstan-guide/*.Rmd"
-    - "functions-reference/*.Rmd"
-    - "reference-manual/*.Rmd"
-    - "stan-users-guide/*.Rmd"
-    - "!sdlc.Rmd"
+    - "cmdstan-guide/*.qmd"
+    - "functions-reference/*.qmd"
+    - "reference-manual/*.qmd"
+    - "stan-users-guide/*.qmd"
+    - "!sdlc.qmd"
+    - "!cmdstan-guide/index.qmd"
+    - "!functions-reference/index.qmd"
+    - "!reference-manual/index.qmd"
+    - "!stan-users-guide/index.qmd"
 
 bibliography: bibtex/all.bib
 biblio-style: apalike
@@ -93,48 +93,48 @@ website:
         - section: "Version {{< env STAN_DOCS_VERSION >}}"
         - section: "Example Models"
           contents:
-            - stan-users-guide/regression.Rmd
-            - stan-users-guide/time-series.Rmd
-            - stan-users-guide/missing-data.Rmd
-            - stan-users-guide/truncation-censoring.Rmd
-            - stan-users-guide/finite-mixtures.Rmd
-            - stan-users-guide/measurement-error.Rmd
-            - stan-users-guide/latent-discrete.Rmd
-            - stan-users-guide/sparse-ragged.Rmd
-            - stan-users-guide/clustering.Rmd
-            - stan-users-guide/gaussian-processes.Rmd
-            - stan-users-guide/hyperspherical-models.Rmd
-            - stan-users-guide/algebraic-equations.Rmd
-            - stan-users-guide/odes.Rmd
-            - stan-users-guide/one-dimensional-integrals.Rmd
-            - stan-users-guide/complex-numbers.Rmd
-            - stan-users-guide/dae.Rmd
+            - stan-users-guide/regression.qmd
+            - stan-users-guide/time-series.qmd
+            - stan-users-guide/missing-data.qmd
+            - stan-users-guide/truncation-censoring.qmd
+            - stan-users-guide/finite-mixtures.qmd
+            - stan-users-guide/measurement-error.qmd
+            - stan-users-guide/latent-discrete.qmd
+            - stan-users-guide/sparse-ragged.qmd
+            - stan-users-guide/clustering.qmd
+            - stan-users-guide/gaussian-processes.qmd
+            - stan-users-guide/hyperspherical-models.qmd
+            - stan-users-guide/algebraic-equations.qmd
+            - stan-users-guide/odes.qmd
+            - stan-users-guide/one-dimensional-integrals.qmd
+            - stan-users-guide/complex-numbers.qmd
+            - stan-users-guide/dae.qmd
         - section:  "Programming Techniques"
           contents:
-            - stan-users-guide/floating-point.Rmd
-            - stan-users-guide/matrices-arrays.Rmd
-            - stan-users-guide/multi-indexing.Rmd
-            - stan-users-guide/user-functions.Rmd
-            - stan-users-guide/custom-probability.Rmd
-            - stan-users-guide/proportionality-constants.Rmd
-            - stan-users-guide/problematic-posteriors.Rmd
-            - stan-users-guide/reparameterization.Rmd
-            - stan-users-guide/efficiency-tuning.Rmd
-            - stan-users-guide/parallelization.Rmd
+            - stan-users-guide/floating-point.qmd
+            - stan-users-guide/matrices-arrays.qmd
+            - stan-users-guide/multi-indexing.qmd
+            - stan-users-guide/user-functions.qmd
+            - stan-users-guide/custom-probability.qmd
+            - stan-users-guide/proportionality-constants.qmd
+            - stan-users-guide/problematic-posteriors.qmd
+            - stan-users-guide/reparameterization.qmd
+            - stan-users-guide/efficiency-tuning.qmd
+            - stan-users-guide/parallelization.qmd
         - section: "Posterior Inference & Model Checking"
           contents:
-            - stan-users-guide/posterior-prediction.Rmd
-            - stan-users-guide/simulation-based-calibration.Rmd
-            - stan-users-guide/posterior-predictive-checks.Rmd
-            - stan-users-guide/cross-validation.Rmd
-            - stan-users-guide/poststratification.Rmd
-            - stan-users-guide/decision-analysis.Rmd
-            - stan-users-guide/bootstrap.Rmd
+            - stan-users-guide/posterior-prediction.qmd
+            - stan-users-guide/simulation-based-calibration.qmd
+            - stan-users-guide/posterior-predictive-checks.qmd
+            - stan-users-guide/cross-validation.qmd
+            - stan-users-guide/poststratification.qmd
+            - stan-users-guide/decision-analysis.qmd
+            - stan-users-guide/bootstrap.qmd
         - section: "Appendices"
           contents:
-            - stan-users-guide/using-stanc.Rmd
-            - stan-users-guide/style-guide.Rmd
-            - stan-users-guide/for-bugs-users.Rmd
+            - stan-users-guide/using-stanc.qmd
+            - stan-users-guide/style-guide.qmd
+            - stan-users-guide/for-bugs-users.qmd
 
     - id: reference-manual
       title: "Reference Manual"
@@ -143,33 +143,33 @@ website:
         - section: "Version {{< env STAN_DOCS_VERSION >}}"
         - section: "Language"
           contents:
-            - reference-manual/encoding.Rmd
-            - reference-manual/includes.Rmd
-            - reference-manual/comments.Rmd
-            - reference-manual/whitespace.Rmd
-            - reference-manual/types.Rmd
-            - reference-manual/expressions.Rmd
-            - reference-manual/statements.Rmd
-            - reference-manual/blocks.Rmd
-            - reference-manual/user-functions.Rmd
-            - reference-manual/transforms.Rmd
-            - reference-manual/syntax.Rmd
-            - reference-manual/execution.Rmd
-            - reference-manual/deprecations.Rmd
-            - reference-manual/removals.Rmd
+            - reference-manual/encoding.qmd
+            - reference-manual/includes.qmd
+            - reference-manual/comments.qmd
+            - reference-manual/whitespace.qmd
+            - reference-manual/types.qmd
+            - reference-manual/expressions.qmd
+            - reference-manual/statements.qmd
+            - reference-manual/blocks.qmd
+            - reference-manual/user-functions.qmd
+            - reference-manual/transforms.qmd
+            - reference-manual/syntax.qmd
+            - reference-manual/execution.qmd
+            - reference-manual/deprecations.qmd
+            - reference-manual/removals.qmd
         - section: "Algorithms"
           contents:
-            - reference-manual/mcmc.Rmd
-            - reference-manual/analysis.Rmd
-            - reference-manual/optimization.Rmd
-            - reference-manual/pathfinder.Rmd
-            - reference-manual/variational.Rmd
-            - reference-manual/laplace.Rmd
-            - reference-manual/diagnostics.Rmd
+            - reference-manual/mcmc.qmd
+            - reference-manual/analysis.qmd
+            - reference-manual/optimization.qmd
+            - reference-manual/pathfinder.qmd
+            - reference-manual/variational.qmd
+            - reference-manual/laplace.qmd
+            - reference-manual/diagnostics.qmd
         - section: "Usage"
           contents:
-            - reference-manual/reproducibility.Rmd
-            - reference-manual/licenses.Rmd
+            - reference-manual/reproducibility.qmd
+            - reference-manual/licenses.qmd
 
     - id: functions-reference
       title: "Stan Functions"
@@ -178,44 +178,44 @@ website:
         - section: "Version {{< env STAN_DOCS_VERSION >}}"
         - section: "Built-in Functions"
           contents:
-            - functions-reference/void_functions.Rmd
-            - functions-reference/integer-valued_basic_functions.Rmd
-            - functions-reference/real-valued_basic_functions.Rmd
-            - functions-reference/complex-valued_basic_functions.Rmd
-            - functions-reference/array_operations.Rmd
-            - functions-reference/matrix_operations.Rmd
-            - functions-reference/complex_matrix_operations.Rmd
-            - functions-reference/sparse_matrix_operations.Rmd
-            - functions-reference/mixed_operations.Rmd
-            - functions-reference/compound_arithmetic_and_assignment.Rmd
-            - functions-reference/higher-order_functions.Rmd
-            - functions-reference/deprecated_functions.Rmd
-            - functions-reference/removed_functions.Rmd
-            - functions-reference/conventions_for_probability_functions.Rmd
+            - functions-reference/void_functions.qmd
+            - functions-reference/integer-valued_basic_functions.qmd
+            - functions-reference/real-valued_basic_functions.qmd
+            - functions-reference/complex-valued_basic_functions.qmd
+            - functions-reference/array_operations.qmd
+            - functions-reference/matrix_operations.qmd
+            - functions-reference/complex_matrix_operations.qmd
+            - functions-reference/sparse_matrix_operations.qmd
+            - functions-reference/mixed_operations.qmd
+            - functions-reference/compound_arithmetic_and_assignment.qmd
+            - functions-reference/higher-order_functions.qmd
+            - functions-reference/deprecated_functions.qmd
+            - functions-reference/removed_functions.qmd
+            - functions-reference/conventions_for_probability_functions.qmd
         - section: "Discrete Distributions"
           contents:
-            - functions-reference/binary_distributions.Rmd
-            - functions-reference/bounded_discrete_distributions.Rmd
-            - functions-reference/unbounded_discrete_distributions.Rmd
-            - functions-reference/multivariate_discrete_distributions.Rmd
+            - functions-reference/binary_distributions.qmd
+            - functions-reference/bounded_discrete_distributions.qmd
+            - functions-reference/unbounded_discrete_distributions.qmd
+            - functions-reference/multivariate_discrete_distributions.qmd
         - section: "Continuous Distributions"
           contents:
-            - functions-reference/unbounded_continuous_distributions.Rmd
-            - functions-reference/positive_continuous_distributions.Rmd
-            - functions-reference/positive_lower-bounded_distributions.Rmd
-            - functions-reference/continuous_distributions_on_0_1.Rmd
-            - functions-reference/circular_distributions.Rmd
-            - functions-reference/bounded_continuous_distributions.Rmd
-            - functions-reference/distributions_over_unbounded_vectors.Rmd
-            - functions-reference/simplex_distributions.Rmd
-            - functions-reference/correlation_matrix_distributions.Rmd
-            - functions-reference/covariance_matrix_distributions.Rmd
+            - functions-reference/unbounded_continuous_distributions.qmd
+            - functions-reference/positive_continuous_distributions.qmd
+            - functions-reference/positive_lower-bounded_distributions.qmd
+            - functions-reference/continuous_distributions_on_0_1.qmd
+            - functions-reference/circular_distributions.qmd
+            - functions-reference/bounded_continuous_distributions.qmd
+            - functions-reference/distributions_over_unbounded_vectors.qmd
+            - functions-reference/simplex_distributions.qmd
+            - functions-reference/correlation_matrix_distributions.qmd
+            - functions-reference/covariance_matrix_distributions.qmd
         - section: "Additional Distributions"
           contents:
-            - functions-reference/hidden_markov_models.Rmd
+            - functions-reference/hidden_markov_models.qmd
         - section: "Appendix"
           contents:
-            - functions-reference/mathematical_functions.Rmd
+            - functions-reference/mathematical_functions.qmd
 
     - id: cmdstan-guide
       title: "CmdStan Guide"
@@ -224,38 +224,38 @@ website:
         - section: "Version {{< env STAN_DOCS_VERSION >}}"
         - section: "Quickstart Guide"
           contents:
-            - cmdstan-guide/installation.Rmd
-            - cmdstan-guide/example_model_data.Rmd
-            - cmdstan-guide/compiling_stan_programs.Rmd
-            - cmdstan-guide/mcmc_sampling_intro.Rmd
-            - cmdstan-guide/optimization_intro.Rmd
-            - cmdstan-guide/pathfinder_intro.Rmd
-            - cmdstan-guide/variational_intro.Rmd
-            - cmdstan-guide/generate_quantities_intro.Rmd
+            - cmdstan-guide/installation.qmd
+            - cmdstan-guide/example_model_data.qmd
+            - cmdstan-guide/compiling_stan_programs.qmd
+            - cmdstan-guide/mcmc_sampling_intro.qmd
+            - cmdstan-guide/optimization_intro.qmd
+            - cmdstan-guide/pathfinder_intro.qmd
+            - cmdstan-guide/variational_intro.qmd
+            - cmdstan-guide/generate_quantities_intro.qmd
         - section: "Reference Manual"
           contents:
-            - cmdstan-guide/command_line_options.Rmd
-            - cmdstan-guide/mcmc_config.Rmd
-            - cmdstan-guide/optimize_config.Rmd
-            - cmdstan-guide/pathfinder_config.Rmd
-            - cmdstan-guide/variational_config.Rmd
-            - cmdstan-guide/generate_quantities_config.Rmd
-            - cmdstan-guide/laplace_sample_config.Rmd
-            - cmdstan-guide/log_prob_config.Rmd
-            - cmdstan-guide/diagnose_config.Rmd
-            - cmdstan-guide/parallelization.Rmd
+            - cmdstan-guide/command_line_options.qmd
+            - cmdstan-guide/mcmc_config.qmd
+            - cmdstan-guide/optimize_config.qmd
+            - cmdstan-guide/pathfinder_config.qmd
+            - cmdstan-guide/variational_config.qmd
+            - cmdstan-guide/generate_quantities_config.qmd
+            - cmdstan-guide/laplace_sample_config.qmd
+            - cmdstan-guide/log_prob_config.qmd
+            - cmdstan-guide/diagnose_config.qmd
+            - cmdstan-guide/parallelization.qmd
         - section: "Tools and Utilities"
           contents:
-            - cmdstan-guide/stanc.Rmd
-            - cmdstan-guide/stansummary.Rmd
-            - cmdstan-guide/diagnose_utility.Rmd
-            - cmdstan-guide/print.Rmd
+            - cmdstan-guide/stanc.qmd
+            - cmdstan-guide/stansummary.qmd
+            - cmdstan-guide/diagnose_utility.qmd
+            - cmdstan-guide/print.qmd
         - section: "Appendices"
           contents:
-            - cmdstan-guide/stan_csv_apdx.Rmd
-            - cmdstan-guide/json_apdx.Rmd
-            - cmdstan-guide/rdump_apdx.Rmd
-            - cmdstan-guide/external_code.Rmd
+            - cmdstan-guide/stan_csv_apdx.qmd
+            - cmdstan-guide/json_apdx.qmd
+            - cmdstan-guide/rdump_apdx.qmd
+            - cmdstan-guide/external_code.qmd
 
 format:
   html:
diff --git a/src/before-chapter.R b/src/before-chapter.R
deleted file mode 100644
index 17abaea98..000000000
--- a/src/before-chapter.R
+++ /dev/null
@@ -1,5 +0,0 @@
-library(dplyr)
-library(ggplot2)
-library(kableExtra)
-
-options(digits = 2)
diff --git a/src/cmdstan-guide/_quarto.yml b/src/cmdstan-guide/_quarto.yml
index 038c16099..0870f73f1 100644
--- a/src/cmdstan-guide/_quarto.yml
+++ b/src/cmdstan-guide/_quarto.yml
@@ -36,40 +36,40 @@ book:
     - index.qmd
     - part: "Quickstart Guide"
       chapters:
-        - installation.Rmd
-        - example_model_data.Rmd
-        - compiling_stan_programs.Rmd
-        - mcmc_sampling_intro.Rmd
-        - optimization_intro.Rmd
-        - pathfinder_intro.Rmd
-        - variational_intro.Rmd
-        - generate_quantities_intro.Rmd
+        - installation.qmd
+        - example_model_data.qmd
+        - compiling_stan_programs.qmd
+        - mcmc_sampling_intro.qmd
+        - optimization_intro.qmd
+        - pathfinder_intro.qmd
+        - variational_intro.qmd
+        - generate_quantities_intro.qmd
 
     - part: "Reference Manual"
       chapters:
-        - command_line_options.Rmd
-        - mcmc_config.Rmd
-        - optimize_config.Rmd
-        - pathfinder_config.Rmd
-        - variational_config.Rmd
-        - generate_quantities_config.Rmd
-        - laplace_sample_config.Rmd
-        - log_prob_config.Rmd
-        - diagnose_config.Rmd
-        - parallelization.Rmd
+        - command_line_options.qmd
+        - mcmc_config.qmd
+        - optimize_config.qmd
+        - pathfinder_config.qmd
+        - variational_config.qmd
+        - generate_quantities_config.qmd
+        - laplace_sample_config.qmd
+        - log_prob_config.qmd
+        - diagnose_config.qmd
+        - parallelization.qmd
 
     - part: "CmdStan Utilities"
       chapters:
-        - stanc.Rmd
-        - stansummary.Rmd
-        - diagnose_utility.Rmd
-        - print.Rmd
+        - stanc.qmd
+        - stansummary.qmd
+        - diagnose_utility.qmd
+        - print.qmd
 
     - part: "Appendices"
       chapters:
-        - stan_csv_apdx.Rmd
-        - json_apdx.Rmd
-        - rdump_apdx.Rmd
-        - external_code.Rmd
+        - stan_csv_apdx.qmd
+        - json_apdx.qmd
+        - rdump_apdx.qmd
+        - external_code.qmd
 
-    - bib.Rmd
+    - bib.qmd
diff --git a/src/cmdstan-guide/bib.Rmd b/src/cmdstan-guide/bib.qmd
similarity index 100%
rename from src/cmdstan-guide/bib.Rmd
rename to src/cmdstan-guide/bib.qmd
diff --git a/src/cmdstan-guide/command_line_options.Rmd b/src/cmdstan-guide/command_line_options.qmd
similarity index 100%
rename from src/cmdstan-guide/command_line_options.Rmd
rename to src/cmdstan-guide/command_line_options.qmd
diff --git a/src/cmdstan-guide/compiling_stan_programs.Rmd b/src/cmdstan-guide/compiling_stan_programs.qmd
similarity index 100%
rename from src/cmdstan-guide/compiling_stan_programs.Rmd
rename to src/cmdstan-guide/compiling_stan_programs.qmd
diff --git a/src/cmdstan-guide/diagnose_config.Rmd b/src/cmdstan-guide/diagnose_config.qmd
similarity index 100%
rename from src/cmdstan-guide/diagnose_config.Rmd
rename to src/cmdstan-guide/diagnose_config.qmd
diff --git a/src/cmdstan-guide/diagnose_utility.Rmd b/src/cmdstan-guide/diagnose_utility.qmd
similarity index 100%
rename from src/cmdstan-guide/diagnose_utility.Rmd
rename to src/cmdstan-guide/diagnose_utility.qmd
diff --git a/src/cmdstan-guide/err_handling_apdx.Rmd b/src/cmdstan-guide/err_handling_apdx.qmd
similarity index 100%
rename from src/cmdstan-guide/err_handling_apdx.Rmd
rename to src/cmdstan-guide/err_handling_apdx.qmd
diff --git a/src/cmdstan-guide/example_model_data.Rmd b/src/cmdstan-guide/example_model_data.qmd
similarity index 100%
rename from src/cmdstan-guide/example_model_data.Rmd
rename to src/cmdstan-guide/example_model_data.qmd
diff --git a/src/cmdstan-guide/external_code.Rmd b/src/cmdstan-guide/external_code.qmd
similarity index 100%
rename from src/cmdstan-guide/external_code.Rmd
rename to src/cmdstan-guide/external_code.qmd
diff --git a/src/cmdstan-guide/generate_quantities_config.Rmd b/src/cmdstan-guide/generate_quantities_config.qmd
similarity index 97%
rename from src/cmdstan-guide/generate_quantities_config.Rmd
rename to src/cmdstan-guide/generate_quantities_config.qmd
index 9d582cb7f..2bf2a577b 100644
--- a/src/cmdstan-guide/generate_quantities_config.Rmd
+++ b/src/cmdstan-guide/generate_quantities_config.qmd
@@ -7,7 +7,7 @@ pagetitle: Standalone Generate Quantities
 The `generate_quantities` method allows you to generate additional
 quantities of interest from a fitted model without re-running the sampler.
 For an overview of the uses of this feature, see the
-[QuickStart Guide section](generate_quantities_intro.Rmd)
+[QuickStart Guide section](generate_quantities_intro.qmd)
 and the Stan User's Guide section on
 [Stand-alone generated quantities and ongoing prediction](https://mc-stan.org/docs/stan-users-guide/posterior-prediction.html#stand-alone-generated-quantities-and-ongoing-prediction).
 
diff --git a/src/cmdstan-guide/generate_quantities_intro.Rmd b/src/cmdstan-guide/generate_quantities_intro.qmd
similarity index 100%
rename from src/cmdstan-guide/generate_quantities_intro.Rmd
rename to src/cmdstan-guide/generate_quantities_intro.qmd
diff --git a/src/cmdstan-guide/installation.Rmd b/src/cmdstan-guide/installation.qmd
similarity index 100%
rename from src/cmdstan-guide/installation.Rmd
rename to src/cmdstan-guide/installation.qmd
diff --git a/src/cmdstan-guide/json_apdx.Rmd b/src/cmdstan-guide/json_apdx.qmd
similarity index 100%
rename from src/cmdstan-guide/json_apdx.Rmd
rename to src/cmdstan-guide/json_apdx.qmd
diff --git a/src/cmdstan-guide/laplace_sample_config.Rmd b/src/cmdstan-guide/laplace_sample_config.qmd
similarity index 100%
rename from src/cmdstan-guide/laplace_sample_config.Rmd
rename to src/cmdstan-guide/laplace_sample_config.qmd
diff --git a/src/cmdstan-guide/log_prob_config.Rmd b/src/cmdstan-guide/log_prob_config.qmd
similarity index 98%
rename from src/cmdstan-guide/log_prob_config.Rmd
rename to src/cmdstan-guide/log_prob_config.qmd
index 821ec3948..4247c2635 100644
--- a/src/cmdstan-guide/log_prob_config.Rmd
+++ b/src/cmdstan-guide/log_prob_config.qmd
@@ -24,8 +24,8 @@ This method takes 3 arguments:
 
 - `constrained_params` - Input file of parameters values on the constrained scale.
 A single set of constrained parameters can be specified using
-[JSON](json_apdx.Rmd) format.
-Alternatively, the input file can be set of draws in [StanCSV](stan_csv_apdx.Rmd) format.
+[JSON](json_apdx.qmd) format.
+Alternatively, the input file can be a set of draws in [StanCSV](stan_csv_apdx.qmd) format.
 
 
 - `unconstrained_params` - Input file (JSON or R dump) of parameter values
diff --git a/src/cmdstan-guide/mcmc_config.Rmd b/src/cmdstan-guide/mcmc_config.qmd
similarity index 99%
rename from src/cmdstan-guide/mcmc_config.Rmd
rename to src/cmdstan-guide/mcmc_config.qmd
index a0c921adb..fc2658b7e 100644
--- a/src/cmdstan-guide/mcmc_config.Rmd
+++ b/src/cmdstan-guide/mcmc_config.qmd
@@ -126,7 +126,7 @@ This metric file can be reused in subsequent sampler runs as the initial metric,
 via sampler argument `metric_file`.
 
 ### Step size optimization configuration
- 
+
 The Stan User's Guide section on
 [model conditioning and curvature](https://mc-stan.org/docs/stan-users-guide/efficiency-tuning.html#model-conditioning-and-curvature)
 provides a discussion of adaptation and stepsize issues.
@@ -314,7 +314,7 @@ By default, no auxiliary output file is produced.
 ## Multiple chains in one executable {#sampler-num-chains}
 
 As described in the
-[quickstart section on parallelism](mcmc_sampling_intro.Rmd#multi-chain-sampling),
+[quickstart section on parallelism](mcmc_sampling_intro.qmd#multi-chain-sampling),
 the preferred way to run multiple chains is to use the `num_chains` argument.
 
 This will run multiple chains of MCMC from the same executable, which can save
@@ -346,7 +346,7 @@ argument, which defaults to `1`.
 **Note**: Many of these examples can be simplified by using the [`num_chains`
 argument](#sampler-num-chains).
 
-The Quickstart Guide [MCMC Sampling chapter](mcmc_sampling_intro.Rmd) section on multiple chains
+The Quickstart Guide [MCMC Sampling chapter](mcmc_sampling_intro.qmd) section on multiple chains
 also showed how to run multiple chains given a model and data, using the minimal required
 command line options: the method, the name of the data file, and a chain-specific name for the output file.
 
diff --git a/src/cmdstan-guide/mcmc_sampling_intro.Rmd b/src/cmdstan-guide/mcmc_sampling_intro.qmd
similarity index 100%
rename from src/cmdstan-guide/mcmc_sampling_intro.Rmd
rename to src/cmdstan-guide/mcmc_sampling_intro.qmd
diff --git a/src/cmdstan-guide/optimization_intro.Rmd b/src/cmdstan-guide/optimization_intro.qmd
similarity index 100%
rename from src/cmdstan-guide/optimization_intro.Rmd
rename to src/cmdstan-guide/optimization_intro.qmd
diff --git a/src/cmdstan-guide/optimize_config.Rmd b/src/cmdstan-guide/optimize_config.qmd
similarity index 100%
rename from src/cmdstan-guide/optimize_config.Rmd
rename to src/cmdstan-guide/optimize_config.qmd
diff --git a/src/cmdstan-guide/parallelization.Rmd b/src/cmdstan-guide/parallelization.qmd
similarity index 100%
rename from src/cmdstan-guide/parallelization.Rmd
rename to src/cmdstan-guide/parallelization.qmd
diff --git a/src/cmdstan-guide/pathfinder_config.Rmd b/src/cmdstan-guide/pathfinder_config.qmd
similarity index 98%
rename from src/cmdstan-guide/pathfinder_config.Rmd
rename to src/cmdstan-guide/pathfinder_config.qmd
index e05d06be0..2bc41efb8 100644
--- a/src/cmdstan-guide/pathfinder_config.Rmd
+++ b/src/cmdstan-guide/pathfinder_config.qmd
@@ -4,7 +4,7 @@ pagetitle: Pathfinder Configuration
 
 # Pathfinder Method for Approximate Bayesian Inference {#pathfinder-config}
 
-The Pathfinder algorithm is described in section [Pathfinder overview](pathfinder_intro.Rmd).
+The Pathfinder algorithm is described in section [Pathfinder overview](pathfinder_intro.qmd).
 
 The `pathfinder` method runs multi-path Pathfinder by default,
 which returns a PSIS sample over the draws from several individual ("single-path") Pathfinder runs.
@@ -76,7 +76,7 @@ Valid values: $\{0, 1\}$. Default is $1$ (True).
 ## L-BFGS Configuration
 
 Arguments `init_alpha` through `history_size` are the full set of arguments to the L-BFGS optimizer
-and have the same defaults for [optimization](optimize_config.Rmd).
+and have the same defaults for [optimization](optimize_config.qmd).
 
 
 ## Multi-path Pathfinder CSV files{#pathfinder_csv}
@@ -84,7 +84,7 @@ and have the same defaults for [optimization](optimize_config.Rmd).
 By default, the `pathfinder` method uses 4 independent Pathfinder runs,
 each of which produces 1000 approximate draws, which are then
 importance resampled down to 1000 final draws.
-The importance resampled draws are output as a [StanCSV file](stan_csv_apdx.Rmd).
+The importance resampled draws are output as a [StanCSV file](stan_csv_apdx.qmd).
 
 The CSV files have the following structure:
 
diff --git a/src/cmdstan-guide/pathfinder_intro.Rmd b/src/cmdstan-guide/pathfinder_intro.qmd
similarity index 97%
rename from src/cmdstan-guide/pathfinder_intro.Rmd
rename to src/cmdstan-guide/pathfinder_intro.qmd
index c23c322d0..2ac38c89a 100644
--- a/src/cmdstan-guide/pathfinder_intro.Rmd
+++ b/src/cmdstan-guide/pathfinder_intro.qmd
@@ -99,7 +99,7 @@ Path [4] :Best Iter: [3] ELBO (-6.161276) evaluations: (126)
 Total log probability function evaluations:8404
 ```
 
-Pathfinder outputs a [StanCSV file](pathfinder_config.Rmd#pathfinder_csv) file which
+Pathfinder outputs a [StanCSV file](pathfinder_config.qmd#pathfinder_csv) which
 contains the importance resampled draws from multi-path Pathfinder.
 The initial CSV comment rows contain the complete set of CmdStan
 configuration options.  Next is the column header line, followed the
@@ -125,4 +125,4 @@ The final lines are comment lines which give timing information.
 
 Pathfinder provides option `save_single_paths` which will save output
 from the single-path Pathfinder runs.
-See section [Pathfinder Method](pathfinder_config.Rmd) for details.
+See section [Pathfinder Method](pathfinder_config.qmd) for details.
diff --git a/src/cmdstan-guide/print.Rmd b/src/cmdstan-guide/print.qmd
similarity index 97%
rename from src/cmdstan-guide/print.Rmd
rename to src/cmdstan-guide/print.qmd
index 4f5780ad1..4387f7a5a 100644
--- a/src/cmdstan-guide/print.Rmd
+++ b/src/cmdstan-guide/print.qmd
@@ -5,5 +5,5 @@ pagetitle: Utility print (deprecated)
 # `print` (deprecated): MCMC Output Analysis
 
 The `print` utility is deprecated, but is still available until CmdStan v3.0.
-It has been replaced by the [`stansummary` utility](stansummary.Rmd).
+It has been replaced by the [`stansummary` utility](stansummary.qmd).
 
diff --git a/src/cmdstan-guide/rdump_apdx.Rmd b/src/cmdstan-guide/rdump_apdx.qmd
similarity index 99%
rename from src/cmdstan-guide/rdump_apdx.Rmd
rename to src/cmdstan-guide/rdump_apdx.qmd
index 9add367a3..168a95adb 100644
--- a/src/cmdstan-guide/rdump_apdx.Rmd
+++ b/src/cmdstan-guide/rdump_apdx.qmd
@@ -5,7 +5,7 @@ pagetitle: RDump Format for CmdStan
 # RDump Format for CmdStan {#rdump}
 
 **NOTE:** Although the RDump format is still supported, I/O with JSON is faster
-and recommended. See the [chapter on JSON](json_apdx.Rmd) for more details.
+and recommended. See the [chapter on JSON](json_apdx.qmd) for more details.
 
 RDump format can be used to represent values for Stan variables.
 This format was introduced in SPLUS and is used in R, JAGS,
diff --git a/src/cmdstan-guide/stan_csv_apdx.Rmd b/src/cmdstan-guide/stan_csv_apdx.qmd
similarity index 98%
rename from src/cmdstan-guide/stan_csv_apdx.Rmd
rename to src/cmdstan-guide/stan_csv_apdx.qmd
index 3edc52a53..0dae8c214 100644
--- a/src/cmdstan-guide/stan_csv_apdx.Rmd
+++ b/src/cmdstan-guide/stan_csv_apdx.qmd
@@ -77,7 +77,7 @@ bar:1,bar:2.1,bar:2.2,bar:2.3
 ## MCMC sampler CSV output
 
 The sample method produces both a Stan CSV output file and a
-[diagnostic file](mcmc_config.Rmd#sampler-diag-file)
+[diagnostic file](mcmc_config.qmd#sampler-diag-file)
 which contains the sampler parameters together with the gradients on the unconstrained scale and
 log probabilities for all parameters in the model.
 
@@ -153,14 +153,14 @@ as seen in the initial comment rows:
 ```
 
 Note that when running multi-threaded programs which use `reduce_sum`
-for [high-level parallelization](parallelization.Rmd), the number of threads used
+for [high-level parallelization](parallelization.qmd), the number of threads used
 will also be included in this initial comment header.
 
 
 **Column headers**
 
 The CSV header row lists all sampler parameters, model parameters, transformed parameters, and quantities of interest.
-The sampler parameters are described in detail in the [output file](mcm_sampling_intro.Rmd#mcmc_output_csv) section of the Quickstart Guide chapter on MCMC Sampling.
+The sampler parameters are described in detail in the [output file](mcmc_sampling_intro.qmd#mcmc_output_csv) section of the Quickstart Guide chapter on MCMC Sampling.
 The example model `bernoulli.stan` only contains one parameter `theta`, therefore the CSV file data table
 consists of 7 sampler parameter columns and one column for the model parameter:
 ```
diff --git a/src/cmdstan-guide/stanc.Rmd b/src/cmdstan-guide/stanc.qmd
similarity index 100%
rename from src/cmdstan-guide/stanc.Rmd
rename to src/cmdstan-guide/stanc.qmd
diff --git a/src/cmdstan-guide/stansummary.Rmd b/src/cmdstan-guide/stansummary.qmd
similarity index 100%
rename from src/cmdstan-guide/stansummary.Rmd
rename to src/cmdstan-guide/stansummary.qmd
diff --git a/src/cmdstan-guide/variational_config.Rmd b/src/cmdstan-guide/variational_config.qmd
similarity index 100%
rename from src/cmdstan-guide/variational_config.Rmd
rename to src/cmdstan-guide/variational_config.qmd
diff --git a/src/cmdstan-guide/variational_intro.Rmd b/src/cmdstan-guide/variational_intro.qmd
similarity index 100%
rename from src/cmdstan-guide/variational_intro.Rmd
rename to src/cmdstan-guide/variational_intro.qmd
diff --git a/src/functions-reference/_quarto.yml b/src/functions-reference/_quarto.yml
index 84677c0ef..289b31def 100644
--- a/src/functions-reference/_quarto.yml
+++ b/src/functions-reference/_quarto.yml
@@ -35,43 +35,43 @@ book:
   chapters:
     - index.qmd
     - part: "Built-in Functions"
-      chapters: 
-        - void_functions.Rmd
-        - integer-valued_basic_functions.Rmd
-        - real-valued_basic_functions.Rmd
-        - complex-valued_basic_functions.Rmd
-        - array_operations.Rmd
-        - matrix_operations.Rmd
-        - complex_matrix_operations.Rmd
-        - sparse_matrix_operations.Rmd
-        - mixed_operations.Rmd
-        - compound_arithmetic_and_assignment.Rmd
-        - higher-order_functions.Rmd
-        - deprecated_functions.Rmd
-        - removed_functions.Rmd
-        - conventions_for_probability_functions.Rmd
+      chapters:
+        - void_functions.qmd
+        - integer-valued_basic_functions.qmd
+        - real-valued_basic_functions.qmd
+        - complex-valued_basic_functions.qmd
+        - array_operations.qmd
+        - matrix_operations.qmd
+        - complex_matrix_operations.qmd
+        - sparse_matrix_operations.qmd
+        - mixed_operations.qmd
+        - compound_arithmetic_and_assignment.qmd
+        - higher-order_functions.qmd
+        - deprecated_functions.qmd
+        - removed_functions.qmd
+        - conventions_for_probability_functions.qmd
     - part: "Discrete Distributions"
-      chapters: 
-        - binary_distributions.Rmd
-        - bounded_discrete_distributions.Rmd
-        - unbounded_discrete_distributions.Rmd
-        - multivariate_discrete_distributions.Rmd
+      chapters:
+        - binary_distributions.qmd
+        - bounded_discrete_distributions.qmd
+        - unbounded_discrete_distributions.qmd
+        - multivariate_discrete_distributions.qmd
     - part: "Continuous Distributions"
-      chapters: 
-        - unbounded_continuous_distributions.Rmd
-        - positive_continuous_distributions.Rmd
-        - positive_lower-bounded_distributions.Rmd
-        - continuous_distributions_on_0_1.Rmd
-        - circular_distributions.Rmd
-        - bounded_continuous_distributions.Rmd
-        - distributions_over_unbounded_vectors.Rmd
-        - simplex_distributions.Rmd
-        - correlation_matrix_distributions.Rmd
-        - covariance_matrix_distributions.Rmd
+      chapters:
+        - unbounded_continuous_distributions.qmd
+        - positive_continuous_distributions.qmd
+        - positive_lower-bounded_distributions.qmd
+        - continuous_distributions_on_0_1.qmd
+        - circular_distributions.qmd
+        - bounded_continuous_distributions.qmd
+        - distributions_over_unbounded_vectors.qmd
+        - simplex_distributions.qmd
+        - correlation_matrix_distributions.qmd
+        - covariance_matrix_distributions.qmd
     - part: "Additional Distributions"
-      chapters: 
-        - hidden_markov_models.Rmd
+      chapters:
+        - hidden_markov_models.qmd
     - part: "Appendix"
-      chapters: 
-        - mathematical_functions.Rmd
-    - references.Rmd
+      chapters:
+        - mathematical_functions.qmd
+    - references.qmd
diff --git a/src/functions-reference/array_operations.Rmd b/src/functions-reference/array_operations.qmd
similarity index 100%
rename from src/functions-reference/array_operations.Rmd
rename to src/functions-reference/array_operations.qmd
diff --git a/src/functions-reference/binary_distributions.Rmd b/src/functions-reference/binary_distributions.qmd
similarity index 99%
rename from src/functions-reference/binary_distributions.Rmd
rename to src/functions-reference/binary_distributions.qmd
index efe4a1a4e..af9eb410b 100644
--- a/src/functions-reference/binary_distributions.Rmd
+++ b/src/functions-reference/binary_distributions.qmd
@@ -79,7 +79,7 @@ function of y given chance of success `theta`
 Generate a Bernoulli variate with chance of success `theta`; may only be
 used in transformed data and generated quantities blocks.
 For a description of argument and return types, see section
-[vectorized PRNG functions](conventions_for_probability_functions.Rmd#prng-vectorization).
+[vectorized PRNG functions](conventions_for_probability_functions.qmd#prng-vectorization).
 {{< since 2.18 >}}
 
 ## Bernoulli distribution, logit parameterization {#bernoulli-logit-distribution}
@@ -135,7 +135,7 @@ The log Bernoulli probability mass of y given chance of success
 Generate a Bernoulli variate with chance of success
 $\text{logit}^{-1}(\alpha)$; may only be used in transformed data and generated
 quantities blocks. For a description of argument and return types, see section
-[vectorized PRNG functions](conventions_for_probability_functions.Rmd#prng-vectorization).
+[vectorized PRNG functions](conventions_for_probability_functions.qmd#prng-vectorization).
 {{< since 2.18 >}}
 
 ## Bernoulli-logit generalized linear model (Logistic Regression) {#bernoulli-logit-glm}
diff --git a/src/functions-reference/bounded_continuous_distributions.Rmd b/src/functions-reference/bounded_continuous_distributions.qmd
similarity index 99%
rename from src/functions-reference/bounded_continuous_distributions.Rmd
rename to src/functions-reference/bounded_continuous_distributions.qmd
index 1d48d577f..bdad7b933 100644
--- a/src/functions-reference/bounded_continuous_distributions.Rmd
+++ b/src/functions-reference/bounded_continuous_distributions.qmd
@@ -76,5 +76,5 @@ of y given lower bound alpha and upper bound beta
 Generate a uniform variate with lower bound alpha and upper bound
 beta; may only be used in transformed data and generated quantities blocks. For a
 description of argument and return types, see section
-[vectorized PRNG functions](conventions_for_probability_functions.Rmd#prng-vectorization).
+[vectorized PRNG functions](conventions_for_probability_functions.qmd#prng-vectorization).
 {{< since 2.18 >}}
diff --git a/src/functions-reference/bounded_discrete_distributions.Rmd b/src/functions-reference/bounded_discrete_distributions.qmd
similarity index 99%
rename from src/functions-reference/bounded_discrete_distributions.Rmd
rename to src/functions-reference/bounded_discrete_distributions.qmd
index ec28e2b0d..b99e4730d 100644
--- a/src/functions-reference/bounded_discrete_distributions.Rmd
+++ b/src/functions-reference/bounded_discrete_distributions.qmd
@@ -88,7 +88,7 @@ of n successes in N trials given chance of success theta
 Generate a binomial variate with N trials and chance of success theta;
 may only be used in transformed data and generated quantities blocks.
 For a description of argument and return types, see section
-[vectorized PRNG functions](conventions_for_probability_functions.Rmd#prng-vectorization).
+[vectorized PRNG functions](conventions_for_probability_functions.qmd#prng-vectorization).
 {{< since 2.18 >}}
 
 ## Binomial distribution, logit parameterization
@@ -350,7 +350,7 @@ Generate a beta-binomial variate with N trials, prior success count
 (plus one) of alpha, and prior failure count (plus one) of beta; may
 only be used in transformed data and generated quantities blocks.
 For a description of argument and return types, see section
-[vectorized PRNG functions](conventions_for_probability_functions.Rmd#prng-vectorization).
+[vectorized PRNG functions](conventions_for_probability_functions.qmd#prng-vectorization).
 {{< since 2.18 >}}
 
 ## Hypergeometric distribution
@@ -409,7 +409,7 @@ for $y \in \{1,\ldots,N\}$, \begin{equation*} \text{Categorical}(y~|~\theta) =
 \theta_y. \end{equation*} In addition, Stan provides a log-odds scaled categorical
 distribution, \begin{equation*} \text{CategoricalLogit}(y~|~\beta) =
 \text{Categorical}(y~|~\text{softmax}(\beta)). \end{equation*}
-See [the definition of softmax](matrix_operations.Rmd#softmax) for the definition of the softmax function.
+See [softmax](matrix_operations.qmd#softmax) for the definition of the softmax function.
 
 ### Sampling statement
 
@@ -513,7 +513,7 @@ If $N,M,K \in \mathbb{N}$, $N,M,K > 0$, and if $x\in \mathbb{R}^{M\times K}, \al
  & =  \prod_{1\leq i \leq M}\text{Categorical}(y_i~|~softmax(\alpha+x_i\cdot\beta)).
 \end{split}
 \end{equation*}
-See [the definition of softmax](matrix_operations.Rmd#softmax) for the definition of the softmax function.
+See [softmax](matrix_operations.qmd#softmax) for the definition of the softmax function.
 
 ### Sampling statement
 
diff --git a/src/functions-reference/circular_distributions.Rmd b/src/functions-reference/circular_distributions.qmd
similarity index 99%
rename from src/functions-reference/circular_distributions.Rmd
rename to src/functions-reference/circular_distributions.qmd
index 9b322f023..4a089b0cd 100644
--- a/src/functions-reference/circular_distributions.Rmd
+++ b/src/functions-reference/circular_distributions.qmd
@@ -95,7 +95,7 @@ Generate a Von Mises variate with location mu and scale kappa (i.e.
 returns values in the interval $[(\mu \mod 2\pi)-\pi,(\mu \mod
 2\pi)+\pi]$); may only be used in transformed data and generated quantities
 blocks. For a description of argument and return types, see section
-[vectorized PRNG functions](conventions_for_probability_functions.Rmd#prng-vectorization).
+[vectorized PRNG functions](conventions_for_probability_functions.qmd#prng-vectorization).
 {{< since 2.18 >}}
 
 ### Numerical stability
diff --git a/src/functions-reference/complex-valued_basic_functions.Rmd b/src/functions-reference/complex-valued_basic_functions.qmd
similarity index 100%
rename from src/functions-reference/complex-valued_basic_functions.Rmd
rename to src/functions-reference/complex-valued_basic_functions.qmd
diff --git a/src/functions-reference/complex_matrix_operations.Rmd b/src/functions-reference/complex_matrix_operations.qmd
similarity index 100%
rename from src/functions-reference/complex_matrix_operations.Rmd
rename to src/functions-reference/complex_matrix_operations.qmd
diff --git a/src/functions-reference/compound_arithmetic_and_assignment.Rmd b/src/functions-reference/compound_arithmetic_and_assignment.qmd
similarity index 100%
rename from src/functions-reference/compound_arithmetic_and_assignment.Rmd
rename to src/functions-reference/compound_arithmetic_and_assignment.qmd
diff --git a/src/functions-reference/continuous_distributions_on_0_1.Rmd b/src/functions-reference/continuous_distributions_on_0_1.qmd
similarity index 97%
rename from src/functions-reference/continuous_distributions_on_0_1.Rmd
rename to src/functions-reference/continuous_distributions_on_0_1.qmd
index efc747bb5..3f59ba962 100644
--- a/src/functions-reference/continuous_distributions_on_0_1.Rmd
+++ b/src/functions-reference/continuous_distributions_on_0_1.qmd
@@ -16,7 +16,7 @@ If $\alpha \in \mathbb{R}^+$ and $\beta \in \mathbb{R}^+$, then for
 $\theta \in (0,1)$, \begin{equation*} \text{Beta}(\theta|\alpha,\beta) =
 \frac{1}{\mathrm{B}(\alpha,\beta)} \, \theta^{\alpha - 1} \, (1 -
 \theta)^{\beta - 1} , \end{equation*} where the beta function $\mathrm{B}()$ is as
-defined in section [combinatorial functions](real-valued_basic_functions.Rmd#betafun).
+defined in section [combinatorial functions](real-valued_basic_functions.qmd#betafun).
 
 _**Warning:**_  If $\theta = 0$ or $\theta = 1$, then the probability
 is 0 and the log probability is $-\infty$.  Similarly, the
@@ -85,7 +85,7 @@ prior failures (plus one) beta
 Generate a beta variate with positive prior successes (plus one) alpha
 and prior failures (plus one) beta; may only be used in transformed data and
 generated quantities blocks. For a description of argument and return types, see
-section [vectorized PRNG functions](conventions_for_probability_functions.Rmd#prng-vectorization).
+section [vectorized PRNG functions](conventions_for_probability_functions.qmd#prng-vectorization).
 {{< since 2.18 >}}
 
 ## Beta proportion distribution
@@ -97,7 +97,7 @@ If $\mu \in (0, 1)$ and $\kappa \in \mathbb{R}^+$, then for $\theta
 \frac{1}{\mathrm{B}(\mu \kappa, (1 - \mu) \kappa)} \,
 \theta^{\mu\kappa - 1} \, (1 - \theta)^{(1 - \mu)\kappa- 1} , \end{equation*} where
 the beta function $\mathrm{B}()$ is as defined in section
-[combinatorial functions](real-valued_basic_functions.Rmd#betafun).
+[combinatorial functions](real-valued_basic_functions.qmd#betafun).
 
 _**Warning:**_  If $\theta = 0$ or $\theta = 1$, then the probability
 is 0 and the log probability is $-\infty$.  Similarly, the
@@ -154,5 +154,5 @@ function of `theta` in $(0,1)$ given mean mu and precision kappa
 Generate a beta_proportion variate with mean mu and precision kappa;
 may only be used in transformed data and generated quantities blocks.
 For a description of argument and return types, see section
-[vectorized PRNG functions](conventions_for_probability_functions.Rmd#prng-vectorization).
+[vectorized PRNG functions](conventions_for_probability_functions.qmd#prng-vectorization).
 {{< since 2.18 >}}
diff --git a/src/functions-reference/conventions_for_probability_functions.Rmd b/src/functions-reference/conventions_for_probability_functions.qmd
similarity index 99%
rename from src/functions-reference/conventions_for_probability_functions.Rmd
rename to src/functions-reference/conventions_for_probability_functions.qmd
index 1ec3172b4..8719489e0 100644
--- a/src/functions-reference/conventions_for_probability_functions.Rmd
+++ b/src/functions-reference/conventions_for_probability_functions.qmd
@@ -143,7 +143,7 @@ Stan's univariate log probability functions, including the log density
 functions, log mass functions, log CDFs, and log CCDFs, all support
 vectorized function application, with results defined to be the sum of
 the elementwise application of the function.  Some of the PRNG
-functions support vectorization, see section [vectorized PRNG functions](conventions_for_probability_functions.Rmd#prng-vectorization)
+functions support vectorization, see section [vectorized PRNG functions](conventions_for_probability_functions.qmd#prng-vectorization)
 for more details.
 
 In all cases, matrix operations are at least as fast and usually
diff --git a/src/functions-reference/correlation_matrix_distributions.Rmd b/src/functions-reference/correlation_matrix_distributions.qmd
similarity index 100%
rename from src/functions-reference/correlation_matrix_distributions.Rmd
rename to src/functions-reference/correlation_matrix_distributions.qmd
diff --git a/src/functions-reference/covariance_matrix_distributions.Rmd b/src/functions-reference/covariance_matrix_distributions.qmd
similarity index 100%
rename from src/functions-reference/covariance_matrix_distributions.Rmd
rename to src/functions-reference/covariance_matrix_distributions.qmd
diff --git a/src/functions-reference/deprecated_functions.Rmd b/src/functions-reference/deprecated_functions.qmd
similarity index 99%
rename from src/functions-reference/deprecated_functions.Rmd
rename to src/functions-reference/deprecated_functions.qmd
index 91825c446..1f9375584 100644
--- a/src/functions-reference/deprecated_functions.Rmd
+++ b/src/functions-reference/deprecated_functions.qmd
@@ -34,7 +34,7 @@ $$ 1.0 / 2.0 = 0.5 $$
 ## integrate_ode_rk45, integrate_ode_adams, integrate_ode_bdf ODE Integrators {#functions-old-ode-solver}
 
 These ODE integrator functions have been replaced by those described in
-[Ordinary Differential Equation (ODE) Solvers](higher-order_functions.Rmd#functions-ode-solver).
+[Ordinary Differential Equation (ODE) Solvers](higher-order_functions.qmd#functions-ode-solver).
 
 ### Specifying an ordinary differential equation as a function
 
@@ -191,7 +191,7 @@ be passed to the system function
 ## algebra_solver, algebra_solver_newton algebraic solvers {#functions-old-algebra-solver}
 
 These algebraic solver functions have been replaced by those described in
-[Algebraic Equation Solvers](higher-order_functions.Rmd#functions-algebraic-solver)..
+[Algebraic Equation Solvers](higher-order_functions.qmd#functions-algebraic-solver).
 
 ### Specifying an algebraic equation as a function
 
diff --git a/src/functions-reference/distributions_over_unbounded_vectors.Rmd b/src/functions-reference/distributions_over_unbounded_vectors.qmd
similarity index 99%
rename from src/functions-reference/distributions_over_unbounded_vectors.Rmd
rename to src/functions-reference/distributions_over_unbounded_vectors.qmd
index 92b0ac132..6488e2e80 100644
--- a/src/functions-reference/distributions_over_unbounded_vectors.Rmd
+++ b/src/functions-reference/distributions_over_unbounded_vectors.qmd
@@ -39,7 +39,7 @@ The multivariate normal probability function is overloaded to allow
 the variate vector $y$ and location vector $\mu$ to be vectors or row
 vectors (or to mix the two types).  The density function is also
 vectorized, so it allows arrays of row vectors or vectors as
-arguments; see section [vectorized function signatures](conventions_for_probability_functions.Rmd#prob-vectorization) for a description of
+arguments; see section [vectorized function signatures](conventions_for_probability_functions.qmd#prob-vectorization) for a description of
 vectorization.
 
 <!-- real; multi_normal_lpdf; (vectors y | vectors mu, matrix Sigma); -->
@@ -627,7 +627,7 @@ The log of the multivariate Student-$t$ density of vector or array of
 vectors `y` given
 degrees of freedom `nu`, location vector or array of vectors `mu`, and Cholesky factor of the scale matrix `L`.
 For a definition of the arguments compatible with the `vectors` type,
-see the [probability vectorization section](conventions_for_probability_functions.Rmd#prob-vectorization).
+see the [probability vectorization section](conventions_for_probability_functions.qmd#prob-vectorization).
 {{< since 2.30 >}}
 
 
@@ -640,7 +640,7 @@ array `y` given
 degrees of freedom `nu`, location vector or vector array `mu`, and Cholesky factor of the scale matrix `L`,
 dropping constant additive terms.  For a definition of arguments
 compatible with the `vectors` type, see the [probability vectorization
-section](conventions_for_probability_functions.Rmd#prob-vectorization).
+section](conventions_for_probability_functions.qmd#prob-vectorization).
 {{< since 2.30 >}}
 
 
diff --git a/src/functions-reference/hidden_markov_models.Rmd b/src/functions-reference/hidden_markov_models.qmd
similarity index 100%
rename from src/functions-reference/hidden_markov_models.Rmd
rename to src/functions-reference/hidden_markov_models.qmd
diff --git a/src/functions-reference/higher-order_functions.Rmd b/src/functions-reference/higher-order_functions.qmd
similarity index 100%
rename from src/functions-reference/higher-order_functions.Rmd
rename to src/functions-reference/higher-order_functions.qmd
diff --git a/src/functions-reference/integer-valued_basic_functions.Rmd b/src/functions-reference/integer-valued_basic_functions.qmd
similarity index 99%
rename from src/functions-reference/integer-valued_basic_functions.Rmd
rename to src/functions-reference/integer-valued_basic_functions.qmd
index 3be25f231..fb55bca13 100644
--- a/src/functions-reference/integer-valued_basic_functions.Rmd
+++ b/src/functions-reference/integer-valued_basic_functions.qmd
@@ -166,7 +166,7 @@ Return the step function of x as an integer, \begin{equation*} \mathrm{int\_step
 or } x \text{ is } NaN \end{cases} \end{equation*} _**Warning:**_ `int_step(0)` and
 `int_step(NaN)` return 0 whereas `step(0)` and `step(NaN)` return 1.
 
-See the warning in section [step functions](real-valued_basic_functions.Rmd#step-functions) about the dangers of
+See the warning in section [step functions](real-valued_basic_functions.qmd#step-functions) about the dangers of
 step functions applied to anything other than data.
 {{< since 2.0 >}}
 
diff --git a/src/functions-reference/mathematical_functions.Rmd b/src/functions-reference/mathematical_functions.qmd
similarity index 100%
rename from src/functions-reference/mathematical_functions.Rmd
rename to src/functions-reference/mathematical_functions.qmd
diff --git a/src/functions-reference/matrix_operations.Rmd b/src/functions-reference/matrix_operations.qmd
similarity index 99%
rename from src/functions-reference/matrix_operations.Rmd
rename to src/functions-reference/matrix_operations.qmd
index 7be7f27e3..2f6bc2a4a 100644
--- a/src/functions-reference/matrix_operations.Rmd
+++ b/src/functions-reference/matrix_operations.qmd
@@ -392,7 +392,7 @@ Elementwise functions apply a function to each element of a vector or
 matrix, returning a result of the same shape as the argument.  There
 are many functions that are vectorized in addition to the ad hoc cases
 listed in this section; see section
-[function vectorization](real-valued_basic_functions.Rmd#fun-vectorization)
+[function vectorization](real-valued_basic_functions.qmd#fun-vectorization)
 for the general cases.
 
 <!-- vector; operator.*; (vector x, vector y); -->
@@ -918,14 +918,14 @@ The product of the values in x, or 1 if x is empty
 ### Sample moments
 
 Full definitions are provided for sample moments in section
-[array reductions](array_operations.Rmd#array-reductions).
+[array reductions](array_operations.qmd#array-reductions).
 
 <!-- real; mean; (vector x); -->
 \index{{\tt \bfseries mean }!{\tt (vector x): real}|hyperpage}
 
 `real` **`mean`**`(vector x)`<br>\newline
 The sample mean of the values in x; see section
-[array reductions](array_operations.Rmd#array-reductions) for details.
+[array reductions](array_operations.qmd#array-reductions) for details.
 {{< since 2.0 >}}
 
 <!-- real; mean; (row_vector x); -->
@@ -933,7 +933,7 @@ The sample mean of the values in x; see section
 
 `real` **`mean`**`(row_vector x)`<br>\newline
 The sample mean of the values in x; see section
-[array reductions](array_operations.Rmd#array-reductions) for details.
+[array reductions](array_operations.qmd#array-reductions) for details.
 {{< since 2.0 >}}
 
 <!-- real; mean; (matrix x); -->
@@ -941,7 +941,7 @@ The sample mean of the values in x; see section
 
 `real` **`mean`**`(matrix x)`<br>\newline
 The sample mean of the values in x; see section
-[array reductions](array_operations.Rmd#array-reductions) for details.
+[array reductions](array_operations.qmd#array-reductions) for details.
 {{< since 2.0 >}}
 
 <!-- real; variance; (vector x); -->
@@ -949,7 +949,7 @@ The sample mean of the values in x; see section
 
 `real` **`variance`**`(vector x)`<br>\newline
 The sample variance of the values in x; see section
-[array reductions](array_operations.Rmd#array-reductions) for details.
+[array reductions](array_operations.qmd#array-reductions) for details.
 {{< since 2.0 >}}
 
 <!-- real; variance; (row_vector x); -->
@@ -957,7 +957,7 @@ The sample variance of the values in x; see section
 
 `real` **`variance`**`(row_vector x)`<br>\newline
 The sample variance of the values in x; see section
-[array reductions](array_operations.Rmd#array-reductions) for details.
+[array reductions](array_operations.qmd#array-reductions) for details.
 {{< since 2.0 >}}
 
 <!-- real; variance; (matrix x); -->
@@ -965,7 +965,7 @@ The sample variance of the values in x; see section
 
 `real` **`variance`**`(matrix x)`<br>\newline
 The sample variance of the values in x; see section
-[array reductions](array_operations.Rmd#array-reductions) for details.
+[array reductions](array_operations.qmd#array-reductions) for details.
 {{< since 2.0 >}}
 
 <!-- real; sd; (vector x); -->
@@ -973,7 +973,7 @@ The sample variance of the values in x; see section
 
 `real` **`sd`**`(vector x)`<br>\newline
 The sample standard deviation of the values in x; see section
-[array reductions](array_operations.Rmd#array-reductions) for details.
+[array reductions](array_operations.qmd#array-reductions) for details.
 {{< since 2.0 >}}
 
 <!-- real; sd; (row_vector x); -->
@@ -981,7 +981,7 @@ The sample standard deviation of the values in x; see section
 
 `real` **`sd`**`(row_vector x)`<br>\newline
 The sample standard deviation of the values in x; see section
-[array reductions](array_operations.Rmd#array-reductions) for details.
+[array reductions](array_operations.qmd#array-reductions) for details.
 {{< since 2.0 >}}
 
 <!-- real; sd; (matrix x); -->
@@ -989,7 +989,7 @@ The sample standard deviation of the values in x; see section
 
 `real` **`sd`**`(matrix x)`<br>\newline
 The sample standard deviation of the values in x; see section
-[array reductions](array_operations.Rmd#array-reductions) for details.
+[array reductions](array_operations.qmd#array-reductions) for details.
 {{< since 2.0 >}}
 
 ### Quantile
@@ -1074,7 +1074,7 @@ of size n.
 {{< since 2.0 >}}
 
 Unlike the situation with array broadcasting (see section
-[array broadcasting](array_operations.Rmd#array-broadcasting)), where there is a distinction between
+[array broadcasting](array_operations.qmd#array-broadcasting)), where there is a distinction between
 integer and real arguments, the following two statements produce the
 same result for vector broadcasting;  row vector and matrix
 broadcasting behave similarly.
@@ -2394,7 +2394,7 @@ components.
 
 ## Sort functions
 
-See the [sorting functions section](array_operations.Rmd#sorting-functions)
+See the [sorting functions section](array_operations.qmd#sorting-functions)
 for examples of how the functions work.
 
 <!-- vector; sort_asc; (vector v); -->
diff --git a/src/functions-reference/mixed_operations.Rmd b/src/functions-reference/mixed_operations.qmd
similarity index 100%
rename from src/functions-reference/mixed_operations.Rmd
rename to src/functions-reference/mixed_operations.qmd
diff --git a/src/functions-reference/multivariate_discrete_distributions.Rmd b/src/functions-reference/multivariate_discrete_distributions.qmd
similarity index 100%
rename from src/functions-reference/multivariate_discrete_distributions.Rmd
rename to src/functions-reference/multivariate_discrete_distributions.qmd
diff --git a/src/functions-reference/positive_continuous_distributions.Rmd b/src/functions-reference/positive_continuous_distributions.qmd
similarity index 98%
rename from src/functions-reference/positive_continuous_distributions.Rmd
rename to src/functions-reference/positive_continuous_distributions.qmd
index 5b7bd808f..5405810c0 100644
--- a/src/functions-reference/positive_continuous_distributions.Rmd
+++ b/src/functions-reference/positive_continuous_distributions.qmd
@@ -77,7 +77,7 @@ function of y given location mu and scale sigma
 Generate a lognormal variate with location mu and scale sigma; may
 only be used in transformed data and generated quantities blocks.
 For a description of argument and return types, see section
-[vectorized PRNG functions](conventions_for_probability_functions.Rmd#prng-vectorization).
+[vectorized PRNG functions](conventions_for_probability_functions.qmd#prng-vectorization).
 {{< since 2.22 >}}
 
 ## Chi-square distribution
@@ -145,7 +145,7 @@ function of y given degrees of freedom nu
 Generate a Chi-square variate with degrees of freedom nu; may only be
 used in transformed data and generated quantities blocks.
 For a description of argument and return types, see section
-[vectorized PRNG functions](conventions_for_probability_functions.Rmd#prng-vectorization).
+[vectorized PRNG functions](conventions_for_probability_functions.qmd#prng-vectorization).
 {{< since 2.18 >}}
 
 ## Inverse chi-square distribution
@@ -215,7 +215,7 @@ distribution function of y given degrees of freedom nu
 Generate an inverse Chi-squared variate with degrees of freedom nu;
 may only be used in transformed data and generated quantities blocks.
 For a description of argument and return types, see section
-[vectorized PRNG functions](conventions_for_probability_functions.Rmd#prng-vectorization).
+[vectorized PRNG functions](conventions_for_probability_functions.qmd#prng-vectorization).
 {{< since 2.18 >}}
 
 ## Scaled inverse chi-square distribution
@@ -286,7 +286,7 @@ distribution function of y given degrees of freedom nu and scale sigma
 Generate a scaled inverse Chi-squared variate with degrees of freedom
 nu and scale sigma; may only be used in transformed data and generated
 quantities blocks. For a description of argument and return types, see section
-[vectorized PRNG functions](conventions_for_probability_functions.Rmd#prng-vectorization).
+[vectorized PRNG functions](conventions_for_probability_functions.qmd#prng-vectorization).
 {{< since 2.18 >}}
 
 ## Exponential distribution {#exponential-distribution}
@@ -353,7 +353,7 @@ function of y given inverse scale beta
 Generate an exponential variate with inverse scale beta; may only be
 used in transformed data and generated quantities blocks.
 For a description of argument and return types, see section
-[vectorized PRNG functions](conventions_for_probability_functions.Rmd#prng-vectorization).
+[vectorized PRNG functions](conventions_for_probability_functions.qmd#prng-vectorization).
 {{< since 2.18 >}}
 
 ## Gamma distribution
@@ -423,7 +423,7 @@ y given shape alpha and inverse scale beta
 Generate a gamma variate with shape alpha and inverse scale beta; may
 only be used in transformed data and generated quantities blocks.
 For a description of argument and return types, see section
-[vectorized PRNG functions](conventions_for_probability_functions.Rmd#prng-vectorization).
+[vectorized PRNG functions](conventions_for_probability_functions.qmd#prng-vectorization).
 {{< since 2.18 >}}
 
 ## Inverse gamma Distribution
@@ -493,7 +493,7 @@ function of y given shape alpha and scale beta
 Generate an inverse gamma variate with shape alpha and scale beta; may
 only be used in transformed data and generated quantities blocks.
 For a description of argument and return types, see section
-[vectorized PRNG functions](conventions_for_probability_functions.Rmd#prng-vectorization).
+[vectorized PRNG functions](conventions_for_probability_functions.qmd#prng-vectorization).
 {{< since 2.18 >}}
 
 ## Weibull distribution
@@ -566,7 +566,7 @@ of y given shape alpha and scale sigma
 Generate a Weibull variate with shape alpha and scale sigma; may only
 be used in transformed data and generated quantities blocks.
 For a description of argument and return types, see section
-[vectorized PRNG functions](conventions_for_probability_functions.Rmd#prng-vectorization).
+[vectorized PRNG functions](conventions_for_probability_functions.qmd#prng-vectorization).
 {{< since 2.18 >}}
 
 ## Frechet distribution
@@ -639,7 +639,7 @@ of y given shape alpha and scale sigma
 Generate a Frechet variate with shape alpha and scale sigma; may only
 be used in transformed data and generated quantities blocks.
 For a description of argument and return types, see section
-[vectorized PRNG functions](conventions_for_probability_functions.Rmd#prng-vectorization).
+[vectorized PRNG functions](conventions_for_probability_functions.qmd#prng-vectorization).
 {{< since 2.18 >}}
 
 ## Rayleigh distribution
@@ -704,7 +704,7 @@ given scale sigma
 `R` **`rayleigh_rng`**`(reals sigma)`<br>\newline
 Generate a Rayleigh variate with scale sigma; may only be used in
 transformed data and generated quantities blocks. For a description of argument and return
-types, see section [vectorized PRNG functions](conventions_for_probability_functions.Rmd#prng-vectorization).
+types, see section [vectorized PRNG functions](conventions_for_probability_functions.qmd#prng-vectorization).
 {{< since 2.18 >}}
 
 
@@ -752,5 +752,5 @@ The log-logistic cumulative distribution function of y given scale alpha and sha
 Generate a log-logistic variate with scale alpha and shape beta; may only
 be used in transformed data and generated quantities blocks.
 For a description of argument and return types, see section
-[vectorized PRNG functions](conventions_for_probability_functions.Rmd#prng-vectorization).
+[vectorized PRNG functions](conventions_for_probability_functions.qmd#prng-vectorization).
 {{< since 2.29 >}}
diff --git a/src/functions-reference/positive_lower-bounded_distributions.Rmd b/src/functions-reference/positive_lower-bounded_distributions.qmd
similarity index 99%
rename from src/functions-reference/positive_lower-bounded_distributions.Rmd
rename to src/functions-reference/positive_lower-bounded_distributions.qmd
index e2812a255..155be7370 100644
--- a/src/functions-reference/positive_lower-bounded_distributions.Rmd
+++ b/src/functions-reference/positive_lower-bounded_distributions.qmd
@@ -77,7 +77,7 @@ of y given positive minimum value y_min and shape alpha
 Generate a Pareto variate with positive minimum value y_min and shape
 alpha; may only be used in transformed data and generated quantities blocks. For a
 description of argument and return types, see section
-[vectorized PRNG functions](conventions_for_probability_functions.Rmd#prng-vectorization).
+[vectorized PRNG functions](conventions_for_probability_functions.qmd#prng-vectorization).
 {{< since 2.18 >}}
 
 ## Pareto type 2 distribution
@@ -152,7 +152,7 @@ function of y given location mu, scale lambda, and shape alpha
 Generate a Pareto Type 2 variate with location mu, scale lambda, and
 shape alpha; may only be used in transformed data and generated quantities blocks.
 For a description of argument and return types, see section
-[vectorized PRNG functions](conventions_for_probability_functions.Rmd#prng-vectorization).
+[vectorized PRNG functions](conventions_for_probability_functions.qmd#prng-vectorization).
 {{< since 2.18 >}}
 
 ## Wiener First Passage Time Distribution
diff --git a/src/functions-reference/real-valued_basic_functions.Rmd b/src/functions-reference/real-valued_basic_functions.qmd
similarity index 99%
rename from src/functions-reference/real-valued_basic_functions.Rmd
rename to src/functions-reference/real-valued_basic_functions.qmd
index ae3965c87..845f6f256 100644
--- a/src/functions-reference/real-valued_basic_functions.Rmd
+++ b/src/functions-reference/real-valued_basic_functions.qmd
@@ -1059,7 +1059,7 @@ The inverse of the complementary log-log function applied to x
 
 The error function `erf` is related to the standard normal cumulative
 distribution function $\Phi$ by scaling.  See section
-[normal distribution](unbounded_continuous_distributions.Rmd#normal-distribution) for the general normal cumulative
+[normal distribution](unbounded_continuous_distributions.qmd#normal-distribution) for the general normal cumulative
 distribution function (and its complement).
 
 <!-- R; erf; (T x); -->
@@ -1155,7 +1155,7 @@ Vectorized implementation of the `owens_t` function
 Return the beta function applied to alpha and beta. The beta function,
 $\text{B}(\alpha,\beta)$, computes the normalizing constant for the beta
 distribution, and is defined for $\alpha > 0$ and $\beta > 0$. See section
-[appendix](mathematical_functions.Rmd#beta-appendix) for definition of $\text{B}(\alpha, \beta)$.
+[appendix](mathematical_functions.qmd#beta-appendix) for definition of $\text{B}(\alpha, \beta)$.
 {{< since 2.25 >}}
 
 <!-- R; beta; (T1 x, T2 y); -->
@@ -1170,7 +1170,7 @@ Vectorized implementation of the `beta` function
 
 `real` **`inc_beta`**`(real alpha, real beta, real x)`<br>\newline
 Return the regularized incomplete beta function up to x applied to alpha and beta.
-See section [appendix](mathematical_functions.Rmd#inc-beta-appendix) for a definition.
+See section [appendix](mathematical_functions.qmd#inc-beta-appendix) for a definition.
 {{< since 2.10 >}}
 
 <!-- real; inv_inc_beta; (real alpha, real beta, real p); -->
@@ -1179,7 +1179,7 @@ See section [appendix](mathematical_functions.Rmd#inc-beta-appendix) for a defin
 `real` **`inv_inc_beta`**`(real alpha, real beta, real p)`<br>\newline
 Return the inverse of the regularized incomplete beta function. The return value
 `x` is the value that solves `p = inc_beta(alpha, beta, x)`.
-See section [appendix](mathematical_functions.Rmd#inc-beta-appendix) for a definition of the `inc_beta`.
+See section [appendix](mathematical_functions.qmd#inc-beta-appendix) for a definition of the `inc_beta`.
 {{< since 2.30 >}}
 
 <!-- real; lbeta; (real alpha, real beta); -->
@@ -1193,7 +1193,7 @@ $\alpha > 0$ and $\beta > 0$.
 \begin{equation*}
 \text{lbeta}(\alpha,\beta) = \log \Gamma(\alpha) + \log \Gamma(\beta) - \log \Gamma(\alpha+\beta)
 \end{equation*}
-See section [appendix](mathematical_functions.Rmd#beta-appendix) for definition of $\text{B}(\alpha, \beta)$.
+See section [appendix](mathematical_functions.qmd#beta-appendix) for definition of $\text{B}(\alpha, \beta)$.
 {{< since 2.0 >}}
 
 <!-- R; lbeta; (T1 x, T2 y); -->
diff --git a/src/functions-reference/references.Rmd b/src/functions-reference/references.qmd
similarity index 100%
rename from src/functions-reference/references.Rmd
rename to src/functions-reference/references.qmd
diff --git a/src/functions-reference/removed_functions.Rmd b/src/functions-reference/removed_functions.qmd
similarity index 98%
rename from src/functions-reference/removed_functions.Rmd
rename to src/functions-reference/removed_functions.qmd
index 0c7a22c06..50ec064fc 100644
--- a/src/functions-reference/removed_functions.Rmd
+++ b/src/functions-reference/removed_functions.qmd
@@ -42,7 +42,7 @@ to Stan's type promotion rules.
 
 ## Exponentiated quadratic covariance functions {#cov_exp_quad}
 
-These covariance functions have been replaced by those described in [Gaussian Process Covariance Functions](matrix_operations.Rmd#gaussian-process-covariance-functions)
+These covariance functions have been replaced by those described in [Gaussian Process Covariance Functions](matrix_operations.qmd#gaussian-process-covariance-functions).
 
 With magnitude $\alpha$ and length scale $l$, the exponentiated quadratic kernel is:
 
diff --git a/src/functions-reference/simplex_distributions.Rmd b/src/functions-reference/simplex_distributions.qmd
similarity index 98%
rename from src/functions-reference/simplex_distributions.Rmd
rename to src/functions-reference/simplex_distributions.qmd
index 90ac949c2..41acd92a2 100644
--- a/src/functions-reference/simplex_distributions.Rmd
+++ b/src/functions-reference/simplex_distributions.qmd
@@ -107,7 +107,7 @@ The Dirichlet probability functions are overloaded to allow
 the simplex $\theta$ and prior counts (plus one) $\alpha$ to be vectors or
 row vectors (or to mix the two types).  The density functions are also
 vectorized, so they allow arrays of row vectors or vectors as
-arguments; see section [vectorized function signatures](conventions_for_probability_functions.Rmd#prob-vectorization) for a description of
+arguments; see section [vectorized function signatures](conventions_for_probability_functions.qmd#prob-vectorization) for a description of
 vectorization.
 
 
diff --git a/src/functions-reference/sparse_matrix_operations.Rmd b/src/functions-reference/sparse_matrix_operations.qmd
similarity index 100%
rename from src/functions-reference/sparse_matrix_operations.Rmd
rename to src/functions-reference/sparse_matrix_operations.qmd
diff --git a/src/functions-reference/unbounded_continuous_distributions.Rmd b/src/functions-reference/unbounded_continuous_distributions.qmd
similarity index 99%
rename from src/functions-reference/unbounded_continuous_distributions.Rmd
rename to src/functions-reference/unbounded_continuous_distributions.qmd
index 327864e4a..7e3df3fb1 100644
--- a/src/functions-reference/unbounded_continuous_distributions.Rmd
+++ b/src/functions-reference/unbounded_continuous_distributions.qmd
@@ -89,7 +89,7 @@ other than a standard normal.
 Generate a normal variate with location mu and scale sigma; may only
 be used in transformed data and generated quantities blocks.
 For a description of argument and return types, see section
-[vectorized PRNG functions](conventions_for_probability_functions.Rmd#prng-vectorization).
+[vectorized PRNG functions](conventions_for_probability_functions.qmd#prng-vectorization).
 {{< since 2.18 >}}
 
 ### Standard normal distribution
@@ -448,7 +448,7 @@ lambda
 Generate an exponentially modified normal variate with location mu,
 scale sigma, and shape lambda; may only be used in transformed data and generated
 quantities blocks. For a description of argument and return types, see
-section [vectorized PRNG functions](conventions_for_probability_functions.Rmd#prng-vectorization).
+section [vectorized PRNG functions](conventions_for_probability_functions.qmd#prng-vectorization).
 {{< since 2.18 >}}
 
 ## Skew normal distribution
@@ -520,7 +520,7 @@ function of y given location xi, scale omega, and shape alpha
 Generate a skew normal variate with location xi, scale omega, and
 shape alpha; may only be used in transformed data and generated quantities blocks.
 For a description of argument and return types, see section
-[vectorized PRNG functions](conventions_for_probability_functions.Rmd#prng-vectorization).
+[vectorized PRNG functions](conventions_for_probability_functions.qmd#prng-vectorization).
 {{< since 2.18 >}}
 
 ## Student-t distribution
@@ -593,7 +593,7 @@ sigma
 Generate a Student-$t$ variate with degrees of freedom nu, location
 mu, and scale sigma; may only be used in transformed data and generated
 quantities blocks. For a description of argument and return types, see section
-[vectorized PRNG functions](conventions_for_probability_functions.Rmd#prng-vectorization).
+[vectorized PRNG functions](conventions_for_probability_functions.qmd#prng-vectorization).
 {{< since 2.18 >}}
 
 ## Cauchy distribution
@@ -662,7 +662,7 @@ of y given location mu and scale sigma
 Generate a Cauchy variate with location mu and scale sigma; may only
 be used in transformed data and generated quantities blocks.
 For a description of argument and return types, see section
-[vectorized PRNG functions](conventions_for_probability_functions.Rmd#prng-vectorization).
+[vectorized PRNG functions](conventions_for_probability_functions.qmd#prng-vectorization).
 {{< since 2.18 >}}
 
 ## Double exponential (Laplace) distribution
@@ -674,7 +674,7 @@ If $\mu \in \mathbb{R}$ and $\sigma \in \mathbb{R}^+$, then for $y \in
 \frac{1}{2\sigma}   \exp \left( - \, \frac{|y - \mu|}{\sigma} \right)
 . \end{equation*} Note that the double exponential distribution is parameterized in
 terms of the scale, in contrast to the exponential distribution (see
-section [exponential distribution](positive_continuous_distributions.Rmd#exponential-distribution)), which is
+section [exponential distribution](positive_continuous_distributions.qmd#exponential-distribution)), which is
 parameterized in terms of inverse scale.
 
 The double-exponential distribution can be defined as a compound
@@ -747,7 +747,7 @@ distribution function of y given location mu and scale sigma
 Generate a double exponential variate with location mu and scale
 sigma; may only be used in transformed data and generated quantities blocks. For a
 description of argument and return types, see section
-[vectorized PRNG functions](conventions_for_probability_functions.Rmd#prng-vectorization).
+[vectorized PRNG functions](conventions_for_probability_functions.qmd#prng-vectorization).
 {{< since 2.18 >}}
 
 ## Logistic distribution
@@ -817,7 +817,7 @@ of y given location mu and scale sigma
 Generate a logistic variate with location mu and scale sigma; may only
 be used in transformed data and generated quantities blocks.
 For a description of argument and return types, see section
-[vectorized PRNG functions](conventions_for_probability_functions.Rmd#prng-vectorization).
+[vectorized PRNG functions](conventions_for_probability_functions.qmd#prng-vectorization).
 {{< since 2.18 >}}
 
 ## Gumbel distribution
@@ -886,7 +886,7 @@ of y given location mu and scale beta
 Generate a Gumbel variate with location mu and scale beta; may only be
 used in transformed data and generated quantities blocks. For a description
 of argument and return types, see section
-[vectorized PRNG functions](conventions_for_probability_functions.Rmd#prng-vectorization).
+[vectorized PRNG functions](conventions_for_probability_functions.qmd#prng-vectorization).
 {{< since 2.18 >}}
 
 ## Skew double exponential distribution
@@ -957,5 +957,5 @@ distribution function of y given location mu, scale sigma and skewness tau
 Generate a skew double exponential variate with location mu, scale
 sigma and skewness tau; may only be used in transformed data and generated quantities blocks. For a
 description of argument and return types, see section
-[vectorized PRNG functions](conventions_for_probability_functions.Rmd#prng-vectorization).
+[vectorized PRNG functions](conventions_for_probability_functions.qmd#prng-vectorization).
 {{< since 2.28 >}}
diff --git a/src/functions-reference/unbounded_discrete_distributions.Rmd b/src/functions-reference/unbounded_discrete_distributions.qmd
similarity index 99%
rename from src/functions-reference/unbounded_discrete_distributions.Rmd
rename to src/functions-reference/unbounded_discrete_distributions.qmd
index 5a96984b1..425e25e65 100644
--- a/src/functions-reference/unbounded_discrete_distributions.Rmd
+++ b/src/functions-reference/unbounded_discrete_distributions.qmd
@@ -85,7 +85,7 @@ function of `n` given shape `alpha` and inverse scale `beta`
 Generate a negative binomial variate with shape `alpha` and inverse
 scale `beta`; may only be used in transformed data and generated quantities blocks.
 `alpha` $/$ `beta` must be less than $2 ^ {29}$. For a description of argument and
-return types, see section [vectorized function signatures](conventions_for_probability_functions.Rmd#prob-vectorization).
+return types, see section [vectorized function signatures](conventions_for_probability_functions.qmd#prob-vectorization).
 {{< since 2.18 >}}
 
 ## Negative binomial distribution (alternative parameterization) {#nbalt}
@@ -93,7 +93,7 @@ return types, see section [vectorized function signatures](conventions_for_proba
 Stan also provides an alternative parameterization of the negative
 binomial distribution directly using a mean (i.e., location) parameter
 and a parameter that controls overdispersion relative to the square of
-the mean.  Section [combinatorial functions](real-valued_basic_functions.Rmd#betafun), below, provides a second
+the mean.  Section [combinatorial functions](real-valued_basic_functions.qmd#betafun), below, provides a second
 alternative parameterization directly in terms of the log mean.
 
 ### Probability mass function
@@ -173,7 +173,7 @@ function of `n` given location `mu` and precision `phi`.
 Generate a negative binomial variate with location `mu` and precision
 `phi`; may only be used in transformed data and generated quantities blocks. `mu`
 must be less than $2 ^ {29}$. For a description of argument and return types, see
-section [vectorized function signatures](conventions_for_probability_functions.Rmd#prob-vectorization).
+section [vectorized function signatures](conventions_for_probability_functions.qmd#prob-vectorization).
 {{< since 2.18 >}}
 
 ## Negative binomial distribution (log alternative parameterization) {#neg-binom-2-log}
@@ -221,7 +221,7 @@ and inverse overdispersion parameter `phi` dropping constant additive terms.
 Generate a negative binomial variate with log-location `eta` and inverse
 overdispersion control `phi`; may only be used in transformed data and generated
 quantities blocks. `eta` must be less than $29 \log 2$. For a description of
-argument and return types, see section [vectorized function signatures](conventions_for_probability_functions.Rmd#prob-vectorization).
+argument and return types, see section [vectorized function signatures](conventions_for_probability_functions.qmd#prob-vectorization).
 {{< since 2.18 >}}
 
 ## Negative-binomial-2-log generalized linear model (negative binomial regression) {#neg-binom-2-log-glm}
@@ -418,7 +418,7 @@ of n given rate lambda
 Generate a Poisson variate with rate lambda; may only be used in
 transformed data and generated quantities blocks. lambda must be less than
 $2^{30}$. For a description of argument and return types, see section
-[vectorized function signatures](conventions_for_probability_functions.Rmd#prob-vectorization).
+[vectorized function signatures](conventions_for_probability_functions.qmd#prob-vectorization).
 {{< since 2.18 >}}
 
 ## Poisson distribution, log parameterization
@@ -468,7 +468,7 @@ additive terms
 Generate a Poisson variate with log rate alpha; may only be used in
 transformed data and generated quantities blocks. alpha must be less than
 $30 \log 2$. For a description of argument and return types, see section
-[vectorized function signatures](conventions_for_probability_functions.Rmd#prob-vectorization).
+[vectorized function signatures](conventions_for_probability_functions.qmd#prob-vectorization).
 {{< since 2.18 >}}
 
 ## Poisson-log generalized linear model (Poisson regression) {#poisson-log-glm}
diff --git a/src/functions-reference/void_functions.Rmd b/src/functions-reference/void_functions.qmd
similarity index 100%
rename from src/functions-reference/void_functions.Rmd
rename to src/functions-reference/void_functions.qmd
diff --git a/src/reference-manual/_quarto.yml b/src/reference-manual/_quarto.yml
index c2cfa8995..3beeffc26 100644
--- a/src/reference-manual/_quarto.yml
+++ b/src/reference-manual/_quarto.yml
@@ -36,34 +36,34 @@ book:
     - index.qmd
     - part: "Language"
       chapters:
-        - encoding.Rmd
-        - includes.Rmd
-        - comments.Rmd
-        - whitespace.Rmd
-        - types.Rmd
-        - expressions.Rmd
-        - statements.Rmd
-        - blocks.Rmd
-        - user-functions.Rmd
-        - transforms.Rmd
-        - syntax.Rmd
-        - execution.Rmd
-        - deprecations.Rmd
-        - removals.Rmd
+        - encoding.qmd
+        - includes.qmd
+        - comments.qmd
+        - whitespace.qmd
+        - types.qmd
+        - expressions.qmd
+        - statements.qmd
+        - blocks.qmd
+        - user-functions.qmd
+        - transforms.qmd
+        - syntax.qmd
+        - execution.qmd
+        - deprecations.qmd
+        - removals.qmd
 
     - part: "Algorithms"
       chapters:
-        - mcmc.Rmd
-        - analysis.Rmd
-        - optimization.Rmd
-        - pathfinder.Rmd
-        - variational.Rmd
-        - laplace.Rmd
-        - diagnostics.Rmd
+        - mcmc.qmd
+        - analysis.qmd
+        - optimization.qmd
+        - pathfinder.qmd
+        - variational.qmd
+        - laplace.qmd
+        - diagnostics.qmd
 
     - part: "Usage"
       chapters:
-        - reproducibility.Rmd
-        - licenses.Rmd
+        - reproducibility.qmd
+        - licenses.qmd
 
-    - references.Rmd
+    - references.qmd
diff --git a/src/reference-manual/analysis.Rmd b/src/reference-manual/analysis.qmd
similarity index 99%
rename from src/reference-manual/analysis.Rmd
rename to src/reference-manual/analysis.qmd
index 485b2d967..7b06e4d6a 100644
--- a/src/reference-manual/analysis.Rmd
+++ b/src/reference-manual/analysis.qmd
@@ -35,7 +35,7 @@ p(\theta) = p(\theta^{(1)}) \prod_{n=2}^N p(\theta^{(n)}|\theta^{(n-1)}).
 $$
 
 Stan uses Hamiltonian Monte Carlo to generate a next state in a manner
-described in the [Hamiltonian Monte Carlo chapter](mcmc.Rmd).
+described in the [Hamiltonian Monte Carlo chapter](mcmc.qmd).
 
 The Markov chains Stan and other MCMC samplers generate are *ergodic*
 in the sense required by the Markov chain central limit theorem,
@@ -91,7 +91,7 @@ for the finite number of draws actually available.
 
 To establish basic notation, suppose a target Bayesian posterior
 density $p(\theta | y)$ given real-valued vectors of parameters
-$\theta$ and real- and discrete-valued data $y$.^[Using vectors simplifies high level exposition at the expense of collapsing structure.] 
+$\theta$ and real- and discrete-valued data $y$.^[Using vectors simplifies high level exposition at the expense of collapsing structure.]
 
 An MCMC *sample* consists of a set of $M$ Markov chains,
 each consisting of an ordered sequence of $N$ *draws* from the
@@ -506,4 +506,4 @@ large.  To summarize, *the only reason to thin a sample is to reduce
 memory requirements*.
 
 If draws are anticorrelated, then thinning will increase correlation
-and reduce the overall effective sample size.
\ No newline at end of file
+and reduce the overall effective sample size.
diff --git a/src/reference-manual/blocks.Rmd b/src/reference-manual/blocks.qmd
similarity index 99%
rename from src/reference-manual/blocks.Rmd
rename to src/reference-manual/blocks.qmd
index 9bb9a0958..8f52cb8e2 100644
--- a/src/reference-manual/blocks.Rmd
+++ b/src/reference-manual/blocks.qmd
@@ -457,7 +457,7 @@ The probability function on the unconstrained parameters is defined in
 such a way that the order of the parameters in the vector corresponds
 to the order of the variables defined in the `parameters` program
 block.  The details of the specific transformations are provided in
-the [variable transforms chapter](transforms.Rmd).
+the [variable transforms chapter](transforms.qmd).
 
 
 ### Gradient calculation {-}
@@ -513,7 +513,7 @@ does not match its constraint is that the current parameter values
 will be rejected.  This can cause Stan's algorithms to hang or
 to devolve to random walks.  It is not intended to be a way to enforce
 ad hoc constraints in Stan programs.
-See the [section on reject statements](statements.Rmd#reject-statements)
+See the [section on reject statements](statements.qmd#reject-statements)
 for further discussion of the behavior of reject statements.
 
 
diff --git a/src/reference-manual/comments.Rmd b/src/reference-manual/comments.qmd
similarity index 100%
rename from src/reference-manual/comments.Rmd
rename to src/reference-manual/comments.qmd
diff --git a/src/reference-manual/deprecations.Rmd b/src/reference-manual/deprecations.qmd
similarity index 100%
rename from src/reference-manual/deprecations.Rmd
rename to src/reference-manual/deprecations.qmd
diff --git a/src/reference-manual/diagnostics.Rmd b/src/reference-manual/diagnostics.qmd
similarity index 100%
rename from src/reference-manual/diagnostics.Rmd
rename to src/reference-manual/diagnostics.qmd
diff --git a/src/reference-manual/encoding.Rmd b/src/reference-manual/encoding.qmd
similarity index 100%
rename from src/reference-manual/encoding.Rmd
rename to src/reference-manual/encoding.qmd
diff --git a/src/reference-manual/execution.Rmd b/src/reference-manual/execution.qmd
similarity index 99%
rename from src/reference-manual/execution.Rmd
rename to src/reference-manual/execution.qmd
index bc236c28b..b7dcde2a4 100644
--- a/src/reference-manual/execution.Rmd
+++ b/src/reference-manual/execution.qmd
@@ -132,7 +132,7 @@ transformed and the below-diagonal values are unconstrained.
 
 The initial values for other parameters can be determined from the
 transform that is applied.  The transforms are all described in full
-detail in the [chapter on variable transforms](transforms.Rmd).
+detail in the [chapter on variable transforms](transforms.qmd).
 
 
 ### Zero initial values {-}
diff --git a/src/reference-manual/expressions.Rmd b/src/reference-manual/expressions.qmd
similarity index 99%
rename from src/reference-manual/expressions.Rmd
rename to src/reference-manual/expressions.qmd
index 7bcaab032..366513bca 100644
--- a/src/reference-manual/expressions.Rmd
+++ b/src/reference-manual/expressions.qmd
@@ -35,7 +35,7 @@ The following list contains well-formed integer literals.
 ```
 
 Integer literals must have values that fall within the bounds for integer values
-(see [the section on numerical data types](types.Rmd#numerical-data-types.section)).
+(see [the section on numerical data types](types.qmd#numerical-data-types.section)).
 
 Integer literals may not contain decimal points (`.`).  Thus the
 expressions `1.` and `1.0` are of type `real` and may
@@ -239,7 +239,7 @@ shells and editors to display them properly.
 ## Container expressions {#vector-matrix-array-expressions.section}
 
 Expressions for the Stan container objects, namely arrays, vectors,
-row vectors, matrices, and tuples, can all be constructed using expressions.  
+row vectors, matrices, and tuples, can all be constructed using expressions.
 
 ### Vector expressions {-}
 
@@ -423,7 +423,7 @@ array[0] int a;   // a is fully defined as zero element array
 There is no way to declare or construct a zero-tuple or one-tuple in
 Stan.  Tuples must be at least two elements long.  The expression `()`
 does not pick out a zero-tuple---it is ill formed.  Similarly, the
-expression `(1)` is of type `int` rather than a tuple.  
+expression `(1)` is of type `int` rather than a tuple.
 
 ## Parentheses for grouping
 
diff --git a/src/reference-manual/includes.Rmd b/src/reference-manual/includes.qmd
similarity index 100%
rename from src/reference-manual/includes.Rmd
rename to src/reference-manual/includes.qmd
diff --git a/src/reference-manual/laplace.Rmd b/src/reference-manual/laplace.qmd
similarity index 100%
rename from src/reference-manual/laplace.Rmd
rename to src/reference-manual/laplace.qmd
diff --git a/src/reference-manual/licenses.Rmd b/src/reference-manual/licenses.qmd
similarity index 100%
rename from src/reference-manual/licenses.Rmd
rename to src/reference-manual/licenses.qmd
diff --git a/src/reference-manual/mcmc.Rmd b/src/reference-manual/mcmc.qmd
similarity index 99%
rename from src/reference-manual/mcmc.Rmd
rename to src/reference-manual/mcmc.qmd
index e1e30dc7c..a22ed551a 100644
--- a/src/reference-manual/mcmc.Rmd
+++ b/src/reference-manual/mcmc.qmd
@@ -2,7 +2,7 @@
 pagetitle: MCMC Sampling
 ---
 
-# MCMC Sampling {#hmc.chapter} 
+# MCMC Sampling {#hmc.chapter}
 
 This chapter presents the two Markov chain Monte Carlo (MCMC)
 algorithms used in Stan, the Hamiltonian Monte Carlo (HMC) algorithm
@@ -574,8 +574,8 @@ could be declared as a parameter and initialized as a parameter.
 
 Stan's interfaces provide a number of configuration options that are
 shared among the MCMC algorithms (this chapter), the [optimization
-algorithms chapter](optimization.Rmd), and the
-[diagnostics chapter](diagnostics.Rmd).
+algorithms chapter](optimization.qmd), and the
+[diagnostics chapter](diagnostics.qmd).
 
 
 ### Random number generator {-}
diff --git a/src/reference-manual/optimization.Rmd b/src/reference-manual/optimization.qmd
similarity index 100%
rename from src/reference-manual/optimization.Rmd
rename to src/reference-manual/optimization.qmd
diff --git a/src/reference-manual/pathfinder.Rmd b/src/reference-manual/pathfinder.qmd
similarity index 100%
rename from src/reference-manual/pathfinder.Rmd
rename to src/reference-manual/pathfinder.qmd
diff --git a/src/reference-manual/references.Rmd b/src/reference-manual/references.qmd
similarity index 100%
rename from src/reference-manual/references.Rmd
rename to src/reference-manual/references.qmd
diff --git a/src/reference-manual/removals.Rmd b/src/reference-manual/removals.qmd
similarity index 100%
rename from src/reference-manual/removals.Rmd
rename to src/reference-manual/removals.qmd
diff --git a/src/reference-manual/reproducibility.Rmd b/src/reference-manual/reproducibility.qmd
similarity index 100%
rename from src/reference-manual/reproducibility.Rmd
rename to src/reference-manual/reproducibility.qmd
diff --git a/src/reference-manual/statements.Rmd b/src/reference-manual/statements.qmd
similarity index 99%
rename from src/reference-manual/statements.Rmd
rename to src/reference-manual/statements.qmd
index d7697e33b..958d7736a 100644
--- a/src/reference-manual/statements.Rmd
+++ b/src/reference-manual/statements.qmd
@@ -5,7 +5,7 @@ pagetitle: Statements
 # Statements
 
 The blocks of a Stan program  are made up of
-variable declarations and statements; see the [blocks chapter](blocks.Rmd) for
+variable declarations and statements; see the [blocks chapter](blocks.qmd) for
 details.  Unlike programs in BUGS, the
 declarations and statements making up a Stan program are executed in
 the order in which they are written.  Variables must be defined to
@@ -28,7 +28,7 @@ because these blocks are solely used to declare the data variables for
 input and the parameter variables for sampling.  All other blocks
 allow statements.  In these blocks, both variable declarations and
 statements are allowed.  All top-level variables in a block are
-considered block variables. See the [blocks chapter](blocks.Rmd)
+considered block variables. See the [blocks chapter](blocks.qmd)
 for more information about the block structure of Stan programs.
 
 
@@ -147,7 +147,7 @@ dimensions.
 ### Multiple indexes {-}
 
 Multiple indexes, as described in the
-[multi-indexing section](expressions.Rmd#language-multi-indexing.section), are also permitted on the
+[multi-indexing section](expressions.qmd#language-multi-indexing.section), are also permitted on the
 left-hand side of assignments.  Indexing on the left side works
 exactly as it does for expressions, with multiple indexes preserving
 index positions and single indexes reducing them.    The type on the
diff --git a/src/reference-manual/syntax.Rmd b/src/reference-manual/syntax.qmd
similarity index 99%
rename from src/reference-manual/syntax.Rmd
rename to src/reference-manual/syntax.qmd
index 061bdff5a..abcbcb3ba 100644
--- a/src/reference-manual/syntax.Rmd
+++ b/src/reference-manual/syntax.qmd
@@ -348,7 +348,7 @@ Many of the tokens used in the BNF grammars follow obviously
 from their names: `DATABLOCK` is the literal string 'data',
 `COMMA` is a single ',' character, etc. The literal representation
 of each operator is additionally provided in the [operator
-precedence table](expressions.Rmd#operator-precedence-table).
+precedence table](expressions.qmd#operator-precedence-table).
 
 A few tokens are not so obvious, and are defined here in
 regular expressions:
@@ -388,7 +388,7 @@ compilation with an error message indicating the location of the problem.
 
 In the Stan grammar provided in this chapter, the expression `1 + 2 *
 3` has two parses.  As described in the [operator precedence
-table](expressions.Rmd#operator-precedence-table), Stan disambiguates between the meaning $1
+table](expressions.qmd#operator-precedence-table), Stan disambiguates between the meaning $1
 + (2 \times 3)$ and the meaning $(1 + 2) \times 3$ based on operator
 precedences and associativities.
 
diff --git a/src/reference-manual/transforms.Rmd b/src/reference-manual/transforms.qmd
similarity index 100%
rename from src/reference-manual/transforms.Rmd
rename to src/reference-manual/transforms.qmd
diff --git a/src/reference-manual/types.Rmd b/src/reference-manual/types.qmd
similarity index 99%
rename from src/reference-manual/types.Rmd
rename to src/reference-manual/types.qmd
index 74746435c..9a356b6c7 100644
--- a/src/reference-manual/types.Rmd
+++ b/src/reference-manual/types.qmd
@@ -597,7 +597,7 @@ Vectors and matrices cannot be typed to return integer values.  They
 are restricted to `real` and `complex` values.
 
 For constructing vectors and matrices in Stan, see [Vector, Matrix,
-and Array Expressions](expressions.Rmd#vector-matrix-array-expressions.section).
+and Array Expressions](expressions.qmd#vector-matrix-array-expressions.section).
 
 ### Indexing from 1 {-}
 
@@ -1032,7 +1032,7 @@ a convenient shorthand for `a[m][n]`.
 Vectors, matrices, and arrays are not assignable to one another, even
 if their dimensions are identical.
 
-For constructing arrays in Stan, see [Vector, Matrix, and Array Expressions](expressions.Rmd#vector-matrix-array-expressions.section).
+For constructing arrays in Stan, see [Vector, Matrix, and Array Expressions](expressions.qmd#vector-matrix-array-expressions.section).
 
 ### Declaring array variables {-}
 
@@ -1399,7 +1399,7 @@ For convenience of using values stored in tuples, Stan supports
 
 Given a tuple `t` of type `tuple(T1, ..., Tn)` and a sequence of
 [assignable expressions](#rhs-expressions) of types
-`v1`, ..., `vn`, where each `vi` has a [type which is assignable](statements.Rmd#promotion)
+`v1`, ..., `vn`, where each `vi` has a [type which is assignable](statements.qmd#promotion)
 from type `Ti`, individual elements of the tuple may be assigned to the
 corresponding variables in the sequence by the statement
 
@@ -1538,7 +1538,7 @@ block variables, like those inside `data`, which can have
 [constraints](#constrained-data-types) and must include sizes for their types,
 like in the above examples. Local variables, like those defined inside loops
 or local blocks cannot be constrained, but still include sizes. Finally,
-variables declared as [function parameters](user-functions.Rmd#argument-types-and-qualifiers)
+variables declared as [function parameters](user-functions.qmd#argument-types-and-qualifiers)
 are not constrained types and _exclude_ sizes.
 
 ```stan
diff --git a/src/reference-manual/user-functions.Rmd b/src/reference-manual/user-functions.qmd
similarity index 99%
rename from src/reference-manual/user-functions.Rmd
rename to src/reference-manual/user-functions.qmd
index 9285f908c..b9c11c62f 100644
--- a/src/reference-manual/user-functions.Rmd
+++ b/src/reference-manual/user-functions.qmd
@@ -174,7 +174,7 @@ block or user-defined probability functions.
 See the [section on function bodies](#function-bodies.section) for more
 information on these special types of function.
 
-## Argument types and qualifiers {#argument-types-and-qualifiers]
+## Argument types and qualifiers {#argument-types-and-qualifiers}
 
 Stan's functions all have declared types for both arguments and
 returned value.  As with built-in functions, user-defined functions are
diff --git a/src/reference-manual/variational.Rmd b/src/reference-manual/variational.qmd
similarity index 100%
rename from src/reference-manual/variational.Rmd
rename to src/reference-manual/variational.qmd
diff --git a/src/reference-manual/whitespace.Rmd b/src/reference-manual/whitespace.qmd
similarity index 100%
rename from src/reference-manual/whitespace.Rmd
rename to src/reference-manual/whitespace.qmd
diff --git a/src/sdlc.Rmd b/src/sdlc.qmd
similarity index 100%
rename from src/sdlc.Rmd
rename to src/sdlc.qmd
diff --git a/src/stan-users-guide/_quarto.yml b/src/stan-users-guide/_quarto.yml
index d95e07071..436aae1a0 100644
--- a/src/stan-users-guide/_quarto.yml
+++ b/src/stan-users-guide/_quarto.yml
@@ -36,46 +36,46 @@ book:
     - index.qmd
     - part: "Example Models"
       chapters:
-        - regression.Rmd
-        - time-series.Rmd
-        - missing-data.Rmd
-        - truncation-censoring.Rmd
-        - finite-mixtures.Rmd
-        - measurement-error.Rmd
-        - latent-discrete.Rmd
-        - sparse-ragged.Rmd
-        - clustering.Rmd
-        - gaussian-processes.Rmd
-        - hyperspherical-models.Rmd
-        - algebraic-equations.Rmd
-        - odes.Rmd
-        - one-dimensional-integrals.Rmd
-        - complex-numbers.Rmd
-        - dae.Rmd
+        - regression.qmd
+        - time-series.qmd
+        - missing-data.qmd
+        - truncation-censoring.qmd
+        - finite-mixtures.qmd
+        - measurement-error.qmd
+        - latent-discrete.qmd
+        - sparse-ragged.qmd
+        - clustering.qmd
+        - gaussian-processes.qmd
+        - hyperspherical-models.qmd
+        - algebraic-equations.qmd
+        - odes.qmd
+        - one-dimensional-integrals.qmd
+        - complex-numbers.qmd
+        - dae.qmd
     - part:  "Programming Techniques"
       chapters:
-        - floating-point.Rmd
-        - matrices-arrays.Rmd
-        - multi-indexing.Rmd
-        - user-functions.Rmd
-        - custom-probability.Rmd
-        - proportionality-constants.Rmd
-        - problematic-posteriors.Rmd
-        - reparameterization.Rmd
-        - efficiency-tuning.Rmd
-        - parallelization.Rmd
+        - floating-point.qmd
+        - matrices-arrays.qmd
+        - multi-indexing.qmd
+        - user-functions.qmd
+        - custom-probability.qmd
+        - proportionality-constants.qmd
+        - problematic-posteriors.qmd
+        - reparameterization.qmd
+        - efficiency-tuning.qmd
+        - parallelization.qmd
     - part: "Posterior Inference & Model Checking"
       chapters:
-        - posterior-prediction.Rmd
-        - simulation-based-calibration.Rmd
-        - posterior-predictive-checks.Rmd
-        - cross-validation.Rmd
-        - poststratification.Rmd
-        - decision-analysis.Rmd
-        - bootstrap.Rmd
+        - posterior-prediction.qmd
+        - simulation-based-calibration.qmd
+        - posterior-predictive-checks.qmd
+        - cross-validation.qmd
+        - poststratification.qmd
+        - decision-analysis.qmd
+        - bootstrap.qmd
     - part: "Appendices"
       chapters:
-        - using-stanc.Rmd
-        - style-guide.Rmd
-        - for-bugs-users.Rmd
-    - references.Rmd
+        - using-stanc.qmd
+        - style-guide.qmd
+        - for-bugs-users.qmd
+    - references.qmd
diff --git a/src/stan-users-guide/algebraic-equations.Rmd b/src/stan-users-guide/algebraic-equations.qmd
similarity index 98%
rename from src/stan-users-guide/algebraic-equations.Rmd
rename to src/stan-users-guide/algebraic-equations.qmd
index 8379a9825..e108dd769 100644
--- a/src/stan-users-guide/algebraic-equations.Rmd
+++ b/src/stan-users-guide/algebraic-equations.qmd
@@ -36,7 +36,7 @@ A system of algebraic equations is coded directly in Stan as a
 function with a strictly specified signature. For example, the
 nonlinear system given above can be coded using the
 following function in Stan (see the [user-defined functions
-section](user-functions.Rmd) for more information on coding
+section](user-functions.qmd) for more information on coding
 user-defined functions).
 
 ```stan
@@ -92,7 +92,7 @@ The solver has three tuning parameters to determine convergence: the
 relative tolerance, the function tolerance, and the maximum number of
 steps.  Their behavior is explained in
 the section about [algebraic solvers with control
-parameters](algebraic-equations.Rmd#algebra-control.section).
+parameters](algebraic-equations.qmd#algebra-control.section).
 
 The following code returns the solution to our nonlinear algebraic system:
 
diff --git a/src/stan-users-guide/before-chapter.R b/src/stan-users-guide/before-chapter.R
deleted file mode 100644
index 17abaea98..000000000
--- a/src/stan-users-guide/before-chapter.R
+++ /dev/null
@@ -1,5 +0,0 @@
-library(dplyr)
-library(ggplot2)
-library(kableExtra)
-
-options(digits = 2)
diff --git a/src/stan-users-guide/bootstrap.Rmd b/src/stan-users-guide/bootstrap.qmd
similarity index 100%
rename from src/stan-users-guide/bootstrap.Rmd
rename to src/stan-users-guide/bootstrap.qmd
diff --git a/src/stan-users-guide/clustering.Rmd b/src/stan-users-guide/clustering.qmd
similarity index 99%
rename from src/stan-users-guide/clustering.Rmd
rename to src/stan-users-guide/clustering.qmd
index 283f90e8d..6e4e685d7 100644
--- a/src/stan-users-guide/clustering.Rmd
+++ b/src/stan-users-guide/clustering.qmd
@@ -13,11 +13,11 @@ a form of clustering which may be supervised.  These models are
 typically expressed using discrete parameters for cluster assignments.
 Nevertheless, they can be implemented in Stan like any other mixture
 model by marginalizing out the discrete parameters (see
-the [mixture modeling chapter](finite-mixtures.Rmd)).
+the [mixture modeling chapter](finite-mixtures.qmd)).
 
 ## Relation to finite mixture models
 
-As mentioned in the [clustering section](finite-mixtures.Rmd#clustering-mixture.section),
+As mentioned in the [clustering section](finite-mixtures.qmd#clustering-mixture.section),
 clustering models and finite mixture models are really just two sides
 of the same coin.  The "soft" $K$-means model described in the next
 section is a normal mixture model (with varying assumptions about
@@ -169,7 +169,7 @@ inference for clustering models, the lack of parameter identifiability
 and the extreme multimodality of the posteriors.  There is additional
 discussion related to the non-identifiability due to label switching
 in the [label switching
-section](problematic-posteriors.Rmd#label-switching-problematic.section).
+section](problematic-posteriors.qmd#label-switching-problematic.section).
 
 ### Non-identifiability {-}
 
diff --git a/src/stan-users-guide/complex-numbers.Rmd b/src/stan-users-guide/complex-numbers.qmd
similarity index 100%
rename from src/stan-users-guide/complex-numbers.Rmd
rename to src/stan-users-guide/complex-numbers.qmd
diff --git a/src/stan-users-guide/cross-validation.Rmd b/src/stan-users-guide/cross-validation.qmd
similarity index 99%
rename from src/stan-users-guide/cross-validation.Rmd
rename to src/stan-users-guide/cross-validation.qmd
index 517da2c40..01052b639 100644
--- a/src/stan-users-guide/cross-validation.Rmd
+++ b/src/stan-users-guide/cross-validation.qmd
@@ -416,7 +416,7 @@ structured.  For example, in a simple natural language application,
 data might be structured by document.  For cross-validation, one needs
 to cross-validate at the document level, not at the individual word
 level.  This is related to [mixed replication in posterior predictive
-checking](posterior-predictive-checks.Rmd#mixed-replication), where there is a choice to simulate new
+checking](posterior-predictive-checks.qmd#mixed-replication), where there is a choice to simulate new
 elements of existing groups or generate entirely new groups.
 
 Education testing applications are typically grouped by school
diff --git a/src/stan-users-guide/custom-probability.Rmd b/src/stan-users-guide/custom-probability.qmd
similarity index 100%
rename from src/stan-users-guide/custom-probability.Rmd
rename to src/stan-users-guide/custom-probability.qmd
diff --git a/src/stan-users-guide/dae.Rmd b/src/stan-users-guide/dae.qmd
similarity index 96%
rename from src/stan-users-guide/dae.Rmd
rename to src/stan-users-guide/dae.qmd
index 46d945bff..617d20fc1 100644
--- a/src/stan-users-guide/dae.Rmd
+++ b/src/stan-users-guide/dae.qmd
@@ -8,7 +8,7 @@ Stan support solving systems of differential-algebraic equations
 (DAEs) of index 1 [@serban_user:2021]. The solver adaptively
 refines the solutions in order to satisfy given tolerances.
 
-One can think a differential-algebraic system of equations 
+One can think of a differential-algebraic system of equations
 as ODEs with additional algebraic constraints applied to some
 of the variables. In such a system, the variable derivatives may not be
 expressed explicitly with a right-hand-side as in ODEs, but implicitly
@@ -18,12 +18,12 @@ Similar to ODE solvers, the DAE
 solvers must not only provide the solution to the DAE itself, but also
 the gradient of the DAE solution with respect to parameters (the
 sensitivities). Stan's DAE solver uses
-the *forward sensitivity* calculation to expand the base DAE system 
+the *forward sensitivity* calculation to expand the base DAE system
 with additional DAE equations for the gradients of the solution.
 For each parameter, an additional full set of $N$
 sensitivity states are added meaning that the full DAE solved has
 $N \, + N \cdot M$ states.
-  
+
 Two interfaces are provided for the forward sensitivity solver: one
 with default tolerances and default max number of steps, and one
 that allows these controls to be modified. Choosing tolerances is
@@ -32,7 +32,7 @@ will not work everywhere. The tolerances should be chosen primarily
 with consideration to the scales of the solutions, the accuracy
 needed for the solutions, and how the solutions are used in the
 model. The same principles in the [control parameters
-section](#control-ode.section) apply here.
+section](odes.qmd#control-ode.section) apply here.
 
 Internally Stan's DAE solver uses a variable-step, variable-order,
 backward-differentiation formula implementation
@@ -69,7 +69,7 @@ example the reaction rate coefficients $(\alpha, \beta, \gamma)$.
 
 Unlike solving ODEs, solving DAEs requires a *consistent* initial
 condition. That is, one must specify both $y(t_0)$
-and $y'(t_0)$ so that residual function becomes zero at initial time $t_0$ 
+and $y'(t_0)$ so that residual function becomes zero at initial time $t_0$
 \begin{equation*}
 r(y'(t_0), y(t_0), t_0) = 0
 \end{equation*}
@@ -109,7 +109,7 @@ reaction rate coefficient $\alpha$, $\beta$, and $\gamma$), data, or any quantit
 DAE.
 
 The above reaction be coded using the following function
-in Stan (see the [user-defined functions chapter](user-functions.Rmd) for
+in Stan (see the [user-defined functions chapter](user-functions.qmd) for
 more information on coding user-defined functions).
 
 ```stan
@@ -152,11 +152,11 @@ vector my_dae3(real t, vector y, vector yp, matrix a0, array[] real a1, row_vect
 but these are not allowed:
 
 ```stan
-vector my_dae1(real t, array[] real y, vector yp); 
+vector my_dae1(real t, array[] real y, vector yp);
 // Second argument is not a vector
-array[] real my_dae2(real t, vector y, vector yp); 
+array[] real my_dae2(real t, vector y, vector yp);
 // Return type is not a vector
-vector my_dae3(real t, vector y); 
+vector my_dae3(real t, vector y);
 // First argument is not a real and missing the third argument
 ```
 
@@ -183,7 +183,7 @@ transformed parameters {
 }
 ```
 
-Since `gamma` is a parameter, the DAE solver is called in the transformed parameters block. 
+Since `gamma` is a parameter, the DAE solver is called in the transformed parameters block.
 
 ## Control parameters for DAE solving {#control-dae.section}
 
diff --git a/src/stan-users-guide/decision-analysis.Rmd b/src/stan-users-guide/decision-analysis.qmd
similarity index 100%
rename from src/stan-users-guide/decision-analysis.Rmd
rename to src/stan-users-guide/decision-analysis.qmd
diff --git a/src/stan-users-guide/efficiency-tuning.Rmd b/src/stan-users-guide/efficiency-tuning.qmd
similarity index 99%
rename from src/stan-users-guide/efficiency-tuning.Rmd
rename to src/stan-users-guide/efficiency-tuning.qmd
index 9bc0accd6..a22e84e86 100644
--- a/src/stan-users-guide/efficiency-tuning.Rmd
+++ b/src/stan-users-guide/efficiency-tuning.qmd
@@ -166,7 +166,7 @@ model so that the same result may be calculated using a density or
 penalized maximum likelihood that is better conditioned.  Again, see
 the example of reparameterizing Neal's funnel for an example, and also
 the examples in the [change of variables
-chapter](reparameterization.Rmd).
+chapter](reparameterization.qmd).
 
 One has to be careful in using change-of-variables reparameterizations
 when using maximum likelihood estimation, because they can change the
@@ -523,7 +523,7 @@ distribution approaches a normal distribution.  Thus the parameter
 
 Unfortunately, the usual situation in applied Bayesian modeling
 involves complex geometries and interactions that are not known
-analytically.  Nevertheless, reparameterization can still be 
+analytically.  Nevertheless, reparameterization can still be
 effective for separating parameters.
 
 #### Centered parameterization {-}
@@ -1391,7 +1391,7 @@ The parameters are renamed to indicate that they aren't the
 "natural" parameters, but the model is otherwise identical.  In
 particular, the fairly diffuse priors on the coefficients and error
 scale are the same.  These could have been transformed as well, but
-here they are left as is, because the scales make sense as 
+here they are left as is, because the scales make sense as
 diffuse priors for standardized data; the priors could be made more
 informative.  For instance, because the outputs $y$ have been
 standardized, the error $\sigma$ should not be greater than 1, because
@@ -1486,7 +1486,7 @@ cancel, as does subtracting the log of the scale.
 
 The map-reduce operation, even without multi-core MPI support, can be
 used to make programs more scalable and also more efficient.  See the
-[map-reduce chapter](parallelization.Rmd) for more information on
+[map-reduce chapter](parallelization.qmd) for more information on
 implementing map-reduce operations.
 
 Map-reduce allows greater scalability because only the Jacobian of the
diff --git a/src/stan-users-guide/finite-mixtures.Rmd b/src/stan-users-guide/finite-mixtures.qmd
similarity index 99%
rename from src/stan-users-guide/finite-mixtures.Rmd
rename to src/stan-users-guide/finite-mixtures.qmd
index a004a640c..eefdf8240 100644
--- a/src/stan-users-guide/finite-mixtures.Rmd
+++ b/src/stan-users-guide/finite-mixtures.qmd
@@ -16,7 +16,7 @@ be used as priors for other parameters.
 ## Relation to clustering {#clustering-mixture.section}
 
 Clustering models, as discussed in the [clustering
-chapter](clustering.Rmd), are just a particular class of mixture
+chapter](clustering.qmd), are just a particular class of mixture
 models that have been widely applied to clustering in the engineering
 and machine-learning literature.  The normal mixture model discussed
 in this chapter reappears in multivariate form as the statistical
@@ -189,7 +189,7 @@ On the log scale, the normalized probability is computed as
                                                  + \log p\left(z_n = k' \mid \lambda\right)\big).
 \end{align*}
 This can be coded up directly in Stan; the change-point model in the
-[change point section](latent-discrete.Rmd#change-point.section) provides an example.
+[change point section](latent-discrete.qmd#change-point.section) provides an example.
 
 ### Estimating parameters of a mixture {-}
 
diff --git a/src/stan-users-guide/floating-point.Rmd b/src/stan-users-guide/floating-point.qmd
similarity index 100%
rename from src/stan-users-guide/floating-point.Rmd
rename to src/stan-users-guide/floating-point.qmd
diff --git a/src/stan-users-guide/for-bugs-users.Rmd b/src/stan-users-guide/for-bugs-users.qmd
similarity index 99%
rename from src/stan-users-guide/for-bugs-users.Rmd
rename to src/stan-users-guide/for-bugs-users.qmd
index 8112303e7..74a4f6d2c 100644
--- a/src/stan-users-guide/for-bugs-users.Rmd
+++ b/src/stan-users-guide/for-bugs-users.qmd
@@ -289,7 +289,7 @@ language section of this manual.
 Stan supports general conditional statements using a standard
 if-else syntax.  For example, a zero-inflated (or -deflated) Poisson
 mixture model is defined using the if-else syntax as described in
-the [zero inflation section](finite-mitures.Rmd#zero-inflated.section).
+the [zero inflation section](finite-mixtures.qmd#zero-inflated.section).
 
 Stan supports general while loops using a standard syntax.
 While loops give Stan full Turing equivalent computational power.
diff --git a/src/stan-users-guide/gaussian-processes.Rmd b/src/stan-users-guide/gaussian-processes.qmd
similarity index 100%
rename from src/stan-users-guide/gaussian-processes.Rmd
rename to src/stan-users-guide/gaussian-processes.qmd
diff --git a/src/stan-users-guide/hyperspherical-models.Rmd b/src/stan-users-guide/hyperspherical-models.qmd
similarity index 100%
rename from src/stan-users-guide/hyperspherical-models.Rmd
rename to src/stan-users-guide/hyperspherical-models.qmd
diff --git a/src/stan-users-guide/latent-discrete.Rmd b/src/stan-users-guide/latent-discrete.qmd
similarity index 98%
rename from src/stan-users-guide/latent-discrete.Rmd
rename to src/stan-users-guide/latent-discrete.qmd
index 02cce60ee..4e761b6cc 100644
--- a/src/stan-users-guide/latent-discrete.Rmd
+++ b/src/stan-users-guide/latent-discrete.qmd
@@ -3,8 +3,6 @@ pagetitle: Latent Discrete Parameters
 ---
 
 # Latent Discrete Parameters  {#latent-discrete.chapter}
-```{r include, file="before-chapter.R", echo=FALSE, message=FALSE, warning=FALSE}
-```
 
 Stan does not support sampling discrete parameters.  So it is not
 possible to directly translate BUGS or JAGS models with discrete
@@ -14,7 +12,7 @@ parameters by marginalizing out the discrete parameters.^[The computations are s
 
 This chapter shows how to code several widely-used models involving
 latent discrete parameters.  The next chapter, the [clustering
-chapter](clustering.Rmd), on clustering models, considers further
+chapter](clustering.qmd), on clustering models, considers further
 models involving latent discrete parameters.
 
 ## The benefits of marginalization {#rao-blackwell.section}
@@ -243,16 +241,12 @@ default MCMC implementation is shown in the posterior plot.
 
 Log probability of change point being in year, calculated analytically.
 
-```{r include=TRUE, fig.align="center", fig.cap=c("Analytical change-point posterior"), echo=FALSE}
-knitr::include_graphics("./img/change-point-posterior.png", auto_pdf = TRUE)
-```
+![Analytical change-point posterior](./img/change-point-posterior){width=50%}
 
 The frequency of change points generated by sampling the discrete change
 points.
 
-```{r include=TRUE, fig.align="center", fig.cap=c("Sampled change-point posterior"), echo=FALSE}
-knitr::include_graphics("./img/s-discrete-posterior.png", auto_pdf = TRUE)
-```
+![Sampled change-point posterior](./img/s-discrete-posterior){width=50%}
 
 In order their range of estimates be visible, the first plot is on the log
 scale and the second plot on the linear scale; note the narrower range
@@ -389,7 +383,7 @@ mark-recapture study.  The lower bound on $N$ is necessary to
 efficiently eliminate impossible values.
 
 The probabilistic variant of the Lincoln-Petersen estimator can be
-directly coded in Stan as shown in the Lincon-Petersen model figure. 
+directly coded in Stan as shown in the Lincoln-Petersen model figure.
 The Lincoln-Petersen estimate is the maximum likelihood estimate (MLE)
 for this model.
 
@@ -412,8 +406,8 @@ details of all constrained parameter transforms.
 
 ### Cormack-Jolly-Seber with discrete parameter {-}
 
-The Cormack-Jolly-Seber (CJS) model [@Cormack:1964; @Jolly:1965; @Seber:1965] 
-is an open-population model in which the population may change over time 
+The Cormack-Jolly-Seber (CJS) model [@Cormack:1964; @Jolly:1965; @Seber:1965]
+is an open-population model in which the population may change over time
 due to death; the presentation here draws heavily on @Schofield:2007.
 
 The basic data are
@@ -963,7 +957,7 @@ probability function on the log scale,
 \begin{align*}
 \log p(y \mid \theta, \pi)
  &= \sum_{i=1}^I \log \left( \sum_{k=1}^K \exp
-    \left(\log \textsf{categorical}(k \mid \pi) \vphantom{\sum_{j=1}^J}\right.\right. 
+    \left(\log \textsf{categorical}(k \mid \pi) \vphantom{\sum_{j=1}^J}\right.\right.
     \left.\left. + \ \sum_{j=1}^J
            \log \textsf{categorical}(y_{i, j} \mid \theta_{j, k})
     \right) \right),
@@ -1018,7 +1012,7 @@ model {
 ```
 
 The model marginalizes out the discrete parameter $z$, storing the
-unnormalized conditional probability $\log q(z_i=k|\theta,\pi)$ in 
+unnormalized conditional probability $\log q(z_i=k|\theta,\pi)$ in
 `log_q_z[i, k]`.
 
 The Stan model converges quickly and mixes well using NUTS starting at
@@ -1045,30 +1039,30 @@ distribution over each $z_i$.
 
 ### Introduction {-}
 
-This section describes in more detail the mathematics of statistical inference using the output of marginalized Stan models, such as those presented in the last three sections. It provides a mathematical explanation of why and how certain manipulations of Stan's output produce valid summaries of the posterior distribution when discrete parameters have been marginalized out of a statistical model. Ultimately, however, fully understanding the mathematics in this section is *not* necessary to fit models with discrete parameters using Stan.  
+This section describes in more detail the mathematics of statistical inference using the output of marginalized Stan models, such as those presented in the last three sections. It provides a mathematical explanation of why and how certain manipulations of Stan's output produce valid summaries of the posterior distribution when discrete parameters have been marginalized out of a statistical model. Ultimately, however, fully understanding the mathematics in this section is *not* necessary to fit models with discrete parameters using Stan.
 
-Throughout, the model under consideration consists of both continuous parameters, $\Theta$, and discrete parameters, $Z$. It is also assumed that $Z$ can only take finitely many values, as is the case for all the models described in this chapter of the User's Guide. To simplify notation, any conditioning on data is suppressed in this section, except where specified. As with all Bayesian analyses, however, all inferences using models with marginalized parameters are made conditional on the observed data. 
+Throughout, the model under consideration consists of both continuous parameters, $\Theta$, and discrete parameters, $Z$. It is also assumed that $Z$ can only take finitely many values, as is the case for all the models described in this chapter of the User's Guide. To simplify notation, any conditioning on data is suppressed in this section, except where specified. As with all Bayesian analyses, however, all inferences using models with marginalized parameters are made conditional on the observed data.
 
 ### Estimating expectations {-}
 
 When performing Bayesian inference, interest often centers on estimating some (constant) low-dimensional summary statistics of the posterior distribution. Mathematically, we are interested in estimating $\mu$, say, where $\mu = \mathbb{E}[g(\Theta, Z)]$ and $g(\cdot)$ is an arbitrary function. An example of such a quantity is $\mathbb{E}[\Theta]$, the posterior mean of the continuous parameters, where we would take $g(\theta, z) = \theta$. To estimate $\mu$ the most common approach is to sample a series of values, at least approximately, from the posterior distribution of the parameters of interest. The numerical values of these draws can then be used to calculate the quantities of interest. Often, this process of calculation is trivial, but more care is required when working with marginalized posteriors as we describe in this section.
 
 If both $\Theta$ and $Z$ were continuous, Stan could be used to sample $M$ draws from the joint posterior $p_{\Theta, Z}(\theta, z)$ and then estimate $\mu$ with
-$$   
+$$
 \hat{\mu} = \frac{1}{M} \sum_{i = 1}^M {g(\theta^{(i)}, z^{(i)})}.
 $$
-Given $Z$ is discrete, however, Stan cannot be used to sample from the joint posterior (or even to do optimization). Instead, as outlined in the previous sections describing specific models, the user can first marginalize out $Z$ from the joint posterior to give the marginalized posterior $p_\Theta(\theta)$. This marginalized posterior can then be implemented in Stan as usual, and Stan will give draws $\{\theta^{(i)}\}_{i = 1}^M$ from the marginalized posterior. 
+Given $Z$ is discrete, however, Stan cannot be used to sample from the joint posterior (or even to do optimization). Instead, as outlined in the previous sections describing specific models, the user can first marginalize out $Z$ from the joint posterior to give the marginalized posterior $p_\Theta(\theta)$. This marginalized posterior can then be implemented in Stan as usual, and Stan will give draws $\{\theta^{(i)}\}_{i = 1}^M$ from the marginalized posterior.
 
 Using only these draws, how can we estimate $\mathbb{E}[g(\Theta, Z)]$? We can use a conditional estimator. We explain in more detail below, but at a high level the idea is that, for each function $g$ of interest, we compute
-$$    
+$$
 h(\Theta) = \mathbb{E}[g(\Theta, Z) \mid \Theta]
 $$
 and then estimate $\mathbb{E}[g(\Theta, Z)]$ with
-$$    
+$$
 \hat{\mu} = \frac{1}{M} \sum_{i = 1}^M h(\theta^{(i)}).
 $$
 This estimator is justified by the law of iterated expectation, the fact that
-$$    
+$$
 \mathbb{E}[h(\Theta)] = \mathbb{E}[\mathbb{E}[g(\Theta, Z] \mid \Theta)] = \mathbb{E}[g(\Theta, Z)] = \mu.
 $$
 Using this marginalized estimator provides a way to estimate the expectation of any function $g(\cdot)$ for all combinations of discrete or continuous parameters in the model. However, it presents a possible new challenge: evaluating the conditional expectation $\mathbb{E}[g(\Theta, Z) \mid \Theta]$.
@@ -1076,12 +1070,12 @@ Using this marginalized estimator provides a way to estimate the expectation of
 ### Evaluating the conditional expectation {-}
 
 Fortunately, the discrete nature of $Z$ makes evaluating $\mathbb{E}[g(\Theta, Z) \mid \Theta]$ easy. The function $h(\Theta)$ can be written as:
-$$    
+$$
 h(\Theta)
 = \mathbb{E}[g(\Theta, Z) \mid \Theta]
 = \sum_{k} g(\Theta, k) \Pr[Z = k \mid \Theta],
 $$
-where we sum over the possible values of the latent discrete parameters. An essential part of this formula is the probability of the discrete parameters conditional on the continuous parameters, $\Pr[Z = k \mid \Theta]$. More detail on how this quantity can be calculated is included below. Note that if $Z$ takes infinitely many values then computing the infinite sums will involve, potentially computationally expensive, approximation. 
+where we sum over the possible values of the latent discrete parameters. An essential part of this formula is the probability of the discrete parameters conditional on the continuous parameters, $\Pr[Z = k \mid \Theta]$. More detail on how this quantity can be calculated is included below. Note that if $Z$ takes infinitely many values, then computing the infinite sums will involve approximation, which is potentially computationally expensive.
 
 When $g(\theta, z)$ is a function of either $\theta$ or $z$ only, the above formula simplifies further.
 
@@ -1093,7 +1087,7 @@ h(\Theta)
 &= g(\Theta).
 \end{align*}
 This means that we can estimate $\mathbb{E}[g(\Theta)]$ with the standard, seemingly unconditional, estimator:
-$$    
+$$
 \frac{1}{M} \sum_{i = 1}^M g(\theta^{(i)}).
 $$
 Even after marginalization, computing expectations of functions of the continuous parameters can be performed as if no marginalization had taken place.
@@ -1103,8 +1097,8 @@ $$
 h(\Theta) = \sum_{k} g(k) \Pr[Z = k \mid \Theta].
 $$
 An important special case of this result is when $g(\theta, z) = \textrm{I}(z = k)$, where $\textrm{I}$ is the indicator function. This choice allows us to recover the probability mass function of the discrete random variable $Z$, since $\mathbb{E}[\textrm{I}(Z = k)] = \Pr[Z = k]$. In this case,
-$$    
-h(\Theta) 
+$$
+h(\Theta)
 = \sum_{k} \textrm{I}(z = k) \Pr[Z = k \mid \Theta]
 = \Pr[Z = k \mid \Theta].
 $$
@@ -1117,11 +1111,11 @@ $$
 \frac{1}{M} \sum_{i = 1}^M \Pr[Z = k \mid \Theta = \theta^{(i)}, Y].
 $$
 This point is important as it suggests one of the main ways of calculating the required conditional probability. Using Bayes's theorem gives us
-$$    
+$$
 \Pr[Z = k \mid \Theta = \theta^{(i)}, Y]
 = \frac{\Pr[Y \mid Z = k, \Theta = \theta^{(i)}]
 \Pr[Z = k \mid \Theta = \theta^{(i)}]}
-{\sum_{k = 1}^K \Pr[Y \mid Z = k, \Theta = \theta^{(i)}] 
+{\sum_{k = 1}^K \Pr[Y \mid Z = k, \Theta = \theta^{(i)}]
 \Pr[Z = k \mid \Theta = \theta^{(i)}]}.
 $$
 Here, $\Pr[Y \mid \Theta = \theta^{(i)}, Z = k]$ is the likelihood conditional on a particular value of the latent variables. Crucially, all elements of the expression can be calculated using the draws from the posterior of the continuous parameters and knowledge of the model structure.
diff --git a/src/stan-users-guide/matrices-arrays.Rmd b/src/stan-users-guide/matrices-arrays.qmd
similarity index 99%
rename from src/stan-users-guide/matrices-arrays.Rmd
rename to src/stan-users-guide/matrices-arrays.qmd
index ffa08d062..d550327bc 100644
--- a/src/stan-users-guide/matrices-arrays.Rmd
+++ b/src/stan-users-guide/matrices-arrays.qmd
@@ -401,7 +401,7 @@ convert a matrix into a vector, or a multi-dimensional array into a
 one-dimensional array, or convert a vector to an array.  See the
 section on mixed matrix and array operations in the functions
 reference manual for a complete list of conversion operators and the
-[multi-indexing chapter](multi-indexing.Rmd) for some reshaping
+[multi-indexing chapter](multi-indexing.qmd) for some reshaping
 operations involving multiple indexing and range indexing.
 
 
diff --git a/src/stan-users-guide/measurement-error.Rmd b/src/stan-users-guide/measurement-error.qmd
similarity index 100%
rename from src/stan-users-guide/measurement-error.Rmd
rename to src/stan-users-guide/measurement-error.qmd
diff --git a/src/stan-users-guide/missing-data.Rmd b/src/stan-users-guide/missing-data.qmd
similarity index 98%
rename from src/stan-users-guide/missing-data.Rmd
rename to src/stan-users-guide/missing-data.qmd
index 7adc6e7e2..f229deac5 100644
--- a/src/stan-users-guide/missing-data.Rmd
+++ b/src/stan-users-guide/missing-data.qmd
@@ -149,9 +149,9 @@ model {
 The index arrays `ii_obs` and `ii_mis` contain the indexes into the
 final array `y` of the observed data (coded as a data vector `y_obs`)
 and the missing data (coded as a parameter vector `y_mis`).  See the
-[time series chapter](time-series.Rmd) for further discussion of
+[time series chapter](time-series.qmd) for further discussion of
 time-series model and specifically the [autoregression
-section](time-series.Rmd#autoregressive) for an explanation of the
+section](time-series.qmd#autoregressive) for an explanation of the
 vectorization for `y` as well as an explanation of how to convert this
 example to a full AR(1) model.  To ensure `y[1]` has a proper
 posterior in case it is missing, we have given it an explicit, albeit
@@ -171,7 +171,7 @@ where the relevant variables are all hard coded with index `2` because
 Stan doesn't support ragged arrays.  These could all be packed into a
 single array with more fiddly indexing that slices out vectors from
 longer vectors (see the [ragged data structures
-section](sparse-ragged.Rmd#ragged-data-structs.section) for a general discussion of
+section](sparse-ragged.qmd#ragged-data-structs.section) for a general discussion of
 coding ragged data structures in Stan).
 
 ## Loading matrix for factor analysis
diff --git a/src/stan-users-guide/multi-indexing.Rmd b/src/stan-users-guide/multi-indexing.qmd
similarity index 100%
rename from src/stan-users-guide/multi-indexing.Rmd
rename to src/stan-users-guide/multi-indexing.qmd
diff --git a/src/stan-users-guide/odes.Rmd b/src/stan-users-guide/odes.qmd
similarity index 98%
rename from src/stan-users-guide/odes.Rmd
rename to src/stan-users-guide/odes.qmd
index 315aae6d3..e46a84da9 100644
--- a/src/stan-users-guide/odes.Rmd
+++ b/src/stan-users-guide/odes.qmd
@@ -17,12 +17,12 @@ in Stan to solve this problem, each having very different
 computational cost depending on the number of ODE
 states $N$ and the number of parameters $M$ being used:
 
-* A *forward sensitivity* solver expands the base ODE system 
+* A *forward sensitivity* solver expands the base ODE system
   with additional ODE equations for the gradients of the solution.
   For each parameter, an additional full set of $N$
   sensitivity states are added meaning that the full ODE solved has
   $N \, + N \cdot M$ states.
-  
+
 * An *adjoint sensitivity* solver starts by solving the base ODE system
   forward in time to get the ODE solution and then solves
   another ODE system (the adjoint) backward in time to get the
@@ -152,7 +152,7 @@ coefficient), data, or any quantities that are needed to define the
 differential equation.
 
 The simple harmonic oscillator can be coded using the following function
-in Stan (see the [user-defined functions chapter](user-functions.Rmd) for
+in Stan (see the [user-defined functions chapter](user-functions.qmd) for
 more information on coding user-defined functions).
 
 ```stan
@@ -193,11 +193,11 @@ vector myode3(real t, vector y, matrix a0, array[] real a1, row_vector a2);
 but these are not allowed:
 
 ```stan
-vector myode1(real t, array[] real y, real a0); 
+vector myode1(real t, array[] real y, real a0);
 // Second argument is not a vector
-array[] real myode2(real t, vector y, real a0); 
+array[] real myode2(real t, vector y, real a0);
 // Return type is not a vector
-vector myode3(vector y, real a0); 
+vector myode3(vector y, real a0);
 // First argument is not a real and second is not a vector
 ```
 
@@ -288,9 +288,7 @@ Because all none of the input arguments are a function of parameters, the ODE
 solver is called in the generated quantities block. The random measurement noise
 is added to each of the `T` outputs with `normal_rng`.
 
-```{r  include = TRUE, echo = FALSE, fig.align = "center", fig.cap = "Typical realization of harmonic oscillator trajectory."}
-knitr::include_graphics("./img/sho-ode-trajectory.png", auto_pdf = TRUE)
-```
+![Typical realization of harmonic oscillator trajectory.](./img/sho-ode-trajectory.png){width=50%}
 
 ### Estimating system parameters and initial state {-}
 
@@ -527,8 +525,8 @@ array[T] vector[2] y_sim
                           max_num_steps,
                           150,                                   // number of steps between checkpoints
                           1,                                     // interpolation polynomial: 1=Hermite, 2=polynomial
-                          2,                                     // solver for forward phase: 1=Adams, 2=BDF 
-                          2,                                     // solver for backward phase: 1=Adams, 2=BDF 
+                          2,                                     // solver for forward phase: 1=Adams, 2=BDF
+                          2,                                     // solver for backward phase: 1=Adams, 2=BDF
                           theta);
 ```
 
diff --git a/src/stan-users-guide/one-dimensional-integrals.Rmd b/src/stan-users-guide/one-dimensional-integrals.qmd
similarity index 100%
rename from src/stan-users-guide/one-dimensional-integrals.Rmd
rename to src/stan-users-guide/one-dimensional-integrals.qmd
diff --git a/src/stan-users-guide/parallelization.Rmd b/src/stan-users-guide/parallelization.qmd
similarity index 100%
rename from src/stan-users-guide/parallelization.Rmd
rename to src/stan-users-guide/parallelization.qmd
diff --git a/src/stan-users-guide/posterior-prediction.Rmd b/src/stan-users-guide/posterior-prediction.qmd
similarity index 99%
rename from src/stan-users-guide/posterior-prediction.Rmd
rename to src/stan-users-guide/posterior-prediction.qmd
index 3b4797c6a..11310492e 100644
--- a/src/stan-users-guide/posterior-prediction.Rmd
+++ b/src/stan-users-guide/posterior-prediction.qmd
@@ -71,7 +71,7 @@ $$
 = \log \sum_{m = 1}^M \exp v_m
 $$
 is used to maintain arithmetic precision.  See the [section on log sum
-of exponentials](floating-point.Rmd#log-sum-of-exponentials) for more details.
+of exponentials](floating-point.qmd#log-sum-of-exponentials) for more details.
 
 
 ## Sampling from the posterior predictive distribution
diff --git a/src/stan-users-guide/posterior-predictive-checks.Rmd b/src/stan-users-guide/posterior-predictive-checks.qmd
similarity index 95%
rename from src/stan-users-guide/posterior-predictive-checks.Rmd
rename to src/stan-users-guide/posterior-predictive-checks.qmd
index acbac84fc..82ccf5dbe 100644
--- a/src/stan-users-guide/posterior-predictive-checks.Rmd
+++ b/src/stan-users-guide/posterior-predictive-checks.qmd
@@ -136,10 +136,7 @@ resulting small multiples plot shows the original data plotted in the
 upper left and eight different posterior replications plotted in the
 remaining boxes.
 
-
-```{r include = TRUE, echo = FALSE, fig.align = "center", fig.cap = "Posterior predictive checks for Poisson data generating process and Poisson model."}
-knitr::include_graphics("./img/ppc-pois-pois.jpg", auto_pdf = TRUE)
-```
+![Posterior predictive checks for Poisson data generating process and Poisson model.](./img/ppc-pois-pois.jpg){width=50% .lightbox}
 
 With a Poisson data-generating process and Poisson model, the
 posterior replications look similar to the original data.  If it were
@@ -155,9 +152,7 @@ $\sqrt{\lambda},$ which is $\sqrt{5}$ for $\textrm{Poisson}(5).$
 Here's the resulting small multiples plot, again with original data in
 the upper left.
 
-```{r include = TRUE, echo = FALSE, fig.align = "center", fig.cap = "Posterior predictive checks for negative binomial data generating process and Poisson model."}
-knitr::include_graphics("./img/ppc-nb-pois.jpg", auto_pdf = TRUE)
-```
+![Posterior predictive checks for negative binomial data generating process and Poisson model.](./img/ppc-nb-pois.jpg){width=50% .lightbox}
 
 This time, the original data stands out in stark contrast to the
 replicated data sets, all of which are clearly more symmetric and
@@ -220,9 +215,7 @@ data generated by a negative binomial distribution was fit with a
 simple Poisson model, the following plot illustrates the posterior
 p-value calculation for the mean statistic.
 
-```{r include = TRUE, echo = FALSE, fig.align = "center", out.width = "50%", fig.cap = "Histogram of means of replicated data sets; vertical red line at mean of original data."}
-knitr::include_graphics("./img/ppc-pvalue-nb-pois-mean.jpg", auto_pdf = TRUE)
-```
+![Histogram of means of replicated data sets; vertical red line at mean of original data.](./img/ppc-pvalue-nb-pois-mean.jpg){width=50% .lightbox}
 
 The p-value for the mean is just the percentage of replicated data
 sets whose statistic is greater than or equal that of the original
@@ -232,9 +225,7 @@ extracted as the posterior mean of the indicator variable `mean_gt`.
 
 The standard deviation statistic tells a different story.
 
-```{r include = TRUE, echo = FALSE, fig.align = "center", out.width = "50%", fig.cap = "Scatterplot of standard deviations of replicated data sets; the vertical red line is at standard deviation of original data."}
-knitr::include_graphics("./img/ppc-pvalue-nb-pois-sd.jpg", auto_pdf = TRUE)
-```
+![Scatterplot of standard deviations of replicated data sets; the vertical red line is at standard deviation of original data.](./img/ppc-pvalue-nb-pois-sd.jpg){width=50% .lightbox}
 
 Here, the original data has much higher standard deviation than any of
 the replicated data sets.  The resulting $p$-value estimated by Stan
diff --git a/src/stan-users-guide/poststratification.Rmd b/src/stan-users-guide/poststratification.qmd
similarity index 100%
rename from src/stan-users-guide/poststratification.Rmd
rename to src/stan-users-guide/poststratification.qmd
diff --git a/src/stan-users-guide/problematic-posteriors.Rmd b/src/stan-users-guide/problematic-posteriors.qmd
similarity index 100%
rename from src/stan-users-guide/problematic-posteriors.Rmd
rename to src/stan-users-guide/problematic-posteriors.qmd
diff --git a/src/stan-users-guide/proportionality-constants.Rmd b/src/stan-users-guide/proportionality-constants.qmd
similarity index 100%
rename from src/stan-users-guide/proportionality-constants.Rmd
rename to src/stan-users-guide/proportionality-constants.qmd
diff --git a/src/stan-users-guide/references.Rmd b/src/stan-users-guide/references.qmd
similarity index 100%
rename from src/stan-users-guide/references.Rmd
rename to src/stan-users-guide/references.qmd
diff --git a/src/stan-users-guide/regression.Rmd b/src/stan-users-guide/regression.qmd
similarity index 99%
rename from src/stan-users-guide/regression.Rmd
rename to src/stan-users-guide/regression.qmd
index 0f212a78b..17528cc9c 100644
--- a/src/stan-users-guide/regression.Rmd
+++ b/src/stan-users-guide/regression.qmd
@@ -477,7 +477,7 @@ suitable prior on the coefficients.
 
 An alternative is to use $(K-1)$-vectors by fixing one of them to be
 zero. The [partially known parameters
-section](missing-data.Rmd#partially-known-parameters.section) discusses how to mix
+section](missing-data.qmd#partially-known-parameters.section) discusses how to mix
 constants and parameters in a vector.  In the multi-logit case, the
 parameter block would be redefined to use $(K - 1)$-vectors
 
@@ -827,7 +827,7 @@ The declaration of `beta` as an array of vectors means that the
 expression `beta[l]` denotes a vector.  Although `beta` could have
 been declared as a matrix, an array of vectors (or a two-dimensional
 array) is more efficient for accessing rows; see the [indexing
-efficiency section](matrices-arrays.Rmd#indexing-efficiency.section) for more information
+efficiency section](matrices-arrays.qmd#indexing-efficiency.section) for more information
 on the efficiency tradeoffs among arrays, vectors, and matrices.
 
 This model can be further sped up and at the same time made more
@@ -1086,7 +1086,7 @@ The model is parameterized here with student abilities `alpha` being
 given a standard normal prior.  This is to identify both the scale and
 the location of the parameters, both of which would be unidentified
 otherwise; see the [problematic posteriors
-chapter](problematic-posteriors.Rmd) for further discussion of
+chapter](problematic-posteriors.qmd) for further discussion of
 identifiability. The difficulty and discrimination parameters `beta`
 and `gamma` then have varying scales given hierarchically in this
 model.  They could also be given weakly informative non-hierarchical
@@ -1116,7 +1116,7 @@ y[n] ~ bernoulli_logit(gamma[kk[n]] * (alpha[jj[n]] - beta[kk[n]]));
 
 Non-centered parameterizations tend to be more efficient in
 hierarchical models; see the [reparameterization
-section](efficiency-tuning.Rmd#reparameterization.section) for more information on
+section](efficiency-tuning.qmd#reparameterization.section) for more information on
 non-centered reparameterizations.
 
 The intercept term `mu_beta` can't itself be modeled
@@ -1141,7 +1141,7 @@ and/or location of a group of parameters. For example, in the IRT
 models discussed in the previous section, there is both a location and
 scale non-identifiability.  With uniform priors, the posteriors will
 float in terms of both scale and location.  See the [collinearity
-section](problematic-posteriors.Rmd#collinearity.section) for a simple example of the problems
+section](problematic-posteriors.qmd#collinearity.section) for a simple example of the problems
 this poses for estimation.
 
 The non-identifiability is resolved by providing a standard normal (i.e.,
diff --git a/src/stan-users-guide/reparameterization.Rmd b/src/stan-users-guide/reparameterization.qmd
similarity index 100%
rename from src/stan-users-guide/reparameterization.Rmd
rename to src/stan-users-guide/reparameterization.qmd
diff --git a/src/stan-users-guide/simulation-based-calibration.Rmd b/src/stan-users-guide/simulation-based-calibration.qmd
similarity index 99%
rename from src/stan-users-guide/simulation-based-calibration.Rmd
rename to src/stan-users-guide/simulation-based-calibration.qmd
index 01c0e1408..f367feae2 100644
--- a/src/stan-users-guide/simulation-based-calibration.Rmd
+++ b/src/stan-users-guide/simulation-based-calibration.qmd
@@ -545,5 +545,5 @@ This is because Stan's no-U-turn sampler is unable to sample with the
 model formulated in the centered parameterization---the posterior
 geometry has regions of extremely high curvature as $\tau$ approaches
 zero and the $\theta_j$ become highly constrained.  The [chapter on
-reparameterization](reparameterization.Rmd) explains how to
+reparameterization](reparameterization.qmd) explains how to
 remedy this problem and fit this kind of hierarchical model with Stan.
diff --git a/src/stan-users-guide/sparse-ragged.Rmd b/src/stan-users-guide/sparse-ragged.qmd
similarity index 81%
rename from src/stan-users-guide/sparse-ragged.Rmd
rename to src/stan-users-guide/sparse-ragged.qmd
index 8fbebd100..0a2bf3e6c 100644
--- a/src/stan-users-guide/sparse-ragged.Rmd
+++ b/src/stan-users-guide/sparse-ragged.qmd
@@ -3,8 +3,6 @@ pagetitle: Sparse and Ragged Data Structures
 ---
 
 # Sparse and Ragged Data Structures  {#sparse-ragged.chapter}
-```{r include, file="before-chapter.R", echo=FALSE, message=FALSE, warning=FALSE}
-```
 
 Stan does not directly support either sparse or ragged data
 structures, though both can be accommodated with some programming
@@ -18,7 +16,7 @@ covers more general data structures.
 Coding sparse data structures is as easy as moving from a matrix-like
 data structure to a database-like data structure.  For example,
 consider the coding of sparse data for the IRT models discussed in the
-[item-response model section](regression.Rmd#item-response-models.section).
+[item-response model section](regression.qmd#item-response-models.section).
 There are $J$ students and $K$ questions, and if every student answers every
 question, then it is practical to declare the data as a $J \times K$
 array of answers.
@@ -46,11 +44,6 @@ will no longer work, because Stan does not support undefined values.
 The following missing data example shows an example with $J=3$ and $K=4$,
 with missing responses shown as NA, as in R.
 
-```{r results='asis', echo=FALSE}
-cat(ifelse(knitr::is_html_output(),"", "\n\\begin{figure}[!h]"))
-```
-
-
 \begin{equation*}
 y
 =
@@ -65,10 +58,6 @@ y
 \right]
 \end{equation*}
 
-```{r results='asis', echo=FALSE}
-cat(ifelse(knitr::is_html_output(),"", "\n\\end{figure}"))
-```
-
 There is no support within Stan for R's NA values, so this data structure cannot be used
 directly. Instead, it must be converted to a "long form" as in a database,
 with columns indicating the indices along with the value.
@@ -80,19 +69,14 @@ As the array becomes larger and sparser, the long form becomes
 the more economical encoding.
 
 
-```{r sparsedata, echo=FALSE}
-df <- read.table(text="
-jj   | kk   | y
-1    | 1    | 0
-1    | 2    | 1
-1    | 4    | 1
-2    | 1    | 0
-2    | 4    | 1
-3    | 2    | 0
-", sep="|", header=TRUE, check.names=FALSE)
-kable(df, booktabs=TRUE, escape=FALSE, linesep="") %>%
-  kable_styling(full_width=FALSE)
-```
+|*jj* | *kk* |*y* |
+|:----|:-----|:---|
+|1    | 1    | 0  |
+|1    | 2    | 1  |
+|1    | 4    | 1  |
+|2    | 1    | 0  |
+|2    | 4    | 1  |
+|3    | 2    | 0  |
 
 Letting $N$ be the number of $y$ that are defined, here $N=6$,
 the data and model can be formulated as follows.
@@ -133,33 +117,21 @@ a linear array.
 For example, consider a data structure for three groups, each of which
 has a different number of observations.
 
-```{r results='asis', echo=FALSE}
-cat(ifelse(knitr::is_html_output(),
-"<table><tr><td>",
-"\\begin{minipage}[c]{0.35\\textwidth}"))
-```
-$y_1 =  \left[1.3 \ \ 2.4 \ \ 0.9\right]$
 
-$y_2 = \left[-1.8 \ \ -0.1\right]$
+:::: {layout-ncol=2}
 
+::: {#first-column}
+$y_1 =  \left[1.3 \ \ 2.4 \ \ 0.9\right]\\$
+$y_2 = \left[-1.8 \ \ -0.1\right]\\$
 $y_3 = \left[12.9 \ \ 18.7 \ \ 42.9 \ \ 4.7\right]$
+:::
 
-```{r results='asis', echo=FALSE}
-cat(ifelse(knitr::is_html_output(),
-"</td><td>",
-"\\end{minipage} \\begin{minipage}[c]{0.60\\textwidth}"))
-```
-
-$z = [1.3 \ \ 2.4 \ \ 0.9 \ \ -1.8 \ \ -0.1 \ \ 12.9 \ \ 18.7 \ \ 42.9 \ \ 4.7]$
-
+::: {#second-column}
+$z = [1.3 \ \ 2.4 \ \ 0.9 \ \ -1.8 \ \ -0.1 \ \ 12.9 \ \ 18.7 \ \ 42.9 \ \ 4.7]\\$
 $s  =  \{ 3 \ \ 2 \ \ 4 \}$
+:::
 
-
-```{r results='asis', echo=FALSE}
-cat(ifelse(knitr::is_html_output(),
-"</td></tr></table>",
-"\\end{minipage}"))
-```
+::::
 
 On the left is the definition of a ragged data structure $y$ with three rows of
 different sizes ($y_1$ is size 3, $y_2$ size 2, and $y_3$ size 4).  On the right
diff --git a/src/stan-users-guide/style-guide.Rmd b/src/stan-users-guide/style-guide.qmd
similarity index 99%
rename from src/stan-users-guide/style-guide.Rmd
rename to src/stan-users-guide/style-guide.qmd
index cb0e188c5..ac68eff00 100644
--- a/src/stan-users-guide/style-guide.Rmd
+++ b/src/stan-users-guide/style-guide.qmd
@@ -297,7 +297,7 @@ aligned under each other.
 
 Function documentation should follow the Javadoc and Doxygen styles.
 Here's an example repeated from the [documenting functions
-section](user-functions.Rmd#documenting-functions.section).
+section](user-functions.qmd#documenting-functions.section).
 
 ``` stan
 /**
diff --git a/src/stan-users-guide/survival.Rmd b/src/stan-users-guide/survival.qmd
similarity index 100%
rename from src/stan-users-guide/survival.Rmd
rename to src/stan-users-guide/survival.qmd
diff --git a/src/stan-users-guide/time-series.Rmd b/src/stan-users-guide/time-series.qmd
similarity index 99%
rename from src/stan-users-guide/time-series.Rmd
rename to src/stan-users-guide/time-series.qmd
index 6782d1e48..9b54b68c6 100644
--- a/src/stan-users-guide/time-series.Rmd
+++ b/src/stan-users-guide/time-series.qmd
@@ -9,7 +9,7 @@ Times series data come arranged in temporal order.  This chapter
 presents two kinds of time series models, regression-like models such
 as autoregressive and moving average models, and hidden Markov models.
 
-The [Gaussian processes chapter](gaussian-processes.Rmd) presents
+The [Gaussian processes chapter](gaussian-processes.qmd) presents
 Gaussian processes, which may also be used for time-series (and
 spatial) data.
 
@@ -88,7 +88,7 @@ coefficient `beta` may be constrained with bounds as follows.
 real<lower=-1, upper=1> beta;
 ```
 
-In practice, such a constraint is not recommended.  If the data are not well fit by a 
+In practice, such a constraint is not recommended.  If the data are not well fit by a
 stationary model it is best to know this.
 Stationary parameter estimates can be encouraged with a prior favoring
 values of `beta` near zero.
@@ -179,7 +179,7 @@ parameters {
 }
 model {
   for (t in 2:T) {
-    r[t] ~ normal(mu, sqrt(alpha0 + alpha1 
+    r[t] ~ normal(mu, sqrt(alpha0 + alpha1
                                     * pow(r[t - 1] - mu,2)));
   }
 }
@@ -849,7 +849,7 @@ model {
   for (t in 2:T_unsup) {
     for (k in 1:K) {
       for (j in 1:K) {
-        acc[j] = gamma[t - 1, j] + log(theta[j, k]) 
+        acc[j] = gamma[t - 1, j] + log(theta[j, k])
                  + log(phi[k, u[t]]);
       }
       gamma[t, k] = log_sum_exp(acc);
diff --git a/src/stan-users-guide/truncation-censoring.Rmd b/src/stan-users-guide/truncation-censoring.qmd
similarity index 100%
rename from src/stan-users-guide/truncation-censoring.Rmd
rename to src/stan-users-guide/truncation-censoring.qmd
diff --git a/src/stan-users-guide/user-functions.Rmd b/src/stan-users-guide/user-functions.qmd
similarity index 99%
rename from src/stan-users-guide/user-functions.Rmd
rename to src/stan-users-guide/user-functions.qmd
index f80c03e8b..9565e17e0 100644
--- a/src/stan-users-guide/user-functions.Rmd
+++ b/src/stan-users-guide/user-functions.qmd
@@ -193,7 +193,7 @@ This qualifier should be used when writing functions that call the
 built-in ordinary differential equation (ODE) solvers, algebraic
 solvers, or map functions.  These higher-order functions have strictly
 specified signatures where some arguments of are data only
-expressions. (See the [ODE solver chapter](odes.Rmd) for
+expressions. (See the [ODE solver chapter](odes.qmd) for
 more usage details and the functions reference manual for full
 definitions.)  When writing a function which calls the ODE or
 algebraic solver, arguments to that function which are passed into the
@@ -257,7 +257,7 @@ the transformed parameters and model blocks.
 Here is an example of a function to assign standard normal priors to a
 vector of coefficients, along with a center and scale, and return the
 translated and scaled coefficients; see the [reparameterization
-section](efficiency-tuning.Rmd#reparameterization.section) for more information on
+section](efficiency-tuning.qmd#reparameterization.section) for more information on
 efficient non-centered parameterizations
 
 ```stan
diff --git a/src/stan-users-guide/using-stanc.Rmd b/src/stan-users-guide/using-stanc.qmd
similarity index 100%
rename from src/stan-users-guide/using-stanc.Rmd
rename to src/stan-users-guide/using-stanc.qmd