diff --git a/pymc/sampling.py b/pymc/sampling.py
index 3a54b59886..ff4bb18f16 100644
--- a/pymc/sampling.py
+++ b/pymc/sampling.py
@@ -61,7 +61,7 @@
 from pymc.backends.base import BaseTrace, MultiTrace
 from pymc.backends.ndarray import NDArray
 from pymc.blocking import DictToArrayBijection
-from pymc.exceptions import IncorrectArgumentsError, SamplingError
+from pymc.exceptions import SamplingError
 from pymc.initial_point import (
     PointType,
     StartDict,
@@ -1769,10 +1769,8 @@ def expand(node):
 
 def sample_posterior_predictive(
     trace,
-    samples: Optional[int] = None,
     model: Optional[Model] = None,
     var_names: Optional[List[str]] = None,
-    keep_size: Optional[bool] = None,
     random_seed: RandomState = None,
     progressbar: bool = True,
     return_inferencedata: bool = True,
@@ -1788,25 +1786,11 @@ def sample_posterior_predictive(
     trace : backend, list, xarray.Dataset, arviz.InferenceData, or MultiTrace
         Trace generated from MCMC sampling, or a list of dicts (eg. points or from find_MAP()),
         or xarray.Dataset (eg. InferenceData.posterior or InferenceData.prior)
-    samples : int
-        Number of posterior predictive samples to generate. Defaults to one posterior predictive
-        sample per posterior sample, that is, the number of draws times the number of chains.
-
-        It is not recommended to modify this value; when modified, some chains may not be
-        represented in the posterior predictive sample. Instead, in cases when generating
-        posterior predictive samples is too expensive to do it once per posterior sample,
-        the recommended approach is to thin the ``trace`` argument
-        before passing it to ``sample_posterior_predictive``. In such cases it
-        might be advisable to set ``extend_inferencedata`` to ``False`` and extend
-        the inferencedata manually afterwards.
     model : Model (optional if in ``with`` context)
         Model to be used to generate the posterior predictive samples. It will
         generally be the model used to generate the ``trace``, but it doesn't need to be.
     var_names : Iterable[str]
         Names of variables for which to compute the posterior predictive samples.
-    keep_size : bool, default True
-        Force posterior predictive sample to have the same shape as posterior and sample stats
-        data: ``(nchains, ndraws, ...)``. Overrides samples parameter.
     random_seed : int, RandomState or Generator, optional
         Seed for the random number generator.
     progressbar : bool
@@ -1882,38 +1866,18 @@ def sample_posterior_predictive(
     else:
         raise TypeError(f"Unsupported type for `trace` argument: {type(trace)}.")
 
-    if keep_size is None:
-        # This will allow users to set return_inferencedata=False and
-        # automatically get the old behaviour instead of needing to
-        # set both return_inferencedata and keep_size to False
-        keep_size = return_inferencedata
-
-    if keep_size and samples is not None:
-        raise IncorrectArgumentsError(
-            "Should not specify both keep_size and samples arguments. "
-            "See the docstring of the samples argument for more details."
+    if isinstance(_trace, MultiTrace):
+        samples = sum(len(v) for v in _trace._straces.values())
+    elif isinstance(_trace, list):
+        # this is a list of points
+        samples = len(_trace)
+    else:
+        raise TypeError(
+            "Do not know how to compute number of samples for trace argument of type %s"
+            % type(_trace)
         )
 
-    if samples is None:
-        if isinstance(_trace, MultiTrace):
-            samples = sum(len(v) for v in _trace._straces.values())
-        elif isinstance(_trace, list):
-            # this is a list of points
-            samples = len(_trace)
-        else:
-            raise TypeError(
-                "Do not know how to compute number of samples for trace argument of type %s"
-                % type(_trace)
-            )
-
     assert samples is not None
-    if samples < len_trace * nchain:
-        warnings.warn(
-            "samples parameter is smaller than nchains times ndraws, some draws "
-            "and/or chains may not be represented in the returned posterior "
-            "predictive sample",
-            stacklevel=2,
-        )
 
     model = modelcontext(model)
 
@@ -2001,9 +1965,9 @@ def sample_posterior_predictive(
         pass
 
     ppc_trace = ppc_trace_t.trace_dict
-    if keep_size:
-        for k, ary in ppc_trace.items():
-            ppc_trace[k] = ary.reshape((nchain, len_trace, *ary.shape[1:]))
+
+    for k, ary in ppc_trace.items():
+        ppc_trace[k] = ary.reshape((nchain, len_trace, *ary.shape[1:]))
 
     if not return_inferencedata:
         return ppc_trace
diff --git a/pymc/tests/backends/test_arviz.py b/pymc/tests/backends/test_arviz.py
index 0d42ac0fa2..ce287d0302 100644
--- a/pymc/tests/backends/test_arviz.py
+++ b/pymc/tests/backends/test_arviz.py
@@ -89,7 +89,7 @@ def get_predictions_inference_data(
         with data.model:
             prior = pm.sample_prior_predictive(return_inferencedata=False)
             posterior_predictive = pm.sample_posterior_predictive(
-                data.obj, keep_size=True, return_inferencedata=False
+                data.obj, return_inferencedata=False
             )
 
             idata = to_inference_data(
@@ -111,7 +111,7 @@ def make_predictions_inference_data(
     ) -> Tuple[InferenceData, Dict[str, np.ndarray]]:
         with data.model:
             posterior_predictive = pm.sample_posterior_predictive(
-                data.obj, keep_size=True, return_inferencedata=False
+                data.obj, return_inferencedata=False
             )
             idata = predictions_to_inference_data(
                 posterior_predictive,
@@ -190,7 +190,7 @@ def test_predictions_to_idata_new(self, data, eight_schools_params):
     def test_posterior_predictive_keep_size(self, data, chains, draws, eight_schools_params):
         with data.model:
             posterior_predictive = pm.sample_posterior_predictive(
-                data.obj, keep_size=True, return_inferencedata=False
+                data.obj, return_inferencedata=False
             )
             inference_data = to_inference_data(
                 trace=data.obj,
@@ -204,26 +204,6 @@ def test_posterior_predictive_keep_size(self, data, chains, draws, eight_schools
             [obs_s == s for obs_s, s in zip(shape, (chains, draws, eight_schools_params["J"]))]
         )
 
-    def test_posterior_predictive_warning(self, data, eight_schools_params, caplog):
-        with data.model:
-            with warnings.catch_warnings():
-                warnings.filterwarnings(
-                    "ignore", ".*smaller than nchains times ndraws.*", UserWarning
-                )
-                posterior_predictive = pm.sample_posterior_predictive(
-                    data.obj, 370, return_inferencedata=False, keep_size=False
-                )
-            with pytest.warns(UserWarning, match="shape of variables"):
-                inference_data = to_inference_data(
-                    trace=data.obj,
-                    posterior_predictive=posterior_predictive,
-                    coords={"school": np.arange(eight_schools_params["J"])},
-                    dims={"theta": ["school"], "eta": ["school"]},
-                )
-
-        shape = inference_data.posterior_predictive.obs.shape
-        assert np.all([obs_s == s for obs_s, s in zip(shape, (1, 370, eight_schools_params["J"]))])
-
     def test_posterior_predictive_thinned(self, data):
         with data.model:
             draws = 20
diff --git a/pymc/tests/distributions/test_mixture.py b/pymc/tests/distributions/test_mixture.py
index ca81a4c718..8d13deae88 100644
--- a/pymc/tests/distributions/test_mixture.py
+++ b/pymc/tests/distributions/test_mixture.py
@@ -546,7 +546,7 @@ def test_single_poisson_predictive_sampling_shape(self):
         with model:
             prior = sample_prior_predictive(samples=n_samples, return_inferencedata=False)
             ppc = sample_posterior_predictive(
-                [self.get_inital_point(model)], samples=n_samples, return_inferencedata=False
+                n_samples * [self.get_inital_point(model)], return_inferencedata=False
             )
 
         assert prior["like0"].shape == (n_samples, 20)
@@ -554,10 +554,10 @@ def test_single_poisson_predictive_sampling_shape(self):
         assert prior["like2"].shape == (n_samples, 20)
         assert prior["like3"].shape == (n_samples, 20)
 
-        assert ppc["like0"].shape == (n_samples, 20)
-        assert ppc["like1"].shape == (n_samples, 20)
-        assert ppc["like2"].shape == (n_samples, 20)
-        assert ppc["like3"].shape == (n_samples, 20)
+        assert ppc["like0"].shape == (1, n_samples, 20)
+        assert ppc["like1"].shape == (1, n_samples, 20)
+        assert ppc["like2"].shape == (1, n_samples, 20)
+        assert ppc["like3"].shape == (1, n_samples, 20)
 
     def test_list_mvnormals_predictive_sampling_shape(self):
         N = 100  # number of data points
@@ -592,9 +592,16 @@ def test_list_mvnormals_predictive_sampling_shape(self):
         with model:
             prior = sample_prior_predictive(samples=n_samples, return_inferencedata=False)
             ppc = sample_posterior_predictive(
-                [self.get_inital_point(model)], samples=n_samples, return_inferencedata=False
+                n_samples * [self.get_inital_point(model)], return_inferencedata=False
             )
-        assert ppc["x_obs"].shape == (n_samples,) + X.shape
+        assert (
+            ppc["x_obs"].shape
+            == (
+                1,
+                n_samples,
+            )
+            + X.shape
+        )
         assert prior["x_obs"].shape == (n_samples,) + X.shape
         assert prior["mu0"].shape == (n_samples, D)
         assert prior["chol_cov_0"].shape == (n_samples, D * (D + 1) // 2)
diff --git a/pymc/tests/distributions/test_simulator.py b/pymc/tests/distributions/test_simulator.py
index 7d04c79d81..4a6532af54 100644
--- a/pymc/tests/distributions/test_simulator.py
+++ b/pymc/tests/distributions/test_simulator.py
@@ -80,9 +80,7 @@ def test_one_gaussian(self):
         with self.SMABC_test:
             trace = pm.sample_smc(draws=1000, chains=1, return_inferencedata=False)
             pr_p = pm.sample_prior_predictive(1000, return_inferencedata=False)
-            po_p = pm.sample_posterior_predictive(
-                trace, keep_size=False, return_inferencedata=False
-            )
+            po_p = pm.sample_posterior_predictive(trace, return_inferencedata=False)
 
         assert abs(self.data.mean() - trace["a"].mean()) < 0.05
         assert abs(self.data.std() - trace["b"].mean()) < 0.05
@@ -91,7 +89,7 @@ def test_one_gaussian(self):
         assert abs(0 - pr_p["s"].mean()) < 0.15
         assert abs(1.4 - pr_p["s"].std()) < 0.10
 
-        assert po_p["s"].shape == (1000, 1000)
+        assert po_p["s"].shape == (1, 1000, 1000)
         assert abs(self.data.mean() - po_p["s"].mean()) < 0.10
         assert abs(self.data.std() - po_p["s"].std()) < 0.10
 
diff --git a/pymc/tests/test_model.py b/pymc/tests/test_model.py
index edb2888834..42c80a94a7 100644
--- a/pymc/tests/test_model.py
+++ b/pymc/tests/test_model.py
@@ -1263,13 +1263,18 @@ def test_interval_missing_observations():
 
         # Make sure that the observed values are newly generated samples and that
         # the observed and deterministic matche
-        pp_trace = pm.sample_posterior_predictive(
-            trace, return_inferencedata=False, keep_size=False
+        pp_idata = pm.sample_posterior_predictive(trace)
+        pp_trace = pp_idata.posterior_predictive.stack(sample=["chain", "draw"]).transpose(
+            "sample", ...
         )
         assert np.all(np.var(pp_trace["theta1"], 0) > 0.0)
         assert np.all(np.var(pp_trace["theta2"], 0) > 0.0)
-        assert np.mean(pp_trace["theta1"][:, ~obs1.mask] - pp_trace["theta1_observed"]) == 0.0
-        assert np.mean(pp_trace["theta2"][:, ~obs2.mask] - pp_trace["theta2_observed"]) == 0.0
+        assert np.isclose(
+            np.mean(pp_trace["theta1"][:, ~obs1.mask] - pp_trace["theta1_observed"]), 0
+        )
+        assert np.isclose(
+            np.mean(pp_trace["theta2"][:, ~obs2.mask] - pp_trace["theta2_observed"]), 0
+        )
 
 
 def test_double_counting():
diff --git a/pymc/tests/test_sampling.py b/pymc/tests/test_sampling.py
index b4365d0c51..9c06b4244f 100644
--- a/pymc/tests/test_sampling.py
+++ b/pymc/tests/test_sampling.py
@@ -43,7 +43,7 @@
 from pymc.backends.base import MultiTrace
 from pymc.backends.ndarray import NDArray
 from pymc.distributions import transforms
-from pymc.exceptions import IncorrectArgumentsError, SamplingError
+from pymc.exceptions import SamplingError
 from pymc.sampling import (
     _get_seeds_per_chain,
     assign_step_methods,
@@ -622,13 +622,12 @@ def test_normal_scalar(self):
             trace = pm.sample(
                 draws=ndraws,
                 chains=nchains,
-                return_inferencedata=False,
             )
 
         with model:
             # test list input
             ppc0 = pm.sample_posterior_predictive(
-                [model.initial_point()], samples=10, return_inferencedata=False
+                10 * [model.initial_point()], return_inferencedata=False
             )
             # # deprecated argument is not introduced to fast version [2019/08/20:rpg]
             ppc = pm.sample_posterior_predictive(trace, var_names=["a"], return_inferencedata=False)
@@ -637,15 +636,18 @@ def test_normal_scalar(self):
             assert len(ppc) == 0
 
             # test keep_size parameter
-            ppc = pm.sample_posterior_predictive(trace, keep_size=True, return_inferencedata=False)
+            ppc = pm.sample_posterior_predictive(trace, return_inferencedata=False)
             assert ppc["a"].shape == (nchains, ndraws)
 
             # test default case
-            ppc = pm.sample_posterior_predictive(trace, var_names=["a"], return_inferencedata=False)
+            idata_ppc = pm.sample_posterior_predictive(trace, var_names=["a"])
+            ppc = idata_ppc.posterior_predictive
             assert "a" in ppc
-            assert ppc["a"].shape == (nchains * ndraws,)
+            assert ppc["a"].shape == (nchains, ndraws)
             # mu's standard deviation may have changed thanks to a's observed
-            _, pval = stats.kstest(ppc["a"] - trace["mu"], stats.norm(loc=0, scale=1).cdf)
+            _, pval = stats.kstest(
+                (ppc["a"] - trace.posterior["mu"]).values.flatten(), stats.norm(loc=0, scale=1).cdf
+            )
             assert pval > 0.001
 
     def test_normal_scalar_idata(self):
@@ -670,45 +672,27 @@ def test_normal_scalar_idata(self):
             idata = pm.to_inference_data(trace)
             assert isinstance(idata, InferenceData)
 
-            ppc = pm.sample_posterior_predictive(idata, keep_size=True, return_inferencedata=False)
+            ppc = pm.sample_posterior_predictive(idata, return_inferencedata=False)
             assert ppc["a"].shape == (nchains, ndraws)
 
     def test_normal_vector(self):
         with pm.Model() as model:
             mu = pm.Normal("mu", 0.0, 1.0)
             a = pm.Normal("a", mu=mu, sigma=1, observed=np.array([0.5, 0.2]))
-            trace = pm.sample(return_inferencedata=False)
+            trace = pm.sample(return_inferencedata=False, draws=12, chains=1)
 
         with model:
             # test list input
             ppc0 = pm.sample_posterior_predictive(
-                [model.initial_point()], return_inferencedata=False, samples=10
+                10 * [model.initial_point()],
+                return_inferencedata=False,
             )
-            with warnings.catch_warnings():
-                warnings.filterwarnings(
-                    "ignore", ".*smaller than nchains times ndraws.*", UserWarning
-                )
-                ppc = pm.sample_posterior_predictive(
-                    trace, return_inferencedata=False, samples=12, var_names=[]
-                )
+            ppc = pm.sample_posterior_predictive(trace, return_inferencedata=False, var_names=[])
             assert len(ppc) == 0
 
-            # test keep_size parameter
-            ppc = pm.sample_posterior_predictive(trace, return_inferencedata=False, keep_size=True)
+            ppc = pm.sample_posterior_predictive(trace, return_inferencedata=False)
             assert ppc["a"].shape == (trace.nchains, len(trace), 2)
-            with pytest.warns(UserWarning):
-                ppc = pm.sample_posterior_predictive(
-                    trace, return_inferencedata=False, samples=12, var_names=["a"]
-                )
-            assert "a" in ppc
-            assert ppc["a"].shape == (12, 2)
-
-            with pytest.warns(UserWarning):
-                ppc = pm.sample_posterior_predictive(
-                    trace, return_inferencedata=False, samples=12, var_names=["a"]
-                )
-            assert "a" in ppc
-            assert ppc["a"].shape == (12, 2)
+            assert ppc0["a"].shape == (1, 10, 2)
 
     def test_normal_vector_idata(self):
         with pm.Model() as model:
@@ -723,7 +707,7 @@ def test_normal_vector_idata(self):
             idata = pm.to_inference_data(trace)
             assert isinstance(idata, InferenceData)
 
-            ppc = pm.sample_posterior_predictive(idata, return_inferencedata=False, keep_size=True)
+            ppc = pm.sample_posterior_predictive(idata, return_inferencedata=False)
             assert ppc["a"].shape == (trace.nchains, len(trace), 2)
 
     def test_exceptions(self):
@@ -733,59 +717,31 @@ def test_exceptions(self):
             idata = pm.sample(idata_kwargs={"log_likelihood": False})
 
         with model:
-            with pytest.raises(IncorrectArgumentsError):
-                ppc = pm.sample_posterior_predictive(idata, samples=10, keep_size=True)
-
             # test wrong type argument
             bad_trace = {"mu": stats.norm.rvs(size=1000)}
             with pytest.raises(TypeError, match="type for `trace`"):
                 ppc = pm.sample_posterior_predictive(bad_trace)
 
-    def test_vector_observed(self):
-        with pm.Model() as model:
-            mu = pm.Normal("mu", mu=0, sigma=1)
-            a = pm.Normal("a", mu=mu, sigma=1, observed=np.array([0.0, 1.0]))
-            idata = pm.sample(idata_kwargs={"log_likelihood": False})
-
-        with model:
-            # test list input
-            # ppc0 = pm.sample_posterior_predictive([model.initial_point], samples=10)
-            # TODO: Assert something about the output
-            # ppc = pm.sample_posterior_predictive(idata, samples=12, var_names=[])
-            # assert len(ppc) == 0
-            with warnings.catch_warnings():
-                warnings.filterwarnings(
-                    "ignore", ".*smaller than nchains times ndraws.*", UserWarning
-                )
-                ppc = pm.sample_posterior_predictive(
-                    idata, return_inferencedata=False, samples=12, var_names=["a"]
-                )
-            assert "a" in ppc
-            assert ppc["a"].shape == (12, 2)
-
     def test_sum_normal(self):
         with pm.Model() as model:
             a = pm.Normal("a", sigma=0.2)
             b = pm.Normal("b", mu=a)
-            idata = pm.sample()
+            idata = pm.sample(draws=1000, chains=1)
 
         with model:
             # test list input
-            with warnings.catch_warnings():
-                warnings.filterwarnings(
-                    "ignore", ".*smaller than nchains times ndraws.*", UserWarning
-                )
-                ppc0 = pm.sample_posterior_predictive(
-                    [model.initial_point()], return_inferencedata=False, samples=10
-                )
-                assert ppc0 == {}
-                ppc = pm.sample_posterior_predictive(
-                    idata, return_inferencedata=False, samples=1000, var_names=["b"]
-                )
+            ppc0 = pm.sample_posterior_predictive(
+                10 * [model.initial_point()], return_inferencedata=False
+            )
+            assert ppc0 == {}
+            ppc = pm.sample_posterior_predictive(idata, return_inferencedata=False, var_names=["b"])
             assert len(ppc) == 1
-            assert ppc["b"].shape == (1000,)
+            assert ppc["b"].shape == (
+                1,
+                1000,
+            )
             scale = np.sqrt(1 + 0.2**2)
-            _, pval = stats.kstest(ppc["b"], stats.norm(scale=scale).cdf)
+            _, pval = stats.kstest(ppc["b"].flatten(), stats.norm(scale=scale).cdf)
             assert pval > 0.001
 
     def test_model_not_drawable_prior(self):
@@ -803,12 +759,8 @@ def test_model_not_drawable_prior(self):
             with pytest.raises(NotImplementedError) as excinfo:
                 pm.sample_prior_predictive(50)
             assert "Cannot sample" in str(excinfo.value)
-            with warnings.catch_warnings():
-                warnings.filterwarnings(
-                    "ignore", ".*smaller than nchains times ndraws.*", UserWarning
-                )
-                samples = pm.sample_posterior_predictive(idata, 40, return_inferencedata=False)
-            assert samples["foo"].shape == (40, 200)
+            samples = pm.sample_posterior_predictive(idata, return_inferencedata=False)
+            assert samples["foo"].shape == (1, 40, 200)
 
     def test_model_shared_variable(self):
         rng = np.random.RandomState(9832)
@@ -817,30 +769,30 @@ def test_model_shared_variable(self):
         y = x > 0
         x_shared = aesara.shared(x)
         y_shared = aesara.shared(y)
+        samples = 100
         with pm.Model() as model:
             coeff = pm.Normal("x", mu=0, sigma=1)
             logistic = pm.Deterministic("p", pm.math.sigmoid(coeff * x_shared))
 
             obs = pm.Bernoulli("obs", p=logistic, observed=y_shared)
             trace = pm.sample(
-                100, return_inferencedata=False, compute_convergence_checks=False, random_seed=rng
+                samples,
+                chains=1,
+                return_inferencedata=False,
+                compute_convergence_checks=False,
+                random_seed=rng,
             )
 
         x_shared.set_value([-1, 0, 1.0])
         y_shared.set_value([0, 0, 0])
 
-        samples = 100
         with model:
-            with warnings.catch_warnings():
-                warnings.filterwarnings(
-                    "ignore", ".*smaller than nchains times ndraws.*", UserWarning
-                )
-                post_pred = pm.sample_posterior_predictive(
-                    trace, return_inferencedata=False, samples=samples, var_names=["p", "obs"]
-                )
+            post_pred = pm.sample_posterior_predictive(
+                trace, return_inferencedata=False, var_names=["p", "obs"]
+            )
 
-        expected_p = np.array([logistic.eval({coeff: val}) for val in trace["x"][:samples]])
-        assert post_pred["obs"].shape == (samples, 3)
+        expected_p = np.array([[logistic.eval({coeff: val}) for val in trace["x"][:samples]]])
+        assert post_pred["obs"].shape == (1, samples, 3)
         npt.assert_allclose(post_pred["p"], expected_p)
 
     def test_deterministic_of_observed(self):
@@ -877,7 +829,6 @@ def test_deterministic_of_observed(self):
                 return_inferencedata=False,
                 model=model,
                 trace=trace,
-                samples=len(trace) * nchains,
                 random_seed=0,
                 var_names=[var.name for var in (model.deterministics + model.basic_RVs)],
             )
@@ -917,7 +868,6 @@ def test_deterministic_of_observed_modified_interface(self):
                 return_inferencedata=False,
                 model=model,
                 trace=ppc_trace,
-                samples=len(ppc_trace),
                 var_names=[x.name for x in (model.deterministics + model.basic_RVs)],
             )
 
@@ -935,11 +885,7 @@ def test_variable_type(self):
                 )
 
         with model:
-            with warnings.catch_warnings():
-                warnings.filterwarnings(
-                    "ignore", ".*smaller than nchains times ndraws.*", UserWarning
-                )
-                ppc = pm.sample_posterior_predictive(trace, return_inferencedata=False, samples=1)
+            ppc = pm.sample_posterior_predictive(trace, return_inferencedata=False)
             assert ppc["a"].dtype.kind == "f"
             assert ppc["b"].dtype.kind == "i"
 
@@ -1429,11 +1375,11 @@ def test_multivariate2(self):
             return_inferencedata=False, samples=20, model=dm_model
         )
         sim_ppc = pm.sample_posterior_predictive(
-            burned_trace, return_inferencedata=False, samples=20, model=dm_model
+            burned_trace, return_inferencedata=False, model=dm_model
         )
         assert sim_priors["probs"].shape == (20, 6)
         assert sim_priors["obs"].shape == (20,) + mn_data.shape
-        assert sim_ppc["obs"].shape == (20,) + mn_data.shape
+        assert sim_ppc["obs"].shape == (1, 20) + mn_data.shape
 
     def test_layers(self):
         with pm.Model() as model: