diff --git a/src/Control/Monad/Bayes/Population.hs b/src/Control/Monad/Bayes/Population.hs index 9dff3152..1ad9c97a 100644 --- a/src/Control/Monad/Bayes/Population.hs +++ b/src/Control/Monad/Bayes/Population.hs @@ -27,6 +27,8 @@ module Control.Monad.Bayes.Population resampleSystematic, stratified, resampleStratified, + onlyBelowEffectiveSampleSize, + effectiveSampleSize, extractEvidence, pushEvidence, proper, @@ -67,7 +69,7 @@ import Numeric.Log qualified as Log import Prelude hiding (all, sum) -- | The old-fashioned, broken list transformer, adding a list/nondeterminism/choice effect. --- It is not a valid monad transformer, but it is a valid 'Applicative'. +-- It is not a valid monad transformer, but it is a valid 'Applicative'. newtype ListT m a = ListT {getListT :: Compose m [] a} deriving newtype (Functor, Applicative, Alternative) @@ -244,6 +246,43 @@ resampleMultinomial :: PopulationT m a resampleMultinomial = resampleGeneric multinomial +-- ** Effective sample size + +-- | Only use the given resampler when the effective sample size is below a certain threshold. +-- +-- See 'withEffectiveSampleSize'. +onlyBelowEffectiveSampleSize :: + (MonadDistribution m) => + -- | The threshold under which the effective sample size must fall before the resampler is used. + -- For example, this may be half of the number of particles. + Double -> + -- | The resampler to user under the threshold + (forall n. (MonadDistribution n) => PopulationT n a -> PopulationT n a) -> + -- | The new resampler + (PopulationT m a -> PopulationT m a) +onlyBelowEffectiveSampleSize threshold resampler pop = fromWeightedList $ do + (as, ess) <- withEffectiveSampleSize pop + if ess < threshold then runPopulationT $ resampler $ fromWeightedList $ pure as else return as + +-- | Compute the effective sample size of a population from the weights. +-- +-- See https://en.wikipedia.org/wiki/Design_effect#Effective_sample_size +effectiveSampleSize :: (Functor m) => PopulationT m a -> m Double +effectiveSampleSize = fmap snd . withEffectiveSampleSize + +-- | Compute the effective sample size alongside the samples themselves. +-- +-- The advantage over 'effectiveSampleSize' is that the samples need not be created a second time. +withEffectiveSampleSize :: (Functor m) => PopulationT m a -> m ([(a, Log Double)], Double) +withEffectiveSampleSize = fmap (\as -> (as, effectiveSampleSizeKish $ (exp . ln . snd) <$> as)) . runPopulationT + where + effectiveSampleSizeKish :: [Double] -> Double + effectiveSampleSizeKish weights = square (Data.List.sum weights) / Data.List.sum (square <$> weights) + square :: Double -> Double + square x = x * x + +-- ** Utility functions + -- | Separate the sum of weights into the 'WeightedT' transformer. -- Weights are normalized after this operation. extractEvidence ::