Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Statistically verify result #288

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions hedgehog-example/hedgehog-example.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ library

exposed-modules:
Test.Example.Basic
, Test.Example.Confidence
, Test.Example.Coverage
, Test.Example.Exception
, Test.Example.List
Expand Down
25 changes: 25 additions & 0 deletions hedgehog-example/src/Test/Example/Confidence.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TemplateHaskell #-}
module Test.Example.Confidence where

import Hedgehog
import qualified Hedgehog.Range as Range
import qualified Hedgehog.Internal.Gen as Gen

------------------------------------------------------------------------
-- Example 0: This test will certify that it is impossible to get 60%
-- coverage for the property label "number == 1"
--
-- Note that it will abort running once it knows its task is
-- impossible - it will not run 1000000 tests
--
prop_without_confidence :: Property
prop_without_confidence =
verifiedTermination . withConfidence (10^9) . withTests 1000000 . property $ do
number <- forAll (Gen.int $ Range.constant 1 2)
cover 60 "number == 1" $ number == 1

------------------------------------------------------------------------
tests :: IO Bool
tests =
checkSequential $$(discover)
20 changes: 11 additions & 9 deletions hedgehog-example/test/test.hs
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
import System.IO (BufferMode(..), hSetBuffering, stdout, stderr)

import qualified Test.Example.Basic as Test.Example.Basic
import qualified Test.Example.Coverage as Test.Example.Coverage
import qualified Test.Example.Exception as Test.Example.Exception
import qualified Test.Example.QuickCheck as Test.Example.QuickCheck
import qualified Test.Example.References as Test.Example.References
import qualified Test.Example.Registry as Test.Example.Registry
import qualified Test.Example.Resource as Test.Example.Resource
import qualified Test.Example.Roundtrip as Test.Example.Roundtrip
import qualified Test.Example.STLC as Test.Example.STLC
import qualified Test.Example.Basic
import qualified Test.Example.Confidence
import qualified Test.Example.Coverage
import qualified Test.Example.Exception
import qualified Test.Example.QuickCheck
import qualified Test.Example.References
import qualified Test.Example.Registry
import qualified Test.Example.Resource
import qualified Test.Example.Roundtrip
import qualified Test.Example.STLC

main :: IO ()
main = do
Expand All @@ -17,6 +18,7 @@ main = do

_results <- sequence [
Test.Example.Basic.tests
, Test.Example.Confidence.tests
, Test.Example.Coverage.tests
, Test.Example.Exception.tests
, Test.Example.QuickCheck.tests
Expand Down
2 changes: 2 additions & 0 deletions hedgehog/hedgehog.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ library
, concurrent-output >= 1.7 && < 1.11
, containers >= 0.4 && < 0.7
, directory >= 1.2 && < 1.4
, erf >= 2.0 && < 2.1
, exceptions >= 0.7 && < 0.11
, fail >= 4.9 && < 5
, lifted-async >= 0.7 && < 0.11
Expand Down Expand Up @@ -126,6 +127,7 @@ test-suite test

other-modules:
Test.Hedgehog.Applicative
Test.Hedgehog.Confidence
Test.Hedgehog.Filter
Test.Hedgehog.Seed
Test.Hedgehog.Text
Expand Down
5 changes: 5 additions & 0 deletions hedgehog/src/Hedgehog.hs
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,10 @@ module Hedgehog (
, checkParallel
, checkSequential

, Confidence
, verifiedTermination
, withConfidence

, withTests
, TestLimit

Expand Down Expand Up @@ -167,6 +171,7 @@ import Hedgehog.Internal.Property (forAll, forAllWith)
import Hedgehog.Internal.Property (LabelName, MonadTest(..))
import Hedgehog.Internal.Property (Property, PropertyT, PropertyName)
import Hedgehog.Internal.Property (Group(..), GroupName)
import Hedgehog.Internal.Property (Confidence, verifiedTermination, withConfidence)
import Hedgehog.Internal.Property (ShrinkLimit, withShrinks)
import Hedgehog.Internal.Property (ShrinkRetries, withRetries)
import Hedgehog.Internal.Property (Test, TestT, property, test)
Expand Down
153 changes: 146 additions & 7 deletions hedgehog/src/Hedgehog/Internal/Property.hs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE StandaloneDeriving #-}
{-# LANGUAGE TemplateHaskell #-}
Expand Down Expand Up @@ -41,6 +42,7 @@ module Hedgehog.Internal.Property (
, forAllT
, forAllWith
, forAllWithT
, defaultMinTests
, discard

-- * Group
Expand Down Expand Up @@ -92,6 +94,15 @@ module Hedgehog.Internal.Property (
, CoverPercentage(..)
, toCoverCount

-- * Confidence
, Confidence(..)
, TerminationCriteria(..)
, confidenceSuccess
, confidenceFailure
, withConfidence
, verifiedTermination
, defaultConfidence

-- * Internal
-- $internal
, defaultConfig
Expand All @@ -105,6 +116,8 @@ module Hedgehog.Internal.Property (
, mkTestT
, runTest
, runTestT

, wilsonBounds
) where

import Control.Applicative (Alternative(..))
Expand Down Expand Up @@ -139,11 +152,14 @@ import qualified Control.Monad.Trans.Writer.Strict as Strict

import qualified Data.Char as Char
import Data.Functor.Identity (Identity(..))
import Data.Int (Int64)
import Data.Map (Map)
import qualified Data.Map.Strict as Map
import Data.Number.Erf (invnormcdf)
import qualified Data.List as List
import Data.Semigroup (Semigroup(..))
import Data.String (IsString)
import Data.Ratio ((%))
import Data.Typeable (typeOf)

import Hedgehog.Internal.Distributive
Expand Down Expand Up @@ -221,14 +237,23 @@ newtype PropertyName =
unPropertyName :: String
} deriving (Eq, Ord, Show, IsString, Semigroup, Lift)

-- | The acceptable occurrence of false positives
--
-- Example, @Confidence 10^9@ would mean that you'd accept a false positive
-- for 1 in 10^9 tests.
newtype Confidence =
Confidence {
unConfidence :: Int64
} deriving (Eq, Ord, Show, Num, Lift)

-- | Configuration for a property test.
--
data PropertyConfig =
PropertyConfig {
propertyTestLimit :: !TestLimit
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

removing this field broke the API between 1.0.1 and 1.0.2 😞

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Where's MIMA when you need it 😢

Copy link
Member

@moodmosaic moodmosaic Mar 17, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Was it removed by accident? Also—it's time to start adding some more automated tests.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think everything in Hedgehog.Internal.Property is considered internal.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Was it removed for a reason though? (We do try not to break other peoples code if possible.)

, propertyDiscardLimit :: !DiscardLimit
propertyDiscardLimit :: !DiscardLimit
, propertyShrinkLimit :: !ShrinkLimit
, propertyShrinkRetries :: !ShrinkRetries
, propertyTerminationCriteria :: !TerminationCriteria
} deriving (Eq, Ord, Show, Lift)

-- | The number of successful tests that need to be run before a property test
Expand Down Expand Up @@ -333,6 +358,12 @@ newtype PropertyCount =
PropertyCount Int
deriving (Eq, Ord, Show, Num, Enum, Real, Integral)

data TerminationCriteria =
EarlyTermination Confidence TestLimit
| NoEarlyTermination Confidence TestLimit
| NoConfidenceTermination TestLimit
deriving (Eq, Ord, Show, Lift)

--
-- FIXME This whole Log/Failure thing could be a lot more structured to allow
-- FIXME for richer user controlled error messages, think Doc. Ideally we'd
Expand Down Expand Up @@ -403,7 +434,7 @@ newtype CoverCount =
newtype CoverPercentage =
CoverPercentage {
unCoverPercentage :: Double
} deriving (Eq, Ord, Show, Num)
} deriving (Eq, Ord, Show, Num, Fractional)

-- | The name of a classifier.
--
Expand Down Expand Up @@ -890,22 +921,60 @@ test =
defaultConfig :: PropertyConfig
defaultConfig =
PropertyConfig {
propertyTestLimit =
100
, propertyDiscardLimit =
propertyDiscardLimit =
100
, propertyShrinkLimit =
1000
, propertyShrinkRetries =
0
, propertyTerminationCriteria =
NoConfidenceTermination defaultMinTests
}

-- | The minimum amount of tests to run for a 'Property'
--
defaultMinTests :: TestLimit
defaultMinTests = 100

-- | The default confidence allows one false positive in 10^9 tests
--
defaultConfidence :: Confidence
defaultConfidence = 10 ^ (9 :: Int)

-- | Map a config modification function over a property.
--
mapConfig :: (PropertyConfig -> PropertyConfig) -> Property -> Property
mapConfig f (Property cfg t) =
Property (f cfg) t

-- | Make sure that the result is statistically significant in accordance to
-- the passed 'Confidence'
--
withConfidence :: Confidence -> Property -> Property
withConfidence c =
let
setConfidence = \case
NoEarlyTermination _ tests -> NoEarlyTermination c tests
NoConfidenceTermination tests -> NoEarlyTermination c tests
EarlyTermination _ tests -> EarlyTermination c tests
in
mapConfig $ \config@PropertyConfig{..} ->
config
{ propertyTerminationCriteria =
setConfidence propertyTerminationCriteria
}

verifiedTermination :: Property -> Property
verifiedTermination =
mapConfig $ \config@PropertyConfig{..} ->
let
newTerminationCriteria = case propertyTerminationCriteria of
NoEarlyTermination c tests -> EarlyTermination c tests
NoConfidenceTermination tests -> EarlyTermination defaultConfidence tests
EarlyTermination c tests -> EarlyTermination c tests
in
config { propertyTerminationCriteria = newTerminationCriteria }

-- | Set the number of times a property should be executed before it is considered
-- successful.
--
Expand All @@ -915,7 +984,14 @@ mapConfig f (Property cfg t) =
--
withTests :: TestLimit -> Property -> Property
withTests n =
mapConfig $ \config -> config { propertyTestLimit = n }
let
setTestLimit tests = \case
NoEarlyTermination c _ -> NoEarlyTermination c tests
NoConfidenceTermination _ -> NoConfidenceTermination tests
EarlyTermination c _ -> EarlyTermination c tests
in
mapConfig $ \config@PropertyConfig{..} ->
config { propertyTerminationCriteria = setTestLimit n propertyTerminationCriteria }

-- | Set the number of times a property is allowed to discard before the test
-- runner gives up.
Expand Down Expand Up @@ -1013,6 +1089,7 @@ labelCovered :: TestCount -> Label CoverCount -> Bool
labelCovered tests (MkLabel _ _ minimum_ population) =
coverPercentage tests population >= minimum_

-- | All labels are covered
coverageSuccess :: TestCount -> Coverage CoverCount -> Bool
coverageSuccess tests =
null . coverageFailures tests
Expand All @@ -1021,6 +1098,68 @@ coverageFailures :: TestCount -> Coverage CoverCount -> [Label CoverCount]
coverageFailures tests (Coverage kvs) =
filter (not . labelCovered tests) (Map.elems kvs)

-- | Is true when the test coverage satisfies the specified 'Confidence'
-- contstraint for all 'Coverage CoverCount's
confidenceSuccess :: TestCount -> Confidence -> Coverage CoverCount -> Bool
confidenceSuccess tests confidence =
let
assertLow :: Label CoverCount -> Bool
assertLow coverCount@MkLabel{..} =
fst (boundsForLabel tests confidence coverCount)
>= unCoverPercentage labelMinimum / 100.0
in
and . fmap assertLow . Map.elems . coverageLabels

-- | Is true when there exists a label that is sure to have failed according to
-- the 'Confidence' constraint
confidenceFailure :: TestCount -> Confidence -> Coverage CoverCount -> Bool
confidenceFailure tests confidence =
let
assertHigh :: Label CoverCount -> Bool
assertHigh coverCount@MkLabel{..} =
snd (boundsForLabel tests confidence coverCount)
< (unCoverPercentage labelMinimum / 100.0)
in
or . fmap assertHigh . Map.elems . coverageLabels

boundsForLabel :: TestCount -> Confidence -> Label CoverCount -> (Double, Double)
boundsForLabel tests confidence MkLabel{..} =
wilsonBounds
(fromIntegral $ unCoverCount labelAnnotation)
(fromIntegral tests)
(1 / fromIntegral (unConfidence confidence))

-- In order to get an accurate measurement with small sample sizes, we're
-- using the Wilson score interval
-- (<https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval#Wilson_score_interval
-- wikipedia>) instead of a normal approximation interval.
wilsonBounds :: Integer -> Integer -> Double -> (Double, Double)
wilsonBounds positives count acceptance =
let
p =
fromRational $ positives % count
n =
fromIntegral count
z =
invnormcdf $ 1 - acceptance / 2

midpoint =
p + z * z / (2 * n)

offset =
z / (1 + z ** 2 / n) * sqrt (p * (1 - p) / n + z ** 2 / (4 * n ** 2))

denominator =
1 + z * z / n

low =
(midpoint - offset) / denominator

high =
(midpoint + offset) / denominator
in
(low, high)

fromLabel :: Label a -> Coverage a
fromLabel x =
Coverage $
Expand Down
Loading