From 9c66d604c33cdae48f40223e3ab03fc26dd96f12 Mon Sep 17 00:00:00 2001 From: Owen Shepherd Date: Thu, 14 Mar 2024 20:24:57 +0100 Subject: [PATCH 01/19] fix: Add zero and negative byte length checks to ensure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `ensure` is a publically exported function, which has some odd behaviour when given a byte length of zero or less. It raises an error when the value is zero, but happily returns a value when the byte length is negative. I've switched these two behaviours around. ``` λ> runGet (ensure (-1)) "0123" Right "0123" λ> runGet (isolate 0 getWord8) "0123" Left "too few bytes\nFrom:\tdemandInput\n\n" ``` ``` λ> runGet (ensure (-1)) "0123" Left "Failed reading: Attempted to ensure negative amount of bytes\nEmpty call stack\n" λ> runGet (ensure 0) "0123" Right "" ``` --- src/Data/Serialize/Get.hs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/Data/Serialize/Get.hs b/src/Data/Serialize/Get.hs index 692eea4..b89032e 100644 --- a/src/Data/Serialize/Get.hs +++ b/src/Data/Serialize/Get.hs @@ -366,7 +366,10 @@ runGetLazyState m lstr = case runGetLazy' m lstr of -- input, otherwise fail. {-# INLINE ensure #-} ensure :: Int -> Get B.ByteString -ensure n0 = n0 `seq` Get $ \ s0 b0 m0 w0 kf ks -> let +ensure n0 + | n0 < 0 = fail "Attempted to ensure negative amount of bytes" + | n0 == 0 = pure mempty + | otherwise = n0 `seq` Get $ \ s0 b0 m0 w0 kf ks -> let n' = n0 - B.length s0 in if n' <= 0 then ks s0 b0 m0 w0 s0 From 024bba7741d4fd40db292b7ca604bff9f0754c95 Mon Sep 17 00:00:00 2001 From: Owen Shepherd Date: Thu, 14 Mar 2024 21:53:40 +0100 Subject: [PATCH 02/19] fix: Guard internal uses of ensure' so we don't override leftovers This adds an internal variant, `ensure'`, which is used to avoid checking for `0` and `<0` more than once in a given code path. --- src/Data/Serialize/Get.hs | 46 +++++++++++++++++++++++++-------------- 1 file changed, 30 insertions(+), 16 deletions(-) diff --git a/src/Data/Serialize/Get.hs b/src/Data/Serialize/Get.hs index b89032e..70f36ee 100644 --- a/src/Data/Serialize/Get.hs +++ b/src/Data/Serialize/Get.hs @@ -364,12 +364,15 @@ runGetLazyState m lstr = case runGetLazy' m lstr of -- | If at least @n@ bytes of input are available, return the current -- input, otherwise fail. -{-# INLINE ensure #-} ensure :: Int -> Get B.ByteString -ensure n0 - | n0 < 0 = fail "Attempted to ensure negative amount of bytes" - | n0 == 0 = pure mempty - | otherwise = n0 `seq` Get $ \ s0 b0 m0 w0 kf ks -> let +ensure n + | n < 0 = fail "Attempted to ensure negative amount of bytes" + | n == 0 = pure mempty + | otherwise = ensure' n + +{-# INLINE ensure #-} +ensure' :: Int -> Get B.ByteString +ensure' n0 = n0 `seq` Get $ \ s0 b0 m0 w0 kf ks -> let n' = n0 - B.length s0 in if n' <= 0 then ks s0 b0 m0 w0 s0 @@ -408,9 +411,16 @@ ensure n0 -- | Isolate an action to operating within a fixed block of bytes. The action -- is required to consume all the bytes that it is isolated to. isolate :: Int -> Get a -> Get a +isolate 0 m = do + rest <- get + cur <- bytesRead + put mempty cur + a <- m + put rest cur + pure a isolate n m = do M.when (n < 0) (fail "Attempted to isolate a negative number of bytes") - s <- ensure n + s <- ensure' n let (s',rest) = B.splitAt n s cur <- bytesRead put s' cur @@ -427,8 +437,10 @@ failDesc err = do -- | Skip ahead @n@ bytes. Fails if fewer than @n@ bytes are available. skip :: Int -> Get () +skip 0 = pure () skip n = do - s <- ensure n + M.when (n < 0) (fail "Attempted to skip a negative number of bytes") + s <- ensure' n cur <- bytesRead put (B.drop n s) (cur + n) @@ -523,15 +535,17 @@ getShortByteString n = do -- | Pull @n@ bytes from the input, as a strict ByteString. getBytes :: Int -> Get B.ByteString -getBytes n | n < 0 = fail "getBytes: negative length requested" -getBytes n = do - s <- ensure n - let consume = B.unsafeTake n s - rest = B.unsafeDrop n s - -- (consume,rest) = B.splitAt n s - cur <- bytesRead - put rest (cur + n) - return consume +getBytes n + | n < 0 = fail "getBytes: negative length requested" + | n == 0 = pure mempty + | otherwise = do + s <- ensure' n + let consume = B.unsafeTake n s + rest = B.unsafeDrop n s + -- (consume,rest) = B.splitAt n s + cur <- bytesRead + put rest (cur + n) + return consume {-# INLINE getBytes #-} From 91f1add9f0a0707030f7b7b90a565a0a01fd899f Mon Sep 17 00:00:00 2001 From: Owen Shepherd Date: Thu, 14 Mar 2024 11:55:30 +0100 Subject: [PATCH 03/19] feat: Add lazy isolation primitive --- src/Data/Serialize/Get.hs | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/src/Data/Serialize/Get.hs b/src/Data/Serialize/Get.hs index 70f36ee..855fd6c 100644 --- a/src/Data/Serialize/Get.hs +++ b/src/Data/Serialize/Get.hs @@ -39,6 +39,7 @@ module Data.Serialize.Get ( -- * Parsing , ensure , isolate + , isolateLazy , label , skip , uncheckedSkip @@ -408,6 +409,9 @@ ensure' n0 = n0 `seq` Get $ \ s0 b0 m0 w0 kf ks -> let in ks s b m0 w0 s else getMore n' s0 ss b0 m0 w0 kf ks +negativeIsolation :: Get a +negativeIsolation = fail "Attempted to isolate a negative number of bytes" + -- | Isolate an action to operating within a fixed block of bytes. The action -- is required to consume all the bytes that it is isolated to. isolate :: Int -> Get a -> Get a @@ -419,7 +423,7 @@ isolate 0 m = do put rest cur pure a isolate n m = do - M.when (n < 0) (fail "Attempted to isolate a negative number of bytes") + M.when (n < 0) negativeIsolation s <- ensure' n let (s',rest) = B.splitAt n s cur <- bytesRead @@ -430,6 +434,35 @@ isolate n m = do put rest (cur + n) return a +getAtMost :: Int -> Get B.ByteString +getAtMost n = do + (bs, rest) <- B.splitAt n <$> get + m <- bytesRead + unless (B.null rest) $ put rest (m - B.length rest) + pure bs + +-- | An incremental version of 'isolate', which doesn't try to read the input +-- into a buffer all at once. +isolateLazy :: forall a. Int -> Get a -> Get a +isolateLazy n _ + | n < 0 = negativeIsolation +isolateLazy n parser = go . runGetPartial parser =<< getAtMost n + where + go :: Result a -> Get a + go r = case r of + Fail err bs -> bytesRead >>= put bs >> fail err + Done a bs + | not (B.null bs) -> throwDidntParseEnough + | otherwise -> do + bytesRead' <- bytesRead + unless (bytesRead' == n) throwDidntParseEnough + pure a + Partial cont -> do + bs <- getAtMost . (n -) =<< bytesRead + go $ cont bs + + throwDidntParseEnough = fail "Not all bytes parsed in isolateLazy" + failDesc :: String -> Get a failDesc err = do let msg = "Failed reading: " ++ err From fb53e7a1e653097d2c20b4c1257ba2818dc95d38 Mon Sep 17 00:00:00 2001 From: Owen Shepherd Date: Thu, 14 Mar 2024 20:19:51 +0100 Subject: [PATCH 04/19] feat: Isolation tests --- cereal.cabal | 2 ++ src/Data/Serialize/Get.hs | 11 +++--- tests/GetTests.hs | 71 ++++++++++++++++++++++++++++++++++++++- 3 files changed, 78 insertions(+), 6 deletions(-) diff --git a/cereal.cabal b/cereal.cabal index bb8c7c0..ceb34f0 100644 --- a/cereal.cabal +++ b/cereal.cabal @@ -66,8 +66,10 @@ test-suite test-cereal build-depends: base == 4.*, bytestring >= 0.9, + HUnit, QuickCheck, test-framework, + test-framework-hunit, test-framework-quickcheck2, cereal diff --git a/src/Data/Serialize/Get.hs b/src/Data/Serialize/Get.hs index 855fd6c..780774b 100644 --- a/src/Data/Serialize/Get.hs +++ b/src/Data/Serialize/Get.hs @@ -450,7 +450,7 @@ isolateLazy n parser = go . runGetPartial parser =<< getAtMost n where go :: Result a -> Get a go r = case r of - Fail err bs -> bytesRead >>= put bs >> fail err + Fail err bs -> bytesRead >>= put bs >> failRaw err Done a bs | not (B.null bs) -> throwDidntParseEnough | otherwise -> do @@ -461,12 +461,13 @@ isolateLazy n parser = go . runGetPartial parser =<< getAtMost n bs <- getAtMost . (n -) =<< bytesRead go $ cont bs - throwDidntParseEnough = fail "Not all bytes parsed in isolateLazy" + throwDidntParseEnough = fail "not all bytes parsed in isolate" + +failRaw :: String -> Get a +failRaw msg = Get (\s0 b0 m0 _ kf _ -> kf s0 b0 m0 [] msg) failDesc :: String -> Get a -failDesc err = do - let msg = "Failed reading: " ++ err - Get (\s0 b0 m0 _ kf _ -> kf s0 b0 m0 [] msg) +failDesc err = failRaw $ "Failed reading: " ++ err -- | Skip ahead @n@ bytes. Fails if fewer than @n@ bytes are available. skip :: Int -> Get () diff --git a/tests/GetTests.hs b/tests/GetTests.hs index 3686279..e4e9d61 100644 --- a/tests/GetTests.hs +++ b/tests/GetTests.hs @@ -12,7 +12,11 @@ import qualified Data.ByteString.Lazy as LB import Data.Serialize.Get import Test.Framework (Test(),testGroup) import Test.Framework.Providers.QuickCheck2 (testProperty) -import Test.QuickCheck as QC +import Test.Framework.Providers.HUnit (testCase) +import Test.HUnit (Assertion, (@=?), assertFailure) +import Test.QuickCheck hiding (Result) +import qualified Test.QuickCheck as QC +import Data.List (isInfixOf) -- Data to express Get parser to generate @@ -254,6 +258,68 @@ alterDistr' p1 p2 p3 = y = buildGet p2 z = buildGet p3 +isolateLazyIsIncremental :: Assertion +isolateLazyIsIncremental = go (runGetPartial parser $ BS.replicate 11 0) + where + parser :: Get () + parser = isolateLazy 100 $ do + skip 10 + fail failStr + pure () + + failStr :: String + failStr = "no thanks" + + go :: Result () -> IO () + go r = case r of + Done () _ -> assertFailure "Impossible" + Fail failure _ -> unless (failStr `isInfixOf` failure) $ assertFailure "Wrong error!" + Partial cont -> assertFailure "Asked for more input!" + +isolateLazyLeavesRemainingBytes :: Assertion +isolateLazyLeavesRemainingBytes = go (runGetPartial parser $ BS.replicate 11 0) + where + parser :: Get () + parser = isolateLazy 100 $ do + skip 10 + fail failStr + pure () + + failStr :: String + failStr = "no thanks" + + go :: Result () -> IO () + go r = case r of + Done () _ -> assertFailure "Impossible" + Fail failure _ -> unless (failStr `isInfixOf` failure) $ assertFailure "Wrong error!" + Partial cont -> assertFailure "Asked for more input!" + +isolateAndIsolateLazy :: Int -> GetD -> Property +isolateAndIsolateLazy n parser' = isolate n parser ==~ isolateLazy n parser + where + parser = buildGet parser' + +-- isolateLazyLeavesRemainingBytesAfterSuccess :: Assertion +-- isolateLazyLeavesRemainingBytesAfterSuccess = go + +isolateIsNotIncremental :: Assertion +isolateIsNotIncremental = go (runGetPartial parser $ BS.replicate 11 0) + where + parser :: Get () + parser = isolate 100 $ do + skip 10 + fail failStr + pure () + + failStr :: String + failStr = "no thanks" + + go :: Result () -> IO () + go r = case r of + Done () _ -> assertFailure "Impossible" + Fail failure _ -> assertFailure $ "Strict isolate was incremental: " <> failure + Partial cont -> pure () + tests :: Test tests = testGroup "GetTests" @@ -275,4 +341,7 @@ tests = testGroup "GetTests" , testProperty "strict - alternative assoc" alterAssoc' , testProperty "lazy - alternative distr" alterDistr , testProperty "strict - alternative distr" alterDistr' + , testCase "isolate is not incremental" isolateIsNotIncremental + , testCase "isolateLazy is incremental" isolateLazyIsIncremental + , testProperty "isolations are equivalent" isolateAndIsolateLazy ] From ad2c297b93fd91b2ee10b92c57296d7cc81a1861 Mon Sep 17 00:00:00 2001 From: Owen Shepherd Date: Thu, 14 Mar 2024 20:36:56 +0100 Subject: [PATCH 05/19] refactor: Extract out isolation under-parse errors --- src/Data/Serialize/Get.hs | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/Data/Serialize/Get.hs b/src/Data/Serialize/Get.hs index 780774b..c51a74e 100644 --- a/src/Data/Serialize/Get.hs +++ b/src/Data/Serialize/Get.hs @@ -412,6 +412,9 @@ ensure' n0 = n0 `seq` Get $ \ s0 b0 m0 w0 kf ks -> let negativeIsolation :: Get a negativeIsolation = fail "Attempted to isolate a negative number of bytes" +isolationUnderParse :: Get a +isolationUnderParse = fail "not all bytes parsed in isolate" + -- | Isolate an action to operating within a fixed block of bytes. The action -- is required to consume all the bytes that it is isolated to. isolate :: Int -> Get a -> Get a @@ -430,7 +433,7 @@ isolate n m = do put s' cur a <- m used <- get - unless (B.null used) (fail "not all bytes parsed in isolate") + unless (B.null used) isolationUnderParse put rest (cur + n) return a @@ -452,17 +455,15 @@ isolateLazy n parser = go . runGetPartial parser =<< getAtMost n go r = case r of Fail err bs -> bytesRead >>= put bs >> failRaw err Done a bs - | not (B.null bs) -> throwDidntParseEnough + | not (B.null bs) -> isolationUnderParse | otherwise -> do bytesRead' <- bytesRead - unless (bytesRead' == n) throwDidntParseEnough + unless (bytesRead' == n) isolationUnderParse pure a Partial cont -> do bs <- getAtMost . (n -) =<< bytesRead go $ cont bs - throwDidntParseEnough = fail "not all bytes parsed in isolate" - failRaw :: String -> Get a failRaw msg = Get (\s0 b0 m0 _ kf _ -> kf s0 b0 m0 [] msg) From 647462baab7651cb559112d2ec3d43dc2200d15c Mon Sep 17 00:00:00 2001 From: Owen Shepherd Date: Thu, 14 Mar 2024 21:38:14 +0100 Subject: [PATCH 06/19] feat: Expose high and low level result interface via pattern synonym --- src/Data/Serialize/Get.hs | 34 +++++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/src/Data/Serialize/Get.hs b/src/Data/Serialize/Get.hs index c51a74e..0b12bd7 100644 --- a/src/Data/Serialize/Get.hs +++ b/src/Data/Serialize/Get.hs @@ -2,6 +2,8 @@ {-# LANGUAGE MagicHash #-} {-# LANGUAGE Rank2Types #-} {-# LANGUAGE BangPatterns #-} +{-# LANGUAGE ViewPatterns #-} +{-# LANGUAGE PatternSynonyms #-} ----------------------------------------------------------------------------- -- | @@ -32,7 +34,7 @@ module Data.Serialize.Get ( , runGetLazyState -- ** Incremental interface - , Result(..) + , Result(Fail, ..) , runGetPartial , runGetChunk @@ -126,10 +128,11 @@ import qualified Data.Tree as T #if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__) import GHC.Base import GHC.Word +import Debug.Trace #endif -- | The result of a parse. -data Result r = Fail String B.ByteString +data Result r = FailRaw (String, [String]) B.ByteString -- ^ The parse failed. The 'String' is the -- message describing the error, if any. | Partial (B.ByteString -> Result r) @@ -141,13 +144,20 @@ data Result r = Fail String B.ByteString -- input that had not yet been consumed (if any) when -- the parse succeeded. +pattern Fail :: String -> B.ByteString -> Result r +pattern Fail msg bs <- FailRaw (formatFailure -> msg) bs +{-# COMPLETE Fail, Partial, Done #-} + +formatFailure :: (String, [String]) -> String +formatFailure (err, stack) = unlines [err, formatTrace stack] + instance Show r => Show (Result r) where - show (Fail msg _) = "Fail " ++ show msg + show (FailRaw msg _) = "Fail " ++ show msg show (Partial _) = "Partial _" show (Done r bs) = "Done " ++ show r ++ " " ++ show bs instance Functor Result where - fmap _ (Fail msg rest) = Fail msg rest + fmap _ (FailRaw a bs) = FailRaw a bs fmap f (Partial k) = Partial (fmap f . k) fmap f (Done r bs) = Done (f r) bs @@ -276,7 +286,7 @@ finalK s _ _ _ a = Done a s failK :: Failure a failK s b _ ls msg = - Fail (unlines [msg, formatTrace ls]) (s `B.append` bufferBytes b) + FailRaw (msg, ls) (s `B.append` bufferBytes b) -- | Run the Get monad applies a 'get'-based parser on the input ByteString runGet :: Get a -> B.ByteString -> Either String a @@ -440,8 +450,8 @@ isolate n m = do getAtMost :: Int -> Get B.ByteString getAtMost n = do (bs, rest) <- B.splitAt n <$> get - m <- bytesRead - unless (B.null rest) $ put rest (m - B.length rest) + curr <- bytesRead + unless (B.null rest) $ put rest (curr + B.length bs) pure bs -- | An incremental version of 'isolate', which doesn't try to read the input @@ -453,22 +463,23 @@ isolateLazy n parser = go . runGetPartial parser =<< getAtMost n where go :: Result a -> Get a go r = case r of - Fail err bs -> bytesRead >>= put bs >> failRaw err + FailRaw (msg, stack) bs -> bytesRead >>= put bs >> failRaw msg stack Done a bs | not (B.null bs) -> isolationUnderParse | otherwise -> do bytesRead' <- bytesRead unless (bytesRead' == n) isolationUnderParse + -- fail $ "mismatch " ++ show (bytesRead', n) pure a Partial cont -> do bs <- getAtMost . (n -) =<< bytesRead go $ cont bs -failRaw :: String -> Get a -failRaw msg = Get (\s0 b0 m0 _ kf _ -> kf s0 b0 m0 [] msg) +failRaw :: String -> [String] -> Get a +failRaw msg stack = Get (\s0 b0 m0 _ kf _ -> kf s0 b0 m0 stack msg) failDesc :: String -> Get a -failDesc err = failRaw $ "Failed reading: " ++ err +failDesc err = failRaw ("Failed reading: " ++ err) [] -- | Skip ahead @n@ bytes. Fails if fewer than @n@ bytes are available. skip :: Int -> Get () @@ -477,6 +488,7 @@ skip n = do M.when (n < 0) (fail "Attempted to skip a negative number of bytes") s <- ensure' n cur <- bytesRead + -- traceShow (n, s, cur) $ put (B.drop n s) (cur + n) -- | Skip ahead up to @n@ bytes in the current chunk. No error if there aren't From 35467c2b5c4f1f9cd0fa31dfdead742ea7b61055 Mon Sep 17 00:00:00 2001 From: Owen Shepherd Date: Thu, 14 Mar 2024 22:47:59 +0100 Subject: [PATCH 07/19] Test behaviour between isolation modes --- src/Data/Serialize/Get.hs | 58 ++++++++++++++++++++++----------------- tests/GetTests.hs | 33 +++++++++++++++++----- 2 files changed, 59 insertions(+), 32 deletions(-) diff --git a/src/Data/Serialize/Get.hs b/src/Data/Serialize/Get.hs index 0b12bd7..941dfd5 100644 --- a/src/Data/Serialize/Get.hs +++ b/src/Data/Serialize/Get.hs @@ -128,7 +128,6 @@ import qualified Data.Tree as T #if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__) import GHC.Base import GHC.Word -import Debug.Trace #endif -- | The result of a parse. @@ -425,54 +424,64 @@ negativeIsolation = fail "Attempted to isolate a negative number of bytes" isolationUnderParse :: Get a isolationUnderParse = fail "not all bytes parsed in isolate" --- | Isolate an action to operating within a fixed block of bytes. The action --- is required to consume all the bytes that it is isolated to. -isolate :: Int -> Get a -> Get a -isolate 0 m = do +isolationUnderSupply :: Get a +isolationUnderSupply = failRaw "too few bytes" ["demandInput"] + +isolate0 :: Get a -> Get a +isolate0 parser = do rest <- get cur <- bytesRead put mempty cur - a <- m + a <- parser put rest cur pure a -isolate n m = do - M.when (n < 0) negativeIsolation - s <- ensure' n - let (s',rest) = B.splitAt n s - cur <- bytesRead - put s' cur - a <- m - used <- get - unless (B.null used) isolationUnderParse - put rest (cur + n) - return a + +-- | Isolate an action to operating within a fixed block of bytes. The action +-- is required to consume all the bytes that it is isolated to. +isolate :: Int -> Get a -> Get a +isolate n m + | n < 0 = negativeIsolation + | n == 0 = isolate0 m + | otherwise = do + s <- ensure' n + let (s',rest) = B.splitAt n s + cur <- bytesRead + put s' cur + a <- m + used <- get + unless (B.null used) isolationUnderParse + put rest (cur + n) + return a getAtMost :: Int -> Get B.ByteString getAtMost n = do (bs, rest) <- B.splitAt n <$> get curr <- bytesRead - unless (B.null rest) $ put rest (curr + B.length bs) + put rest (curr + B.length bs) pure bs -- | An incremental version of 'isolate', which doesn't try to read the input -- into a buffer all at once. -isolateLazy :: forall a. Int -> Get a -> Get a -isolateLazy n _ +isolateLazy :: Int -> Get a -> Get a +isolateLazy n parser | n < 0 = negativeIsolation + | n == 0 = isolate0 parser isolateLazy n parser = go . runGetPartial parser =<< getAtMost n where go :: Result a -> Get a go r = case r of FailRaw (msg, stack) bs -> bytesRead >>= put bs >> failRaw msg stack Done a bs - | not (B.null bs) -> isolationUnderParse | otherwise -> do bytesRead' <- bytesRead - unless (bytesRead' == n) isolationUnderParse - -- fail $ "mismatch " ++ show (bytesRead', n) + -- Technically this is both undersupply, and underparse + -- buyt we use undersupply to match strict isolation + unless (bytesRead' == n) isolationUnderSupply + unless (B.null bs) isolationUnderParse pure a Partial cont -> do - bs <- getAtMost . (n -) =<< bytesRead + pos <- bytesRead + bs <- getAtMost $ n - pos go $ cont bs failRaw :: String -> [String] -> Get a @@ -488,7 +497,6 @@ skip n = do M.when (n < 0) (fail "Attempted to skip a negative number of bytes") s <- ensure' n cur <- bytesRead - -- traceShow (n, s, cur) $ put (B.drop n s) (cur + n) -- | Skip ahead up to @n@ bytes in the current chunk. No error if there aren't diff --git a/tests/GetTests.hs b/tests/GetTests.hs index e4e9d61..aec7412 100644 --- a/tests/GetTests.hs +++ b/tests/GetTests.hs @@ -1,5 +1,6 @@ {-# OPTIONS_GHC -fno-warn-orphans #-} {-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE ViewPatterns #-} module GetTests (tests) where @@ -17,6 +18,7 @@ import Test.HUnit (Assertion, (@=?), assertFailure) import Test.QuickCheck hiding (Result) import qualified Test.QuickCheck as QC import Data.List (isInfixOf) +import Debug.Trace -- Data to express Get parser to generate @@ -127,10 +129,12 @@ isEmpty2 = do pure True -- Compare with chunks -(==~) :: Eq a => Get a -> Get a -> Property +(==~) :: (Eq a, Show a) => Get a -> Get a -> Property p1 ==~ p2 = conjoin - [ counterexample (show in0) $ R (runGetLazy p1 s) == R (runGetLazy p2 s) + [ let rl = runGetLazy p1 s + rr = runGetLazy p2 s + in counterexample (show (in0, n, s, rl, rr)) $ R rl == R rr | n <- [0 .. testLength] , let Chunks in0 = mkChunks n s = LB.fromChunks [ BS.pack c | c <- in0 ] @@ -294,14 +298,29 @@ isolateLazyLeavesRemainingBytes = go (runGetPartial parser $ BS.replicate 11 0) Fail failure _ -> unless (failStr `isInfixOf` failure) $ assertFailure "Wrong error!" Partial cont -> assertFailure "Asked for more input!" -isolateAndIsolateLazy :: Int -> GetD -> Property -isolateAndIsolateLazy n parser' = isolate n parser ==~ isolateLazy n parser +instance Arbitrary LB.ByteString where + arbitrary = LB.fromChunks . pure . BS.pack <$> arbitrary + +newtype IsolationRes a = IRes (Either String a) + deriving Show + +-- Sometimes lazy and strict isolations return different errors, +-- eg. when EOF is called before the end of an isolation which isn't prodided +-- enough input. +-- Strict sees it as a lack of bytes, Lazy sees it as a guard failure ("empty"). +instance Eq a => Eq (IsolationRes a) where + IRes a == IRes b = case (a, b) of + (Left e1, Left e2) -> e1 == e2 || errsEqAsymmetric e1 e2 || errsEqAsymmetric e2 e1 + _ -> a == b + where + errsEqAsymmetric e1 e2 = "too few bytes" `isInfixOf` e1 && "empty" `isInfixOf` e2 + +isolateAndIsolateLazy :: Int -> GetD -> LB.ByteString -> Property +isolateAndIsolateLazy n parser' bs + = IRes (runGetLazy (isolate n parser) bs) === IRes (runGetLazy (isolateLazy n parser) bs) where parser = buildGet parser' --- isolateLazyLeavesRemainingBytesAfterSuccess :: Assertion --- isolateLazyLeavesRemainingBytesAfterSuccess = go - isolateIsNotIncremental :: Assertion isolateIsNotIncremental = go (runGetPartial parser $ BS.replicate 11 0) where From 2dc6b02c0b0dc9d52ba2333928010122019da0ad Mon Sep 17 00:00:00 2001 From: Owen Shepherd Date: Thu, 14 Mar 2024 23:57:13 +0100 Subject: [PATCH 08/19] chore: Remove raw failure constructor from export list --- src/Data/Serialize/Get.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Data/Serialize/Get.hs b/src/Data/Serialize/Get.hs index 941dfd5..af0d69b 100644 --- a/src/Data/Serialize/Get.hs +++ b/src/Data/Serialize/Get.hs @@ -34,7 +34,7 @@ module Data.Serialize.Get ( , runGetLazyState -- ** Incremental interface - , Result(Fail, ..) + , Result(Fail, Partial, Done) , runGetPartial , runGetChunk From 579b534c0ac5a520a39dacc81e73a494f5c8464d Mon Sep 17 00:00:00 2001 From: Owen Shepherd Date: Fri, 15 Mar 2024 13:54:32 +0100 Subject: [PATCH 09/19] test: Add leftover isolation tests --- tests/GetTests.hs | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/tests/GetTests.hs b/tests/GetTests.hs index aec7412..42a51c1 100644 --- a/tests/GetTests.hs +++ b/tests/GetTests.hs @@ -1,6 +1,7 @@ {-# OPTIONS_GHC -fno-warn-orphans #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE ViewPatterns #-} +{-# LANGUAGE OverloadedStrings #-} module GetTests (tests) where @@ -339,6 +340,25 @@ isolateIsNotIncremental = go (runGetPartial parser $ BS.replicate 11 0) Fail failure _ -> assertFailure $ "Strict isolate was incremental: " <> failure Partial cont -> pure () +-- Checks return values, leftovers, fails for continuations +assertResultsMatch :: Eq a => Result a -> (Maybe a, BS.ByteString) -> Assertion +assertResultsMatch r1 r2 = case (r1, r2) of + (Partial _, _) -> assertFailure "Continuation received" + (Done a1 bs1, (Just a2, bs2)) -> do + unless (a1 == a2) $ assertFailure "Result mismatch" + unless (bs1 == bs2) $ assertFailure $ "Success leftover mismatch: " ++ show (bs1, bs2) + (Fail msg1 bs1, (Nothing, bs2)) -> + unless (bs1 == bs2) $ assertFailure $ "Failure leftovers mismatch: " ++ show (bs1, bs2) + _ -> assertFailure "Different result types" + +isolateLazyDeterminesLeftovers :: Assertion +isolateLazyDeterminesLeftovers = do + assertResultsMatch (runGetPartial (isolateLazy 1 getWord8) "123") (Just $ toEnum $ fromEnum '1', "23") + assertResultsMatch (runGetPartial (isolateLazy 2 getWord8) "123") (Nothing, "3") + -- I don't think this is the right behaviour, but it's the existing behaviour, so + -- we're checking we're consistent + assertResultsMatch (runGetPartial (isolate 2 $ fail "no thanks" *> pure ()) "123") (Nothing, "12") + assertResultsMatch (runGetPartial (isolateLazy 2 $ fail "no thanks" *> pure ()) "123") (Nothing, "12") tests :: Test tests = testGroup "GetTests" @@ -363,4 +383,5 @@ tests = testGroup "GetTests" , testCase "isolate is not incremental" isolateIsNotIncremental , testCase "isolateLazy is incremental" isolateLazyIsIncremental , testProperty "isolations are equivalent" isolateAndIsolateLazy + , testCase "isolateLazy determines leftovers" isolateLazyDeterminesLeftovers ] From bde4c0af2d6ecb588dd8cf8d1cc466573ddbb02c Mon Sep 17 00:00:00 2001 From: Owen Shepherd Date: Fri, 15 Mar 2024 14:42:02 +0100 Subject: [PATCH 10/19] fix: Refuse to continue empty isolated parser --- src/Data/Serialize/Get.hs | 9 ++++++++- tests/GetTests.hs | 4 ++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/Data/Serialize/Get.hs b/src/Data/Serialize/Get.hs index af0d69b..bb11300 100644 --- a/src/Data/Serialize/Get.hs +++ b/src/Data/Serialize/Get.hs @@ -482,7 +482,14 @@ isolateLazy n parser = go . runGetPartial parser =<< getAtMost n Partial cont -> do pos <- bytesRead bs <- getAtMost $ n - pos - go $ cont bs + -- We want to give the inner parser a chance to determine + -- output, but if it returns a continuation, we'll throw + -- instead of recursing indefinitely + if B.null bs + then case cont bs of + Partial cont -> isolationUnderSupply + a -> go a + else go $ cont bs failRaw :: String -> [String] -> Get a failRaw msg stack = Get (\s0 b0 m0 _ kf _ -> kf s0 b0 m0 stack msg) diff --git a/tests/GetTests.hs b/tests/GetTests.hs index 42a51c1..2623d17 100644 --- a/tests/GetTests.hs +++ b/tests/GetTests.hs @@ -355,8 +355,8 @@ isolateLazyDeterminesLeftovers :: Assertion isolateLazyDeterminesLeftovers = do assertResultsMatch (runGetPartial (isolateLazy 1 getWord8) "123") (Just $ toEnum $ fromEnum '1', "23") assertResultsMatch (runGetPartial (isolateLazy 2 getWord8) "123") (Nothing, "3") - -- I don't think this is the right behaviour, but it's the existing behaviour, so - -- we're checking we're consistent + -- Note(414owen): I don't think this is the ideal behaviour, but it's the existing behaviour, so + -- I'll at least check that isolateLazy matches the behaviour of isolate... assertResultsMatch (runGetPartial (isolate 2 $ fail "no thanks" *> pure ()) "123") (Nothing, "12") assertResultsMatch (runGetPartial (isolateLazy 2 $ fail "no thanks" *> pure ()) "123") (Nothing, "12") From ac8c7be1a608b2956e7e481a916558c5a4f2e8af Mon Sep 17 00:00:00 2001 From: Owen Shepherd Date: Sun, 17 Mar 2024 22:40:06 +0100 Subject: [PATCH 11/19] fix: Add initialBytesRead param to isolateLazy loop --- src/Data/Serialize/Get.hs | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/Data/Serialize/Get.hs b/src/Data/Serialize/Get.hs index bb11300..a94c36a 100644 --- a/src/Data/Serialize/Get.hs +++ b/src/Data/Serialize/Get.hs @@ -466,30 +466,32 @@ isolateLazy :: Int -> Get a -> Get a isolateLazy n parser | n < 0 = negativeIsolation | n == 0 = isolate0 parser -isolateLazy n parser = go . runGetPartial parser =<< getAtMost n +isolateLazy n parser = do + initialBytesRead <- bytesRead + go initialBytesRead . runGetPartial parser =<< getAtMost n where - go :: Result a -> Get a - go r = case r of + go :: Int -> Result a -> Get a + go initialBytesRead r = case r of FailRaw (msg, stack) bs -> bytesRead >>= put bs >> failRaw msg stack Done a bs | otherwise -> do bytesRead' <- bytesRead -- Technically this is both undersupply, and underparse -- buyt we use undersupply to match strict isolation - unless (bytesRead' == n) isolationUnderSupply + unless (bytesRead' - initialBytesRead == n) isolationUnderSupply unless (B.null bs) isolationUnderParse pure a Partial cont -> do pos <- bytesRead - bs <- getAtMost $ n - pos + bs <- getAtMost $ n - (pos - initialBytesRead) -- We want to give the inner parser a chance to determine -- output, but if it returns a continuation, we'll throw -- instead of recursing indefinitely if B.null bs then case cont bs of Partial cont -> isolationUnderSupply - a -> go a - else go $ cont bs + a -> go initialBytesRead a + else go initialBytesRead $ cont bs failRaw :: String -> [String] -> Get a failRaw msg stack = Get (\s0 b0 m0 _ kf _ -> kf s0 b0 m0 stack msg) From 81128908c05ec81df9b613fc9d95c7decd4ae799 Mon Sep 17 00:00:00 2001 From: Owen Shepherd Date: Mon, 18 Mar 2024 00:48:14 +0100 Subject: [PATCH 12/19] fix: Ensure at least one byte is given to isolateLazy --- src/Data/Serialize/Get.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Data/Serialize/Get.hs b/src/Data/Serialize/Get.hs index a94c36a..1ee71f9 100644 --- a/src/Data/Serialize/Get.hs +++ b/src/Data/Serialize/Get.hs @@ -455,7 +455,7 @@ isolate n m getAtMost :: Int -> Get B.ByteString getAtMost n = do - (bs, rest) <- B.splitAt n <$> get + (bs, rest) <- B.splitAt n <$> ensure' 1 curr <- bytesRead put rest (curr + B.length bs) pure bs From c27d82920088b0d4f4f85abe02baa9ba481424b3 Mon Sep 17 00:00:00 2001 From: Owen Shepherd Date: Wed, 20 Mar 2024 10:13:26 +0100 Subject: [PATCH 13/19] chore: Fix lazy isolation comment --- src/Data/Serialize/Get.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Data/Serialize/Get.hs b/src/Data/Serialize/Get.hs index 1ee71f9..44366a7 100644 --- a/src/Data/Serialize/Get.hs +++ b/src/Data/Serialize/Get.hs @@ -476,8 +476,8 @@ isolateLazy n parser = do Done a bs | otherwise -> do bytesRead' <- bytesRead - -- Technically this is both undersupply, and underparse - -- buyt we use undersupply to match strict isolation + -- Technically this matches both undersupply, and underparse + -- but we throw undersupply to match strict isolation unless (bytesRead' - initialBytesRead == n) isolationUnderSupply unless (B.null bs) isolationUnderParse pure a From 8370b9087d0a564cc2fb65d501ad28bd838643dc Mon Sep 17 00:00:00 2001 From: Owen Shepherd Date: Mon, 29 Apr 2024 14:09:50 +0200 Subject: [PATCH 14/19] chore: Apply isolateLazy suggestions --- cabal.project.local | 2 ++ src/Data/Serialize/Get.hs | 27 ++++++++++++++------------- tests/GetTests.hs | 4 +--- 3 files changed, 17 insertions(+), 16 deletions(-) create mode 100644 cabal.project.local diff --git a/cabal.project.local b/cabal.project.local new file mode 100644 index 0000000..376588b --- /dev/null +++ b/cabal.project.local @@ -0,0 +1,2 @@ +ignore-project: False +test-show-details: direct diff --git a/src/Data/Serialize/Get.hs b/src/Data/Serialize/Get.hs index 44366a7..b8017e3 100644 --- a/src/Data/Serialize/Get.hs +++ b/src/Data/Serialize/Get.hs @@ -422,10 +422,10 @@ negativeIsolation :: Get a negativeIsolation = fail "Attempted to isolate a negative number of bytes" isolationUnderParse :: Get a -isolationUnderParse = fail "not all bytes parsed in isolate" +isolationUnderParse = fail "Isolated parser didn't consume all input" isolationUnderSupply :: Get a -isolationUnderSupply = failRaw "too few bytes" ["demandInput"] +isolationUnderSupply = fail "Too few bytes supplied to isolated parser" isolate0 :: Get a -> Get a isolate0 parser = do @@ -454,6 +454,7 @@ isolate n m return a getAtMost :: Int -> Get B.ByteString +getAtMost 0 = pure mempty getAtMost n = do (bs, rest) <- B.splitAt n <$> ensure' 1 curr <- bytesRead @@ -468,19 +469,19 @@ isolateLazy n parser | n == 0 = isolate0 parser isolateLazy n parser = do initialBytesRead <- bytesRead - go initialBytesRead . runGetPartial parser =<< getAtMost n + bs <- getAtMost n + go initialBytesRead $ runGetPartial parser bs where go :: Int -> Result a -> Get a go initialBytesRead r = case r of - FailRaw (msg, stack) bs -> bytesRead >>= put bs >> failRaw msg stack - Done a bs - | otherwise -> do - bytesRead' <- bytesRead - -- Technically this matches both undersupply, and underparse - -- but we throw undersupply to match strict isolation - unless (bytesRead' - initialBytesRead == n) isolationUnderSupply - unless (B.null bs) isolationUnderParse - pure a + FailRaw (msg, stack) bs -> do + m <- bytesRead + put bs m + failRaw msg stack + Done a bs -> do + bytesRead' <- bytesRead + unless (bytesRead' - initialBytesRead == n && B.null bs) isolationUnderParse + pure a Partial cont -> do pos <- bytesRead bs <- getAtMost $ n - (pos - initialBytesRead) @@ -489,7 +490,7 @@ isolateLazy n parser = do -- instead of recursing indefinitely if B.null bs then case cont bs of - Partial cont -> isolationUnderSupply + Partial _ -> isolationUnderSupply a -> go initialBytesRead a else go initialBytesRead $ cont bs diff --git a/tests/GetTests.hs b/tests/GetTests.hs index 2623d17..896d3aa 100644 --- a/tests/GetTests.hs +++ b/tests/GetTests.hs @@ -311,10 +311,8 @@ newtype IsolationRes a = IRes (Either String a) -- Strict sees it as a lack of bytes, Lazy sees it as a guard failure ("empty"). instance Eq a => Eq (IsolationRes a) where IRes a == IRes b = case (a, b) of - (Left e1, Left e2) -> e1 == e2 || errsEqAsymmetric e1 e2 || errsEqAsymmetric e2 e1 + (Left e1, Left e2) -> True _ -> a == b - where - errsEqAsymmetric e1 e2 = "too few bytes" `isInfixOf` e1 && "empty" `isInfixOf` e2 isolateAndIsolateLazy :: Int -> GetD -> LB.ByteString -> Property isolateAndIsolateLazy n parser' bs From ec2718487e7c2e33c6b792607db5be53903ba8ed Mon Sep 17 00:00:00 2001 From: Owen Shepherd Date: Thu, 16 May 2024 11:00:55 +0200 Subject: [PATCH 15/19] docs: Improve rationale for not accepting continuations for empty chunks --- src/Data/Serialize/Get.hs | 13 ++++++++++--- tests/GetTests.hs | 2 -- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/src/Data/Serialize/Get.hs b/src/Data/Serialize/Get.hs index b8017e3..476eb01 100644 --- a/src/Data/Serialize/Get.hs +++ b/src/Data/Serialize/Get.hs @@ -484,10 +484,17 @@ isolateLazy n parser = do pure a Partial cont -> do pos <- bytesRead + -- Sometimes, the amount of bytes left in the isolated stream + -- will be zero (ie. `n - (pos - initialBytesRead) = 0`) + -- In this case we give the current continuation an empty + -- bytestring. We do this so that the inner parser can determine + -- the final result. + -- If, however, the inner parser returns another continuation, + -- then presumably it will keep doing so, for as long as we provide + -- it with empty chunks of input. + -- In this case, we throw our own error (`isolationUnderSupply`), + -- to avoid recurring indefinitely. bs <- getAtMost $ n - (pos - initialBytesRead) - -- We want to give the inner parser a chance to determine - -- output, but if it returns a continuation, we'll throw - -- instead of recursing indefinitely if B.null bs then case cont bs of Partial _ -> isolationUnderSupply diff --git a/tests/GetTests.hs b/tests/GetTests.hs index 896d3aa..6914bf7 100644 --- a/tests/GetTests.hs +++ b/tests/GetTests.hs @@ -353,8 +353,6 @@ isolateLazyDeterminesLeftovers :: Assertion isolateLazyDeterminesLeftovers = do assertResultsMatch (runGetPartial (isolateLazy 1 getWord8) "123") (Just $ toEnum $ fromEnum '1', "23") assertResultsMatch (runGetPartial (isolateLazy 2 getWord8) "123") (Nothing, "3") - -- Note(414owen): I don't think this is the ideal behaviour, but it's the existing behaviour, so - -- I'll at least check that isolateLazy matches the behaviour of isolate... assertResultsMatch (runGetPartial (isolate 2 $ fail "no thanks" *> pure ()) "123") (Nothing, "12") assertResultsMatch (runGetPartial (isolateLazy 2 $ fail "no thanks" *> pure ()) "123") (Nothing, "12") From 94aaca094a02735e9d9a9c97c051bb5ccd0a35d9 Mon Sep 17 00:00:00 2001 From: Owen Shepherd Date: Fri, 24 May 2024 10:02:03 +0200 Subject: [PATCH 16/19] chore: Add cabal.project.local to gitignore --- .gitignore | 1 + cabal.project.local | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) delete mode 100644 cabal.project.local diff --git a/.gitignore b/.gitignore index 2c50f15..c88a919 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,4 @@ cabal.sandbox.config *.o *.hi *.swp +cabal.project.local diff --git a/cabal.project.local b/cabal.project.local deleted file mode 100644 index 376588b..0000000 --- a/cabal.project.local +++ /dev/null @@ -1,2 +0,0 @@ -ignore-project: False -test-show-details: direct From 6fdf787e4674114ba2fc3b9b21801aa6adaf52e2 Mon Sep 17 00:00:00 2001 From: Owen Shepherd Date: Fri, 24 May 2024 16:26:50 +0200 Subject: [PATCH 17/19] chore: Add more tests of isolation behaviour in edge cases --- src/Data/Serialize/Get.hs | 16 +------------ tests/GetTests.hs | 47 +++++++++++++++++++++++++++++++++++++-- 2 files changed, 46 insertions(+), 17 deletions(-) diff --git a/src/Data/Serialize/Get.hs b/src/Data/Serialize/Get.hs index 476eb01..ce5a798 100644 --- a/src/Data/Serialize/Get.hs +++ b/src/Data/Serialize/Get.hs @@ -377,7 +377,6 @@ runGetLazyState m lstr = case runGetLazy' m lstr of ensure :: Int -> Get B.ByteString ensure n | n < 0 = fail "Attempted to ensure negative amount of bytes" - | n == 0 = pure mempty | otherwise = ensure' n {-# INLINE ensure #-} @@ -427,21 +426,11 @@ isolationUnderParse = fail "Isolated parser didn't consume all input" isolationUnderSupply :: Get a isolationUnderSupply = fail "Too few bytes supplied to isolated parser" -isolate0 :: Get a -> Get a -isolate0 parser = do - rest <- get - cur <- bytesRead - put mempty cur - a <- parser - put rest cur - pure a - -- | Isolate an action to operating within a fixed block of bytes. The action -- is required to consume all the bytes that it is isolated to. isolate :: Int -> Get a -> Get a isolate n m | n < 0 = negativeIsolation - | n == 0 = isolate0 m | otherwise = do s <- ensure' n let (s',rest) = B.splitAt n s @@ -464,9 +453,7 @@ getAtMost n = do -- | An incremental version of 'isolate', which doesn't try to read the input -- into a buffer all at once. isolateLazy :: Int -> Get a -> Get a -isolateLazy n parser - | n < 0 = negativeIsolation - | n == 0 = isolate0 parser +isolateLazy n _ | n < 0 = negativeIsolation isolateLazy n parser = do initialBytesRead <- bytesRead bs <- getAtMost n @@ -609,7 +596,6 @@ getShortByteString n = do getBytes :: Int -> Get B.ByteString getBytes n | n < 0 = fail "getBytes: negative length requested" - | n == 0 = pure mempty | otherwise = do s <- ensure' n let consume = B.unsafeTake n s diff --git a/tests/GetTests.hs b/tests/GetTests.hs index 6914bf7..f3df9f8 100644 --- a/tests/GetTests.hs +++ b/tests/GetTests.hs @@ -15,11 +15,14 @@ import Data.Serialize.Get import Test.Framework (Test(),testGroup) import Test.Framework.Providers.QuickCheck2 (testProperty) import Test.Framework.Providers.HUnit (testCase) -import Test.HUnit (Assertion, (@=?), assertFailure) +import Test.HUnit (Assertion, (@=?), (@?=), assertFailure, (@?)) import Test.QuickCheck hiding (Result) import qualified Test.QuickCheck as QC import Data.List (isInfixOf) import Debug.Trace +import Data.Either (isLeft) +import Data.Bifunctor (bimap, Bifunctor (..)) +import Data.Functor (($>)) -- Data to express Get parser to generate @@ -338,6 +341,43 @@ isolateIsNotIncremental = go (runGetPartial parser $ BS.replicate 11 0) Fail failure _ -> assertFailure $ "Strict isolate was incremental: " <> failure Partial cont -> pure () +isolate0 :: Assertion +isolate0 = do + runGet parseSucceed "hello" @?= Right (42, "hello") + first (const ()) (runGet parseFail "hello") @?= Left () + + where + parseSucceed :: Get (Int, BS.ByteString) + parseSucceed = do + a <- isolate 0 $ pure 42 + b <- getByteString 5 + pure (a, b) + + parseFail :: Get (Word8, BS.ByteString) + parseFail = do + a <- isolate 0 getWord8 + b <- getByteString 5 + pure (a, b) + +isolate2 :: Assertion +isolate2 = runGet parser "hello" @?= Right ("he", "llo") + where + parser :: Get (BS.ByteString, BS.ByteString) + parser = do + a <- isolate 2 $ getByteString 2 + b <- getByteString 3 + pure (a, b) + +testEnsure :: Assertion +testEnsure = do + runGet parser "hello" @?= Right (replicate 3 "hello") + where + parser = do + a <- ensure 0 + b <- ensure 2 + c <- ensure 5 + pure [a, b, c] + -- Checks return values, leftovers, fails for continuations assertResultsMatch :: Eq a => Result a -> (Maybe a, BS.ByteString) -> Assertion assertResultsMatch r1 r2 = case (r1, r2) of @@ -365,7 +405,7 @@ tests = testGroup "GetTests" , testProperty "lazy - monad assoc" monadAssoc , testProperty "strict - monad assoc" monadAssoc' , testProperty "strict lazy - equality" eqStrictLazy - , testProperty "strict lazy - remaining equality"remainingStrictLazy + , testProperty "strict lazy - remaining equality" remainingStrictLazy , testProperty "lazy - two eof" eqEof , testProperty "strict - two eof" eqEof' , testProperty "lazy - alternative left Id" alterIdL @@ -377,6 +417,9 @@ tests = testGroup "GetTests" , testProperty "lazy - alternative distr" alterDistr , testProperty "strict - alternative distr" alterDistr' , testCase "isolate is not incremental" isolateIsNotIncremental + , testCase "ensure" testEnsure + , testCase "isolate 0" isolate0 + , testCase "isolate 2" isolate2 , testCase "isolateLazy is incremental" isolateLazyIsIncremental , testProperty "isolations are equivalent" isolateAndIsolateLazy , testCase "isolateLazy determines leftovers" isolateLazyDeterminesLeftovers From 81c393ae08cc815f27889e0157a77e2cee5b6c0f Mon Sep 17 00:00:00 2001 From: Owen Shepherd Date: Fri, 24 May 2024 16:35:00 +0200 Subject: [PATCH 18/19] refactor: Don't pass empty bytestring to internal parser in isolateLazy --- src/Data/Serialize/Get.hs | 22 ++++++---------------- 1 file changed, 6 insertions(+), 16 deletions(-) diff --git a/src/Data/Serialize/Get.hs b/src/Data/Serialize/Get.hs index ce5a798..c5181b2 100644 --- a/src/Data/Serialize/Get.hs +++ b/src/Data/Serialize/Get.hs @@ -471,22 +471,12 @@ isolateLazy n parser = do pure a Partial cont -> do pos <- bytesRead - -- Sometimes, the amount of bytes left in the isolated stream - -- will be zero (ie. `n - (pos - initialBytesRead) = 0`) - -- In this case we give the current continuation an empty - -- bytestring. We do this so that the inner parser can determine - -- the final result. - -- If, however, the inner parser returns another continuation, - -- then presumably it will keep doing so, for as long as we provide - -- it with empty chunks of input. - -- In this case, we throw our own error (`isolationUnderSupply`), - -- to avoid recurring indefinitely. - bs <- getAtMost $ n - (pos - initialBytesRead) - if B.null bs - then case cont bs of - Partial _ -> isolationUnderSupply - a -> go initialBytesRead a - else go initialBytesRead $ cont bs + let bytesLeft = n - (pos - initialBytesRead) + if bytesLeft == 0 + then isolationUnderSupply + else do + bs <- getAtMost bytesLeft + go initialBytesRead $ cont bs failRaw :: String -> [String] -> Get a failRaw msg stack = Get (\s0 b0 m0 _ kf _ -> kf s0 b0 m0 stack msg) From c97df679ecc5c4724b56187b4752fabd6c1a78fe Mon Sep 17 00:00:00 2001 From: Owen Shepherd Date: Wed, 24 Jul 2024 15:25:20 +0200 Subject: [PATCH 19/19] chore: More detailed isolationUnderParse error --- src/Data/Serialize/Get.hs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/Data/Serialize/Get.hs b/src/Data/Serialize/Get.hs index c5181b2..ecb65ef 100644 --- a/src/Data/Serialize/Get.hs +++ b/src/Data/Serialize/Get.hs @@ -421,7 +421,7 @@ negativeIsolation :: Get a negativeIsolation = fail "Attempted to isolate a negative number of bytes" isolationUnderParse :: Get a -isolationUnderParse = fail "Isolated parser didn't consume all input" +isolationUnderParse = fail "Isolated parser didn't consume all input" isolationUnderSupply :: Get a isolationUnderSupply = fail "Too few bytes supplied to isolated parser" @@ -467,7 +467,11 @@ isolateLazy n parser = do failRaw msg stack Done a bs -> do bytesRead' <- bytesRead - unless (bytesRead' - initialBytesRead == n && B.null bs) isolationUnderParse + unless (bytesRead' - initialBytesRead == n && B.null bs) + $ fail $ "Isolated parser didn't consume all input. " + <> "Internal leftovers: " <> show bs + <> ", bytesRead: " <> show (bytesRead' - initialBytesRead) + <> ", isolation amt: " <> show n pure a Partial cont -> do pos <- bytesRead