From 7ac788ccc331b0eb6b4d1ce5e5f601513a5f9d25 Mon Sep 17 00:00:00 2001 From: Simon Jakobi Date: Wed, 24 Sep 2025 19:49:19 +0200 Subject: [PATCH 01/23] Add regression tests for #491 --- tests/Regressions.hs | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tests/Regressions.hs b/tests/Regressions.hs index 52af1070..21f14040 100644 --- a/tests/Regressions.hs +++ b/tests/Regressions.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE BinaryLiterals #-} {-# LANGUAGE CPP #-} {-# LANGUAGE MagicHash #-} {-# LANGUAGE ScopedTypeVariables #-} @@ -262,6 +263,17 @@ issue420 = do assert $ k1 `HS.member` s1 assert $ k2 `HS.member` s1 +------------------------------------------------------------------------ +-- Issue 491 + +issue491 :: TestTree +issue491 = testGroup "issue491" $ + [ testCase "1" $ assert $ m [0, -1] `HML.isSubmapOf` m [0, -1] + , testCase "2" $ assert $ m [1, 0b11111] `HML.isSubmapOf` m [1, 0b11111] + , testCase "3" $ assert $ m [1, 0b11111] `HML.isSubmapOf` m [1, 0b11111, 42] + ] + where m = HS.toMap . HS.fromList @Int + ------------------------------------------------------------------------ -- * Test list @@ -292,4 +304,5 @@ tests = testGroup "Regression tests" , testCase "issue383" issue383 #endif , testCase "issue420" issue420 + , issue491 ] From 726d69c6be4120f762fc95a3afe86cc8047966d3 Mon Sep 17 00:00:00 2001 From: Simon Jakobi Date: Wed, 24 Sep 2025 20:00:40 +0200 Subject: [PATCH 02/23] Update ghc-options for debugging --- unordered-containers.cabal | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/unordered-containers.cabal b/unordered-containers.cabal index a0123996..685df578 100644 --- a/unordered-containers.cabal +++ b/unordered-containers.cabal @@ -75,6 +75,9 @@ library ghc-options: -Wall -O2 -fwarn-tabs -ferror-spans + -- Make tight non-allocating loops interruptible for debugging + ghc-options: -fno-omit-yields + -- For dumping the generated code: -- ghc-options: -ddump-simpl -ddump-stg-final -ddump-cmm -ddump-asm -ddump-to-file -- ghc-options: -dsuppress-coercions -dsuppress-unfoldings -dsuppress-module-prefixes @@ -115,7 +118,8 @@ test-suite unordered-containers-tests nothunks >= 0.1.3 default-language: Haskell2010 - ghc-options: -Wall -threaded -rtsopts -with-rtsopts=-N + ghc-options: -Wall -threaded + -- -rtsopts -with-rtsopts=-N -- not sure why this is causing problems on 32bit Debian cpp-options: -DASSERTS benchmark benchmarks From d543f1898484a2ccb6bc1e08ad5e47b4d7febc8e Mon Sep 17 00:00:00 2001 From: Simon Jakobi Date: Wed, 24 Sep 2025 20:29:02 +0200 Subject: [PATCH 03/23] Enhance the test key generator --- tests/Util/Key.hs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/Util/Key.hs b/tests/Util/Key.hs index a3d1476b..a76d951b 100644 --- a/tests/Util/Key.hs +++ b/tests/Util/Key.hs @@ -46,6 +46,9 @@ arbitraryHash = do [ (2, fromIntegral . QC.getLarge <$> arbitrary @(Large Word16)) , (1, QC.getSmall <$> arbitrary) , (1, QC.getLarge <$> arbitrary) + -- Hashes where the lowest `maxChildren` bits are set are interesting + -- edge cases. See #491. + , (1, QC.elements [-1, 0xFF, 0xFFF]) ] i <- QC.frequency gens moreCollisions' <- QC.elements [moreCollisions, id] From b23e396a9098f324bbde489cbd3f321585acd8f7 Mon Sep 17 00:00:00 2001 From: Simon Jakobi Date: Wed, 24 Sep 2025 21:18:27 +0200 Subject: [PATCH 04/23] Add timeouts to hanging tests --- tests/Properties/HashMapLazy.hs | 23 +++++++++++++++++------ tests/Regressions.hs | 5 +++-- 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/tests/Properties/HashMapLazy.hs b/tests/Properties/HashMapLazy.hs index ad80f22f..97a53489 100644 --- a/tests/Properties/HashMapLazy.hs +++ b/tests/Properties/HashMapLazy.hs @@ -255,31 +255,42 @@ tests = [ testProperty "model" $ \(x :: HMKI) y -> HM.isSubmapOf x y === M.isSubmapOf (toOrdMap x) (toOrdMap y) , testProperty "m ⊆ m" $ - \(x :: HMKI) -> HM.isSubmapOf x x + \(x :: HMKI) -> QC.within 1000000 $ HM.isSubmapOf x x , testProperty "m1 ⊆ m1 ∪ m2" $ - \(x :: HMKI) y -> HM.isSubmapOf x (HM.union x y) + \(x :: HMKI) y -> QC.within 1000000 $ HM.isSubmapOf x (HM.union x y) , testProperty "m1 ⊈ m2 ⇒ m1 ∪ m2 ⊈ m1" $ - \(m1 :: HMKI) m2 -> not (HM.isSubmapOf m1 m2) ==> HM.isSubmapOf m1 (HM.union m1 m2) + \(m1 :: HMKI) m2 -> + QC.within 1000000 $ + not (HM.isSubmapOf m1 m2) ==> HM.isSubmapOf m1 (HM.union m1 m2) , testProperty "m1\\m2 ⊆ m1" $ - \(m1 :: HMKI) (m2 :: HMKI) -> HM.isSubmapOf (HM.difference m1 m2) m1 + \(m1 :: HMKI) (m2 :: HMKI) -> + QC.within 1000000 $ + HM.isSubmapOf (HM.difference m1 m2) m1 , testProperty "m1 ∩ m2 ≠ ∅ ⇒ m1 ⊈ m1\\m2 " $ \(m1 :: HMKI) (m2 :: HMKI) -> + QC.within 1000000 $ not (HM.null (HM.intersection m1 m2)) ==> not (HM.isSubmapOf m1 (HM.difference m1 m2)) , testProperty "delete k m ⊆ m" $ \(m :: HMKI) -> + QC.within 1000000 $ not (HM.null m) ==> QC.forAll (QC.elements (HM.keys m)) $ \k -> HM.isSubmapOf (HM.delete k m) m , testProperty "m ⊈ delete k m " $ \(m :: HMKI) -> + QC.within 1000000 $ not (HM.null m) ==> QC.forAll (QC.elements (HM.keys m)) $ \k -> not (HM.isSubmapOf m (HM.delete k m)) , testProperty "k ∉ m ⇒ m ⊆ insert k v m" $ - \k v (m :: HMKI) -> not (HM.member k m) ==> HM.isSubmapOf m (HM.insert k v m) + \k v (m :: HMKI) -> + QC.within 1000000 $ + not (HM.member k m) ==> HM.isSubmapOf m (HM.insert k v m) , testProperty "k ∉ m ⇒ insert k v m ⊈ m" $ - \k v (m :: HMKI) -> not (HM.member k m) ==> not (HM.isSubmapOf (HM.insert k v m) m) + \k v (m :: HMKI) -> + QC.within 1000000 $ + not (HM.member k m) ==> not (HM.isSubmapOf (HM.insert k v m) m) ] -- Combine , testGroup "union" diff --git a/tests/Regressions.hs b/tests/Regressions.hs index 21f14040..632366c5 100644 --- a/tests/Regressions.hs +++ b/tests/Regressions.hs @@ -1,6 +1,7 @@ {-# LANGUAGE BinaryLiterals #-} {-# LANGUAGE CPP #-} {-# LANGUAGE MagicHash #-} +{-# LANGUAGE NumericUnderscores #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TypeApplications #-} {-# LANGUAGE UnboxedTuples #-} @@ -20,7 +21,7 @@ import System.Mem.Weak (deRefWeak, mkWeakPtr) import System.Random (randomIO) import Test.HUnit (Assertion, assert) import Test.QuickCheck -import Test.Tasty (TestTree, testGroup) +import Test.Tasty (TestTree, localOption, mkTimeout, testGroup) import Test.Tasty.HUnit (testCase) import Test.Tasty.QuickCheck (testProperty) @@ -267,7 +268,7 @@ issue420 = do -- Issue 491 issue491 :: TestTree -issue491 = testGroup "issue491" $ +issue491 = localOption (mkTimeout 1_000_000) $ testGroup "issue491" $ [ testCase "1" $ assert $ m [0, -1] `HML.isSubmapOf` m [0, -1] , testCase "2" $ assert $ m [1, 0b11111] `HML.isSubmapOf` m [1, 0b11111] , testCase "3" $ assert $ m [1, 0b11111] `HML.isSubmapOf` m [1, 0b11111, 42] From 295eb84f37840f4aa3dd3952e64735cac78ee54f Mon Sep 17 00:00:00 2001 From: Simon Jakobi Date: Wed, 24 Sep 2025 21:34:07 +0200 Subject: [PATCH 05/23] Add FIXME --- unordered-containers.cabal | 1 + 1 file changed, 1 insertion(+) diff --git a/unordered-containers.cabal b/unordered-containers.cabal index 685df578..61fbf610 100644 --- a/unordered-containers.cabal +++ b/unordered-containers.cabal @@ -76,6 +76,7 @@ library ghc-options: -Wall -O2 -fwarn-tabs -ferror-spans -- Make tight non-allocating loops interruptible for debugging + -- FIXME: Remove this once #491 is fixed! ghc-options: -fno-omit-yields -- For dumping the generated code: From 6a50dd651f53cb971ae3d536390ad326e8a5cfcf Mon Sep 17 00:00:00 2001 From: Simon Jakobi Date: Thu, 25 Sep 2025 18:08:51 +0200 Subject: [PATCH 06/23] Improve placement of `within` timeouts --- tests/Properties/HashMapLazy.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/Properties/HashMapLazy.hs b/tests/Properties/HashMapLazy.hs index 97a53489..2c440a67 100644 --- a/tests/Properties/HashMapLazy.hs +++ b/tests/Properties/HashMapLazy.hs @@ -273,15 +273,15 @@ tests = not (HM.isSubmapOf m1 (HM.difference m1 m2)) , testProperty "delete k m ⊆ m" $ \(m :: HMKI) -> - QC.within 1000000 $ not (HM.null m) ==> QC.forAll (QC.elements (HM.keys m)) $ \k -> + QC.within 1000000 $ HM.isSubmapOf (HM.delete k m) m , testProperty "m ⊈ delete k m " $ \(m :: HMKI) -> - QC.within 1000000 $ not (HM.null m) ==> QC.forAll (QC.elements (HM.keys m)) $ \k -> + QC.within 1000000 $ not (HM.isSubmapOf m (HM.delete k m)) , testProperty "k ∉ m ⇒ m ⊆ insert k v m" $ \k v (m :: HMKI) -> From db38c826badb5ef0315302fcdc329e38d0626654 Mon Sep 17 00:00:00 2001 From: Simon Jakobi Date: Thu, 25 Sep 2025 18:31:10 +0200 Subject: [PATCH 07/23] Clarify `mask` definition --- tests/Util/Key.hs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/Util/Key.hs b/tests/Util/Key.hs index a76d951b..9088d110 100644 --- a/tests/Util/Key.hs +++ b/tests/Util/Key.hs @@ -56,10 +56,11 @@ arbitraryHash = do -- | Mask out most bits to produce more collisions moreCollisions :: Int -> Int -moreCollisions w = fromIntegral (w .&. mask) +moreCollisions w = fromIntegral (w .&. moreCollisionsMask) -mask :: Int -mask = sum [bit n | n <- [0, 3, 8, 14, 61]] +-- | Bitmask for @moreCollisions@ +moreCollisionsMask :: Int +moreCollisionsMask = sum [bit n | n <- [0, 3, 8, 14, 61]] keyToInt :: Key -> Int keyToInt (K h x) = h * fromEnum x From 753e21418a0774987a2de7c1b9b29ec7fb820d5a Mon Sep 17 00:00:00 2001 From: Simon Jakobi Date: Thu, 25 Sep 2025 20:38:32 +0200 Subject: [PATCH 08/23] Remove use of NumericUnderscores for compatibility ...with GHC 8.2 and 8.4. --- tests/Regressions.hs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/Regressions.hs b/tests/Regressions.hs index 632366c5..7988b64d 100644 --- a/tests/Regressions.hs +++ b/tests/Regressions.hs @@ -1,7 +1,6 @@ {-# LANGUAGE BinaryLiterals #-} {-# LANGUAGE CPP #-} {-# LANGUAGE MagicHash #-} -{-# LANGUAGE NumericUnderscores #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TypeApplications #-} {-# LANGUAGE UnboxedTuples #-} @@ -268,7 +267,7 @@ issue420 = do -- Issue 491 issue491 :: TestTree -issue491 = localOption (mkTimeout 1_000_000) $ testGroup "issue491" $ +issue491 = localOption (mkTimeout 1000000) $ testGroup "issue491" $ [ testCase "1" $ assert $ m [0, -1] `HML.isSubmapOf` m [0, -1] , testCase "2" $ assert $ m [1, 0b11111] `HML.isSubmapOf` m [1, 0b11111] , testCase "3" $ assert $ m [1, 0b11111] `HML.isSubmapOf` m [1, 0b11111, 42] From 2b8fcae9326317f9b1fd6101ace723555d4183fe Mon Sep 17 00:00:00 2001 From: Simon Jakobi Date: Thu, 25 Sep 2025 22:07:13 +0200 Subject: [PATCH 09/23] Add CI on 32bit Linux (Stolen from the bytestring project) --- .github/workflows/32bit-ci.yml | 71 ++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100644 .github/workflows/32bit-ci.yml diff --git a/.github/workflows/32bit-ci.yml b/.github/workflows/32bit-ci.yml new file mode 100644 index 00000000..268d6cea --- /dev/null +++ b/.github/workflows/32bit-ci.yml @@ -0,0 +1,71 @@ +name: 32bit-ci +on: + push: + branches: + - master + pull_request: {} # Validate all PRs + +defaults: + run: + shell: bash + +jobs: + build: + runs-on: ${{ matrix.os }} + strategy: + fail-fast: true + matrix: + os: [ubuntu-latest] + ghc: ['9.12'] + steps: + - uses: actions/checkout@v4 + - uses: haskell-actions/setup@v2 + id: setup-haskell-cabal + with: + ghc-version: ${{ matrix.ghc }} + - name: Update cabal package database + run: cabal update + - uses: actions/cache@v3 + name: Cache cabal stuff + with: + path: | + ${{ steps.setup-haskell-cabal.outputs.cabal-store }} + dist-newstyle + key: ${{ runner.os }}-${{ matrix.ghc }} + - name: Test + run: | + cabal sdist -z -o . + cabal get unordered-containers-*.tar.gz + cd unordered-containers-*/ + cabal build unordered-containers:tests --enable-tests --enable-benchmarks + cabal test --enable-tests --enable-benchmarks --test-show-details=direct all + - name: Bench + run: | + cd unordered-containers-*/ + cabal bench --enable-tests --enable-benchmarks --benchmark-option=-l all + - name: Haddock + run: | + cd unordered-containers-*/ + cabal haddock all + - name: Cabal check + run: | + cd unordered-containers-*/ + cabal check + + i386: + needs: build + runs-on: ubuntu-latest + container: + image: i386/ubuntu:bionic + steps: + - name: Install + run: | + apt-get update -y + apt-get install -y autoconf build-essential zlib1g-dev libgmp-dev curl libncurses5 libtinfo5 libncurses5-dev libtinfo-dev + curl --proto '=https' --tlsv1.2 -sSf https://get-ghcup.haskell.org | BOOTSTRAP_HASKELL_NONINTERACTIVE=1 BOOTSTRAP_HASKELL_INSTALL_NO_STACK=1 sh + - uses: actions/checkout@v1 #This version must stay old enough to remain compatible with the container image + - name: Test + run: | + source ~/.ghcup/env + cabal update + cabal test From 3958337bebc84ce97f5ff23b4b1d3af3ead1491f Mon Sep 17 00:00:00 2001 From: Simon Jakobi Date: Fri, 26 Sep 2025 12:55:30 +0200 Subject: [PATCH 10/23] tests: Set utf8 locale ...to ensure Unicode characters can be printed --- tests/Main.hs | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/tests/Main.hs b/tests/Main.hs index 9e337ad2..5880ba72 100644 --- a/tests/Main.hs +++ b/tests/Main.hs @@ -1,5 +1,6 @@ module Main (main) where +import GHC.IO.Encoding (setLocaleEncoding, utf8) import Test.Tasty (defaultMain, testGroup) import qualified Properties @@ -7,8 +8,10 @@ import qualified Regressions import qualified Strictness main :: IO () -main = defaultMain $ testGroup "All" - [ Properties.tests - , Regressions.tests - , Strictness.tests - ] +main = do + setLocaleEncoding utf8 + defaultMain $ testGroup "All" + [ Properties.tests + , Regressions.tests + , Strictness.tests + ] From 008cab4db6478431739bad3c072183da40dadeca Mon Sep 17 00:00:00 2001 From: Simon Jakobi Date: Fri, 26 Sep 2025 14:28:47 +0200 Subject: [PATCH 11/23] Update regression tests --- tests/Regressions.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/Regressions.hs b/tests/Regressions.hs index 7988b64d..49278312 100644 --- a/tests/Regressions.hs +++ b/tests/Regressions.hs @@ -270,7 +270,7 @@ issue491 :: TestTree issue491 = localOption (mkTimeout 1000000) $ testGroup "issue491" $ [ testCase "1" $ assert $ m [0, -1] `HML.isSubmapOf` m [0, -1] , testCase "2" $ assert $ m [1, 0b11111] `HML.isSubmapOf` m [1, 0b11111] - , testCase "3" $ assert $ m [1, 0b11111] `HML.isSubmapOf` m [1, 0b11111, 42] + , testCase "3" $ assert $ m [0, 1] `HML.isSubmapOf` m [0, 1, 0b11111] ] where m = HS.toMap . HS.fromList @Int From 52dcfb8f1c0d6324c0555314ca9defc80cfac092 Mon Sep 17 00:00:00 2001 From: Simon Jakobi Date: Fri, 26 Sep 2025 14:35:12 +0200 Subject: [PATCH 12/23] Try to minimize the new CI config --- .github/workflows/32bit-ci.yml | 43 ---------------------------------- 1 file changed, 43 deletions(-) diff --git a/.github/workflows/32bit-ci.yml b/.github/workflows/32bit-ci.yml index 268d6cea..a37914d6 100644 --- a/.github/workflows/32bit-ci.yml +++ b/.github/workflows/32bit-ci.yml @@ -10,50 +10,7 @@ defaults: shell: bash jobs: - build: - runs-on: ${{ matrix.os }} - strategy: - fail-fast: true - matrix: - os: [ubuntu-latest] - ghc: ['9.12'] - steps: - - uses: actions/checkout@v4 - - uses: haskell-actions/setup@v2 - id: setup-haskell-cabal - with: - ghc-version: ${{ matrix.ghc }} - - name: Update cabal package database - run: cabal update - - uses: actions/cache@v3 - name: Cache cabal stuff - with: - path: | - ${{ steps.setup-haskell-cabal.outputs.cabal-store }} - dist-newstyle - key: ${{ runner.os }}-${{ matrix.ghc }} - - name: Test - run: | - cabal sdist -z -o . - cabal get unordered-containers-*.tar.gz - cd unordered-containers-*/ - cabal build unordered-containers:tests --enable-tests --enable-benchmarks - cabal test --enable-tests --enable-benchmarks --test-show-details=direct all - - name: Bench - run: | - cd unordered-containers-*/ - cabal bench --enable-tests --enable-benchmarks --benchmark-option=-l all - - name: Haddock - run: | - cd unordered-containers-*/ - cabal haddock all - - name: Cabal check - run: | - cd unordered-containers-*/ - cabal check - i386: - needs: build runs-on: ubuntu-latest container: image: i386/ubuntu:bionic From 63b310bb9c337ec86dd1fc2b8b959e3ba89fa169 Mon Sep 17 00:00:00 2001 From: Simon Jakobi Date: Fri, 26 Sep 2025 14:50:52 +0200 Subject: [PATCH 13/23] Add comment --- .github/workflows/32bit-ci.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/32bit-ci.yml b/.github/workflows/32bit-ci.yml index a37914d6..ccb416e5 100644 --- a/.github/workflows/32bit-ci.yml +++ b/.github/workflows/32bit-ci.yml @@ -1,3 +1,6 @@ +# This config is mostly copied from +# https://github.com/haskell/bytestring/blob/master/.github/workflows/ci.yml + name: 32bit-ci on: push: From 26f931d3b0c12083c6df3ca4c52a5144f49ff312 Mon Sep 17 00:00:00 2001 From: Simon Jakobi Date: Fri, 26 Sep 2025 15:05:09 +0200 Subject: [PATCH 14/23] Check for overflow in submapBitmapIndexed Fixes #491. --- Data/HashMap/Internal.hs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Data/HashMap/Internal.hs b/Data/HashMap/Internal.hs index 2bb7029c..12ed529f 100644 --- a/Data/HashMap/Internal.hs +++ b/Data/HashMap/Internal.hs @@ -1556,6 +1556,12 @@ submapBitmapIndexed comp !b1 !ary1 !b2 !ary2 = subsetBitmaps && go 0 0 (b1Orb2 . go !i !j !m | m > b1Orb2 = True +#if (WORD_SIZE_IN_BITS == 32) + -- m can overflow to 0 on 32-bit platforms. + -- See #491. + | m == 0 = True +#endif + -- In case a key is both in ary1 and ary2, check ary1[i] <= ary2[j] and -- increment the indices i and j. | b1Andb2 .&. m /= 0 = comp (A.index ary1 i) (A.index ary2 j) && From a2080d994225ebbc490da2b740be55748192a070 Mon Sep 17 00:00:00 2001 From: Simon Jakobi Date: Fri, 26 Sep 2025 16:18:34 +0200 Subject: [PATCH 15/23] Add comment --- .github/workflows/32bit-ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/32bit-ci.yml b/.github/workflows/32bit-ci.yml index ccb416e5..c09a9287 100644 --- a/.github/workflows/32bit-ci.yml +++ b/.github/workflows/32bit-ci.yml @@ -29,3 +29,4 @@ jobs: source ~/.ghcup/env cabal update cabal test + # TODO: Consider testing with -fdebug From 1b5b1eb33881e8d724ed08fe4ef059f9c7a31ac9 Mon Sep 17 00:00:00 2001 From: Simon Jakobi Date: Fri, 26 Sep 2025 16:21:38 +0200 Subject: [PATCH 16/23] Remove parens --- Data/HashMap/Internal.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Data/HashMap/Internal.hs b/Data/HashMap/Internal.hs index 12ed529f..2033d916 100644 --- a/Data/HashMap/Internal.hs +++ b/Data/HashMap/Internal.hs @@ -1556,7 +1556,7 @@ submapBitmapIndexed comp !b1 !ary1 !b2 !ary2 = subsetBitmaps && go 0 0 (b1Orb2 . go !i !j !m | m > b1Orb2 = True -#if (WORD_SIZE_IN_BITS == 32) +#if WORD_SIZE_IN_BITS == 32 -- m can overflow to 0 on 32-bit platforms. -- See #491. | m == 0 = True From b7a42ba1fe5266cb38089525db5eb6c2440387cb Mon Sep 17 00:00:00 2001 From: Simon Jakobi Date: Fri, 26 Sep 2025 17:22:19 +0200 Subject: [PATCH 17/23] Add missing #include --- Data/HashMap/Internal.hs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Data/HashMap/Internal.hs b/Data/HashMap/Internal.hs index 2033d916..8234b912 100644 --- a/Data/HashMap/Internal.hs +++ b/Data/HashMap/Internal.hs @@ -15,6 +15,8 @@ {-# OPTIONS_GHC -fno-full-laziness -funbox-strict-fields #-} {-# OPTIONS_HADDOCK not-home #-} +#include "MachDeps.h" + -- | = WARNING -- -- This module is considered __internal__. From 4fbce332fb08d41fde907c38a9f010995c634134 Mon Sep 17 00:00:00 2001 From: Simon Jakobi Date: Fri, 26 Sep 2025 17:31:14 +0200 Subject: [PATCH 18/23] Revert "Add FIXME" This reverts commit 295eb84f37840f4aa3dd3952e64735cac78ee54f. --- unordered-containers.cabal | 1 - 1 file changed, 1 deletion(-) diff --git a/unordered-containers.cabal b/unordered-containers.cabal index 61fbf610..685df578 100644 --- a/unordered-containers.cabal +++ b/unordered-containers.cabal @@ -76,7 +76,6 @@ library ghc-options: -Wall -O2 -fwarn-tabs -ferror-spans -- Make tight non-allocating loops interruptible for debugging - -- FIXME: Remove this once #491 is fixed! ghc-options: -fno-omit-yields -- For dumping the generated code: From 3ff364ac3dde0321c700c3b597f7b6c54654d56f Mon Sep 17 00:00:00 2001 From: Simon Jakobi Date: Fri, 26 Sep 2025 17:31:30 +0200 Subject: [PATCH 19/23] Revert "Update ghc-options for debugging" This reverts commit 726d69c6be4120f762fc95a3afe86cc8047966d3. --- unordered-containers.cabal | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/unordered-containers.cabal b/unordered-containers.cabal index 685df578..a0123996 100644 --- a/unordered-containers.cabal +++ b/unordered-containers.cabal @@ -75,9 +75,6 @@ library ghc-options: -Wall -O2 -fwarn-tabs -ferror-spans - -- Make tight non-allocating loops interruptible for debugging - ghc-options: -fno-omit-yields - -- For dumping the generated code: -- ghc-options: -ddump-simpl -ddump-stg-final -ddump-cmm -ddump-asm -ddump-to-file -- ghc-options: -dsuppress-coercions -dsuppress-unfoldings -dsuppress-module-prefixes @@ -118,8 +115,7 @@ test-suite unordered-containers-tests nothunks >= 0.1.3 default-language: Haskell2010 - ghc-options: -Wall -threaded - -- -rtsopts -with-rtsopts=-N -- not sure why this is causing problems on 32bit Debian + ghc-options: -Wall -threaded -rtsopts -with-rtsopts=-N cpp-options: -DASSERTS benchmark benchmarks From 9a9abb9eb5b491e7c73bf7e80f48442a33bf78fc Mon Sep 17 00:00:00 2001 From: Simon Jakobi Date: Fri, 26 Sep 2025 17:38:49 +0200 Subject: [PATCH 20/23] Remove timeouts from property tests --- tests/Properties/HashMapLazy.hs | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/tests/Properties/HashMapLazy.hs b/tests/Properties/HashMapLazy.hs index 29fc9b50..43ef2aa9 100644 --- a/tests/Properties/HashMapLazy.hs +++ b/tests/Properties/HashMapLazy.hs @@ -255,38 +255,29 @@ tests = [ testProperty "model" $ \(x :: HMKI) y -> HM.isSubmapOf x y === M.isSubmapOf (toOrdMap x) (toOrdMap y) , testProperty "m ⊆ m" $ - \(x :: HMKI) -> QC.within 1000000 $ HM.isSubmapOf x x + \(x :: HMKI) -> HM.isSubmapOf x x , testProperty "m1 ⊆ m1 ∪ m2" $ - \(x :: HMKI) y -> QC.within 1000000 $ HM.isSubmapOf x (HM.union x y) + \(x :: HMKI) y -> HM.isSubmapOf x (HM.union x y) , testProperty "m1\\m2 ⊆ m1" $ - \(m1 :: HMKI) (m2 :: HMKI) -> - QC.within 1000000 $ - HM.isSubmapOf (HM.difference m1 m2) m1 + \(m1 :: HMKI) (m2 :: HMKI) -> HM.isSubmapOf (HM.difference m1 m2) m1 , testProperty "m1 ∩ m2 ≠ ∅ ⇒ m1 ⊈ m1\\m2 " $ \(m1 :: HMKI) (m2 :: HMKI) -> - QC.within 1000000 $ not (HM.null (HM.intersection m1 m2)) ==> not (HM.isSubmapOf m1 (HM.difference m1 m2)) , testProperty "delete k m ⊆ m" $ \(m :: HMKI) -> not (HM.null m) ==> QC.forAll (QC.elements (HM.keys m)) $ \k -> - QC.within 1000000 $ HM.isSubmapOf (HM.delete k m) m , testProperty "m ⊈ delete k m " $ \(m :: HMKI) -> not (HM.null m) ==> QC.forAll (QC.elements (HM.keys m)) $ \k -> - QC.within 1000000 $ not (HM.isSubmapOf m (HM.delete k m)) , testProperty "k ∉ m ⇒ m ⊆ insert k v m" $ - \k v (m :: HMKI) -> - QC.within 1000000 $ - not (HM.member k m) ==> HM.isSubmapOf m (HM.insert k v m) + \k v (m :: HMKI) -> not (HM.member k m) ==> HM.isSubmapOf m (HM.insert k v m) , testProperty "k ∉ m ⇒ insert k v m ⊈ m" $ - \k v (m :: HMKI) -> - QC.within 1000000 $ - not (HM.member k m) ==> not (HM.isSubmapOf (HM.insert k v m) m) + \k v (m :: HMKI) -> not (HM.member k m) ==> not (HM.isSubmapOf (HM.insert k v m) m) ] -- Combine , testGroup "union" From 5385b0cf1af99b9b9e3283964791c0c0059399be Mon Sep 17 00:00:00 2001 From: Simon Jakobi Date: Fri, 26 Sep 2025 17:56:19 +0200 Subject: [PATCH 21/23] Qualify for clarity --- tests/Regressions.hs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/Regressions.hs b/tests/Regressions.hs index 49278312..1b2d3c5c 100644 --- a/tests/Regressions.hs +++ b/tests/Regressions.hs @@ -20,13 +20,14 @@ import System.Mem.Weak (deRefWeak, mkWeakPtr) import System.Random (randomIO) import Test.HUnit (Assertion, assert) import Test.QuickCheck -import Test.Tasty (TestTree, localOption, mkTimeout, testGroup) +import Test.Tasty (TestTree, testGroup) import Test.Tasty.HUnit (testCase) import Test.Tasty.QuickCheck (testProperty) import qualified Data.HashMap.Lazy as HML import qualified Data.HashMap.Strict as HMS import qualified Data.HashSet as HS +import qualified Test.Tasty as Tasty #if MIN_VERSION_base(4,12,0) -- nothunks requires base >= 4.12 @@ -267,7 +268,7 @@ issue420 = do -- Issue 491 issue491 :: TestTree -issue491 = localOption (mkTimeout 1000000) $ testGroup "issue491" $ +issue491 = Tasty.localOption (Tasty.mkTimeout 1000000) $ testGroup "issue491" $ [ testCase "1" $ assert $ m [0, -1] `HML.isSubmapOf` m [0, -1] , testCase "2" $ assert $ m [1, 0b11111] `HML.isSubmapOf` m [1, 0b11111] , testCase "3" $ assert $ m [0, 1] `HML.isSubmapOf` m [0, 1, 0b11111] From dadb5906d1a4d0f0c7a69a618b2bdd4acef62962 Mon Sep 17 00:00:00 2001 From: Simon Jakobi Date: Fri, 26 Sep 2025 20:51:27 +0200 Subject: [PATCH 22/23] Make bitsPerSubkey platform-dependent --- Data/HashMap/Internal.hs | 54 +++++++++++++++++---------------- Data/HashMap/Internal/Strict.hs | 10 +++--- Data/HashMap/Lazy.hs | 2 +- Data/HashMap/Strict.hs | 2 +- Data/HashSet.hs | 2 +- Data/HashSet/Internal.hs | 2 +- docs/developer-guide.md | 3 +- 7 files changed, 39 insertions(+), 36 deletions(-) diff --git a/Data/HashMap/Internal.hs b/Data/HashMap/Internal.hs index 8234b912..40f2b2f5 100644 --- a/Data/HashMap/Internal.hs +++ b/Data/HashMap/Internal.hs @@ -127,9 +127,9 @@ module Data.HashMap.Internal , sparseIndex , two , unionArrayBy - , update32 - , update32M - , update32With' + , updateFullArray + , updateFullArrayM + , updateFullArrayWith' , updateOrConcatWithKey , filterMapAux , equalKeys @@ -832,7 +832,7 @@ insert' h0 k0 v0 m0 = go h0 k0 v0 0 m0 !st' = go h k x (nextShift s) st in if st' `ptrEq` st then t - else Full (update32 ary i st') + else Full (updateFullArray ary i st') where i = index h s go h k x s t@(Collision hy v) | h == hy = Collision h (updateOrSnocWith (\a _ -> (# a #)) k x v) @@ -866,7 +866,7 @@ insertNewKey !h0 !k0 x0 !m0 = go h0 k0 x0 0 m0 go h k x s (Full ary) = let !st = A.index ary i !st' = go h k x (nextShift s) st - in Full (update32 ary i st') + in Full (updateFullArray ary i st') where i = index h s go h k x s t@(Collision hy v) | h == hy = Collision h (A.snoc v (L k x)) @@ -895,7 +895,7 @@ insertKeyExists !collPos0 !h0 !k0 x0 !m0 = go collPos0 h0 k0 x0 m0 go collPos shiftedHash k x (Full ary) = let !st = A.index ary i !st' = go collPos (shiftHash shiftedHash) k x st - in Full (update32 ary i st') + in Full (updateFullArray ary i st') where i = index' shiftedHash go collPos _shiftedHash k x (Collision h v) | collPos >= 0 = Collision h (setAtPosition collPos k x v) @@ -1043,7 +1043,7 @@ insertModifying x f k0 m0 = go h0 k0 0 m0 go h k s t@(Full ary) = let !st = A.index ary i !st' = go h k (nextShift s) st - ary' = update32 ary i $! st' + ary' = updateFullArray ary i $! st' in if ptrEq st st' then t else Full ary' @@ -1272,7 +1272,7 @@ adjust# f k0 m0 = go h0 k0 0 m0 let i = index h s !st = A.index ary i !st' = go h k (nextShift s) st - ary' = update32 ary i $! st' + ary' = updateFullArray ary i $! st' in if ptrEq st st' then t else Full ary' @@ -1558,12 +1558,6 @@ submapBitmapIndexed comp !b1 !ary1 !b2 !ary2 = subsetBitmaps && go 0 0 (b1Orb2 . go !i !j !m | m > b1Orb2 = True -#if WORD_SIZE_IN_BITS == 32 - -- m can overflow to 0 on 32-bit platforms. - -- See #491. - | m == 0 = True -#endif - -- In case a key is both in ary1 and ary2, check ary1[i] <= ary2[j] and -- increment the indices i and j. | b1Andb2 .&. m /= 0 = comp (A.index ary1 i) (A.index ary2 j) && @@ -1668,12 +1662,12 @@ unionWithKey f = go 0 go s (Full ary1) t2 = let h2 = leafHashCode t2 i = index h2 s - ary' = update32With' ary1 i $ \st1 -> go (nextShift s) st1 t2 + ary' = updateFullArrayWith' ary1 i $ \st1 -> go (nextShift s) st1 t2 in Full ary' go s t1 (Full ary2) = let h1 = leafHashCode t1 i = index h1 s - ary' = update32With' ary2 i $ \st2 -> go (nextShift s) t1 st2 + ary' = updateFullArrayWith' ary2 i $ \st2 -> go (nextShift s) t1 st2 in Full ary' leafHashCode (Leaf h _) = h @@ -2414,24 +2408,24 @@ subsetArray cmpV ary1 ary2 = A.length ary1 <= A.length ary2 && A.all inAry2 ary1 -- Manually unrolled loops -- | \(O(n)\) Update the element at the given position in this array. -update32 :: A.Array e -> Int -> e -> A.Array e -update32 ary idx b = runST (update32M ary idx b) -{-# INLINE update32 #-} +updateFullArray :: A.Array e -> Int -> e -> A.Array e +updateFullArray ary idx b = runST (updateFullArrayM ary idx b) +{-# INLINE updateFullArray #-} -- | \(O(n)\) Update the element at the given position in this array. -update32M :: A.Array e -> Int -> e -> ST s (A.Array e) -update32M ary idx b = do +updateFullArrayM :: A.Array e -> Int -> e -> ST s (A.Array e) +updateFullArrayM ary idx b = do mary <- clone ary A.write mary idx b A.unsafeFreeze mary -{-# INLINE update32M #-} +{-# INLINE updateFullArrayM #-} -- | \(O(n)\) Update the element at the given position in this array, by applying a function to it. -update32With' :: A.Array e -> Int -> (e -> e) -> A.Array e -update32With' ary idx f +updateFullArrayWith' :: A.Array e -> Int -> (e -> e) -> A.Array e +updateFullArrayWith' ary idx f | (# x #) <- A.index# ary idx - = update32 ary idx $! f x -{-# INLINE update32With' #-} + = updateFullArray ary idx $! f x +{-# INLINE updateFullArrayWith' #-} -- | Unsafely clone an array of (2^bitsPerSubkey) elements. The length of the input -- array is not checked. @@ -2448,8 +2442,16 @@ clone ary = -- | Number of bits that are inspected at each level of the hash tree. -- -- This constant is named /t/ in the original /Ideal Hash Trees/ paper. +-- +-- Note that this constant is platform-dependent. On 32-bit platforms we use +-- '4', because bitmaps using '2^5' bits turned out to be prone to integer +-- overflow bugs. See #491 for instance. bitsPerSubkey :: Int +#if WORD_SIZE_IN_BITS < 64 +bitsPerSubkey = 4 +#else bitsPerSubkey = 5 +#endif -- | The size of a 'Full' node, i.e. @2 ^ 'bitsPerSubkey'@. maxChildren :: Int diff --git a/Data/HashMap/Internal/Strict.hs b/Data/HashMap/Internal/Strict.hs index a76bffb8..ce9a48fa 100644 --- a/Data/HashMap/Internal/Strict.hs +++ b/Data/HashMap/Internal/Strict.hs @@ -39,7 +39,7 @@ -- strings. -- -- Many operations have a average-case complexity of \(O(\log n)\). The --- implementation uses a large base (i.e. 32) so in practice these +-- implementation uses a large base (i.e. 16 or 32) so in practice these -- operations are constant time. module Data.HashMap.Internal.Strict ( @@ -211,7 +211,7 @@ insertWith f k0 v0 m0 = go h0 k0 v0 0 m0 go h k x s (Full ary) = let st = A.index ary i st' = go h k x (nextShift s) st - ary' = HM.update32 ary i $! st' + ary' = HM.updateFullArray ary i $! st' in Full ary' where i = index h s go h k x s t@(Collision hy v) @@ -282,7 +282,7 @@ adjust f k0 m0 = go h0 k0 0 m0 let i = index h s st = A.index ary i st' = go h k (nextShift s) st - ary' = HM.update32 ary i $! st' + ary' = HM.updateFullArray ary i $! st' in Full ary' go h k _ t@(Collision hy v) | h == hy = Collision h (updateWith f k v) @@ -516,12 +516,12 @@ unionWithKey f = go 0 go s (Full ary1) t2 = let h2 = leafHashCode t2 i = index h2 s - ary' = HM.update32With' ary1 i $ \st1 -> go (nextShift s) st1 t2 + ary' = HM.updateFullArrayWith' ary1 i $ \st1 -> go (nextShift s) st1 t2 in Full ary' go s t1 (Full ary2) = let h1 = leafHashCode t1 i = index h1 s - ary' = HM.update32With' ary2 i $ \st2 -> go (nextShift s) t1 st2 + ary' = HM.updateFullArrayWith' ary2 i $ \st2 -> go (nextShift s) t1 st2 in Full ary' leafHashCode (Leaf h _) = h diff --git a/Data/HashMap/Lazy.hs b/Data/HashMap/Lazy.hs index 82697c0f..80e3894e 100644 --- a/Data/HashMap/Lazy.hs +++ b/Data/HashMap/Lazy.hs @@ -20,7 +20,7 @@ -- strings. -- -- Many operations have a average-case complexity of \(O(\log n)\). The --- implementation uses a large base (i.e. 32) so in practice these +-- implementation uses a large base (i.e. 16 or 32) so in practice these -- operations are constant time. module Data.HashMap.Lazy ( diff --git a/Data/HashMap/Strict.hs b/Data/HashMap/Strict.hs index cd5baa6c..c1d30e88 100644 --- a/Data/HashMap/Strict.hs +++ b/Data/HashMap/Strict.hs @@ -19,7 +19,7 @@ -- strings. -- -- Many operations have a average-case complexity of \(O(\log n)\). The --- implementation uses a large base (i.e. 32) so in practice these +-- implementation uses a large base (i.e. 16 or 32) so in practice these -- operations are constant time. module Data.HashMap.Strict ( diff --git a/Data/HashSet.hs b/Data/HashSet.hs index 6d589189..330af38a 100644 --- a/Data/HashSet.hs +++ b/Data/HashSet.hs @@ -87,7 +87,7 @@ especially when value comparisons are expensive, as in the case of strings. Many operations have a average-case complexity of \(O(\log n)\). The -implementation uses a large base (i.e. 16) so in practice these +implementation uses a large base (i.e. 16 or 32) so in practice these operations are constant time. -} diff --git a/Data/HashSet/Internal.hs b/Data/HashSet/Internal.hs index be1af480..38cf1828 100644 --- a/Data/HashSet/Internal.hs +++ b/Data/HashSet/Internal.hs @@ -37,7 +37,7 @@ -- strings. -- -- Many operations have a average-case complexity of \(O(\log n)\). The --- implementation uses a large base (i.e. 32) so in practice these +-- implementation uses a large base (i.e. 16 or 32) so in practice these -- operations are constant time. module Data.HashSet.Internal diff --git a/docs/developer-guide.md b/docs/developer-guide.md index 5af0c515..5faad514 100644 --- a/docs/developer-guide.md +++ b/docs/developer-guide.md @@ -103,7 +103,8 @@ Here's a quick overview in order of simplicity: it contains *2^B* elements. The number of bits of the hash value to use at each level of the tree, *B*, is a -compile time constant, currently 5. In general a larger *B* improves lookup +compile time constant, currently 5 on 64-bit platforms, and 4 on platforms with +a smaller word size. In general a larger *B* improves lookup performance (shallower tree) but hurts modification (large nodes to copy when updating the spine of the tree). From 1607e714d42fbbf6ac5fdbff4a8bc824ee656092 Mon Sep 17 00:00:00 2001 From: Simon Jakobi Date: Sat, 27 Sep 2025 12:50:36 +0200 Subject: [PATCH 23/23] Add a note to submapBitmapIndexed --- Data/HashMap/Internal.hs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Data/HashMap/Internal.hs b/Data/HashMap/Internal.hs index 40f2b2f5..61c0edd2 100644 --- a/Data/HashMap/Internal.hs +++ b/Data/HashMap/Internal.hs @@ -1556,6 +1556,9 @@ submapBitmapIndexed comp !b1 !ary1 !b2 !ary2 = subsetBitmaps && go 0 0 (b1Orb2 . where go :: Int -> Int -> Bitmap -> Bool go !i !j !m + + -- Note: m can overflow to 0 when maxChildren == WORD_SIZE_IN_BITS. See + -- #491. In that case there needs to be a check '| m == 0 = True' | m > b1Orb2 = True -- In case a key is both in ary1 and ary2, check ary1[i] <= ary2[j] and