From b704b590060441b3318c504935e7cf41e5c43209 Mon Sep 17 00:00:00 2001 From: Matthew Pickering Date: Thu, 13 Jun 2024 15:58:26 +0100 Subject: [PATCH] Fix quadratic memory usage in GetLocatedImports At startup `GetLocatedImports` is called on all known files. Say you have 10000 modules in your project then this leads to 10000 calls to GetLocatedImports running concurrently. In `GetLocatedImports` the known targets are consulted and the targetsMap is created by mapping the known targets. This map is used for introducing sharing amongst filepaths. This operation copies a local copy of the `target` map which is local to the rule. ``` let targetsMap = HMap.mapWithKey const targets ``` So now each rule has a hashmap of size 10000 held locally to it and depending on how the threads are scheduled there will be 10000^2 elements in total allocated in hashmaps. This used a lot of memory. Solution: Return the normalising map in the result of the `GetKnownTargets` rule so it is shared across threads. Fixes #4317 --- .../session-loader/Development/IDE/Session.hs | 5 +- ghcide/src/Development/IDE/Core/Rules.hs | 3 +- ghcide/src/Development/IDE/Core/Shake.hs | 4 +- .../src/Development/IDE/Plugin/Completions.hs | 2 +- .../src/Development/IDE/Types/KnownTargets.hs | 53 +++++++++++++++++-- 5 files changed, 56 insertions(+), 11 deletions(-) diff --git a/ghcide/session-loader/Development/IDE/Session.hs b/ghcide/session-loader/Development/IDE/Session.hs index 81cada0455..613052edf1 100644 --- a/ghcide/session-loader/Development/IDE/Session.hs +++ b/ghcide/session-loader/Development/IDE/Session.hs @@ -505,13 +505,12 @@ loadSessionWithOptions recorder SessionLoadingOptions{..} rootDir que = do return [(targetTarget, Set.fromList found)] hasUpdate <- atomically $ do known <- readTVar knownTargetsVar - let known' = flip mapHashed known $ \k -> - HM.unionWith (<>) k $ HM.fromList knownTargets + let known' = flip mapHashed known $ \k -> unionKnownTargets k (mkKnownTargets knownTargets) hasUpdate = if known /= known' then Just (unhashed known') else Nothing writeTVar knownTargetsVar known' pure hasUpdate for_ hasUpdate $ \x -> - logWith recorder Debug $ LogKnownFilesUpdated x + logWith recorder Debug $ LogKnownFilesUpdated (targetMap x) return $ toNoFileKey GetKnownTargets -- Create a new HscEnv from a hieYaml root and a set of options diff --git a/ghcide/src/Development/IDE/Core/Rules.hs b/ghcide/src/Development/IDE/Core/Rules.hs index a10323f3fe..13f6db6f69 100644 --- a/ghcide/src/Development/IDE/Core/Rules.hs +++ b/ghcide/src/Development/IDE/Core/Rules.hs @@ -321,8 +321,7 @@ getLocatedImportsRule :: Recorder (WithPriority Log) -> Rules () getLocatedImportsRule recorder = define (cmapWithPrio LogShake recorder) $ \GetLocatedImports file -> do ModSummaryResult{msrModSummary = ms} <- use_ GetModSummaryWithoutTimestamps file - targets <- useNoFile_ GetKnownTargets - let targetsMap = HM.mapWithKey const targets + (KnownTargets targets targetsMap) <- useNoFile_ GetKnownTargets let imports = [(False, imp) | imp <- ms_textual_imps ms] ++ [(True, imp) | imp <- ms_srcimps ms] env_eq <- use_ GhcSession file let env = hscEnvWithImportPaths env_eq diff --git a/ghcide/src/Development/IDE/Core/Shake.hs b/ghcide/src/Development/IDE/Core/Shake.hs index d426ba34f8..25493da9a4 100644 --- a/ghcide/src/Development/IDE/Core/Shake.hs +++ b/ghcide/src/Development/IDE/Core/Shake.hs @@ -24,7 +24,7 @@ module Development.IDE.Core.Shake( IdeState, shakeSessionInit, shakeExtras, shakeDb, rootDir, ShakeExtras(..), getShakeExtras, getShakeExtrasRules, - KnownTargets, Target(..), toKnownFiles, + KnownTargets(..), Target(..), toKnownFiles, unionKnownTargets, mkKnownTargets, IdeRule, IdeResult, GetModificationTime(GetModificationTime, GetModificationTime_, missingFileDiagnostics), shakeOpen, shakeShut, @@ -691,7 +691,7 @@ shakeOpen recorder lspEnv defaultConfig idePlugins debouncer publishedDiagnostics <- STM.newIO semanticTokensCache <- STM.newIO positionMapping <- STM.newIO - knownTargetsVar <- newTVarIO $ hashed HMap.empty + knownTargetsVar <- newTVarIO $ hashed emptyKnownTargets let restartShakeSession = shakeRestart recorder ideState persistentKeys <- newTVarIO mempty indexPending <- newTVarIO HMap.empty diff --git a/ghcide/src/Development/IDE/Plugin/Completions.hs b/ghcide/src/Development/IDE/Plugin/Completions.hs index ad9f4fe6f5..98ca6dc592 100644 --- a/ghcide/src/Development/IDE/Plugin/Completions.hs +++ b/ghcide/src/Development/IDE/Plugin/Completions.hs @@ -180,7 +180,7 @@ getCompletionsLSP ide plId pm <- useWithStaleFast GetParsedModule npath binds <- fromMaybe (mempty, zeroMapping) <$> useWithStaleFast GetBindings npath knownTargets <- liftIO $ runAction "Completion" ide $ useNoFile GetKnownTargets - let localModules = maybe [] Map.keys knownTargets + let localModules = maybe [] (Map.keys . targetMap) knownTargets let lModules = mempty{importableModules = map toModueNameText localModules} -- set up the exports map including both package and project-level identifiers packageExportsMapIO <- fmap(envPackageExports . fst) <$> useWithStaleFast GhcSession npath diff --git a/ghcide/src/Development/IDE/Types/KnownTargets.hs b/ghcide/src/Development/IDE/Types/KnownTargets.hs index 5e14816c7f..6ae6d52ba3 100644 --- a/ghcide/src/Development/IDE/Types/KnownTargets.hs +++ b/ghcide/src/Development/IDE/Types/KnownTargets.hs @@ -1,6 +1,11 @@ {-# LANGUAGE DeriveAnyClass #-} {-# LANGUAGE DerivingStrategies #-} -module Development.IDE.Types.KnownTargets (KnownTargets, Target(..), toKnownFiles) where +module Development.IDE.Types.KnownTargets ( KnownTargets(..) + , emptyKnownTargets + , mkKnownTargets + , unionKnownTargets + , Target(..) + , toKnownFiles) where import Control.DeepSeq import Data.Hashable @@ -14,11 +19,53 @@ import Development.IDE.Types.Location import GHC.Generics -- | A mapping of module name to known files -type KnownTargets = HashMap Target (HashSet NormalizedFilePath) +data KnownTargets = KnownTargets + { targetMap :: !(HashMap Target (HashSet NormalizedFilePath)) + -- | 'normalisingMap' is a cached copy of `HMap.mapKey const targetMap` + -- + -- At startup 'GetLocatedImports' is called on all known files. Say you have 10000 + -- modules in your project then this leads to 10000 calls to 'GetLocatedImports' + -- running concurrently. + -- + -- In `GetLocatedImports` the known targets are consulted and the targetsMap + -- is created by mapping the known targets. This map is used for introducing + -- sharing amongst filepaths. This operation copies a local copy of the `target` + -- map which is local to the rule. + -- + -- @ + -- let targetsMap = HMap.mapWithKey const targets + -- @ + -- + -- So now each rule has a 'HashMap' of size 10000 held locally to it and depending + -- on how the threads are scheduled there will be 10000^2 elements in total + -- allocated in 'HashMap's. This used a lot of memory. + -- + -- Solution: Return the 'normalisingMap' in the result of the `GetKnownTargets` rule so it is shared across threads. + , normalisingMap :: !(HashMap Target Target) } deriving Show + + +unionKnownTargets :: KnownTargets -> KnownTargets -> KnownTargets +unionKnownTargets (KnownTargets tm nm) (KnownTargets tm' nm') = + KnownTargets (HMap.unionWith (<>) tm tm') (HMap.union nm nm') + +mkKnownTargets :: [(Target, HashSet NormalizedFilePath)] -> KnownTargets +mkKnownTargets vs = KnownTargets (HMap.fromList vs) (HMap.fromList [(k,k) | (k,_) <- vs ]) + +instance NFData KnownTargets where + rnf (KnownTargets tm nm) = rnf tm `seq` rnf nm `seq` () + +instance Eq KnownTargets where + k1 == k2 = targetMap k1 == targetMap k2 + +instance Hashable KnownTargets where + hashWithSalt s (KnownTargets hm _) = hashWithSalt s hm + +emptyKnownTargets :: KnownTargets +emptyKnownTargets = KnownTargets HMap.empty HMap.empty data Target = TargetModule ModuleName | TargetFile NormalizedFilePath deriving ( Eq, Ord, Generic, Show ) deriving anyclass (Hashable, NFData) toKnownFiles :: KnownTargets -> HashSet NormalizedFilePath -toKnownFiles = HSet.unions . HMap.elems +toKnownFiles = HSet.unions . HMap.elems . targetMap