Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize semantic token extraction logic #4050

Merged
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
import Control.Monad.Trans (lift)
import Control.Monad.Trans.Except (runExceptT)
import qualified Data.Map.Strict as M
import qualified Data.Set as S

Check warning on line 21 in plugins/hls-semantic-tokens-plugin/src/Ide/Plugin/SemanticTokens/Internal.hs

View workflow job for this annotation

GitHub Actions / flags (9.8, ubuntu-latest)

The qualified import of ‘Data.Set’ is redundant

Check warning on line 21 in plugins/hls-semantic-tokens-plugin/src/Ide/Plugin/SemanticTokens/Internal.hs

View workflow job for this annotation

GitHub Actions / flags (9.2, ubuntu-latest)

The qualified import of ‘Data.Set’ is redundant

Check warning on line 21 in plugins/hls-semantic-tokens-plugin/src/Ide/Plugin/SemanticTokens/Internal.hs

View workflow job for this annotation

GitHub Actions / flags (9.4, ubuntu-latest)

The qualified import of ‘Data.Set’ is redundant

Check warning on line 21 in plugins/hls-semantic-tokens-plugin/src/Ide/Plugin/SemanticTokens/Internal.hs

View workflow job for this annotation

GitHub Actions / flags (9.6, ubuntu-latest)

The qualified import of ‘Data.Set’ is redundant
import Development.IDE (Action,
GetDocMap (GetDocMap),
GetHieAst (GetHieAst),
Expand Down Expand Up @@ -48,7 +48,7 @@
import Ide.Plugin.SemanticTokens.Mappings
import Ide.Plugin.SemanticTokens.Query
import Ide.Plugin.SemanticTokens.SemanticConfig (mkSemanticConfigFunctions)
import Ide.Plugin.SemanticTokens.Tokenize (hieAstSpanIdentifiers)
import Ide.Plugin.SemanticTokens.Tokenize (computeRangeHsSemanticTokenTypeList)
import Ide.Plugin.SemanticTokens.Types
import Ide.Types
import qualified Language.LSP.Protocol.Lens as L
Expand All @@ -69,8 +69,8 @@
computeSemanticTokens recorder pid _ nfp = do
config <- lift $ useSemanticConfigAction pid
logWith recorder Debug (LogConfig config)
(RangeHsSemanticTokenTypes {rangeSemanticMap}, mapping) <- useWithStaleE GetSemanticTokens nfp
withExceptT PluginInternalError $ liftEither $ rangeSemanticMapSemanticTokens config mapping rangeSemanticMap
(RangeHsSemanticTokenTypes {rangeSemanticList}, mapping) <- useWithStaleE GetSemanticTokens nfp
withExceptT PluginInternalError $ liftEither $ rangeSemanticsSemanticTokens config mapping rangeSemanticList

semanticTokensFull :: Recorder (WithPriority SemanticLog) -> PluginMethodHandler IdeState 'Method_TextDocumentSemanticTokensFull
semanticTokensFull recorder state pid param = do
Expand All @@ -96,26 +96,8 @@
(DKMap {getTyThingMap}, _) <- lift $ useWithStale_ GetDocMap nfp
ast <- handleMaybe (LogNoAST $ show nfp) $ getAsts hieAst M.!? (HiePath . mkFastString . fromNormalizedFilePath) nfp
virtualFile <- handleMaybeM LogNoVF $ getVirtualFile nfp
-- get current location from the old ones
let spanIdMap = M.filter (not . null) $ hieAstSpanIdentifiers virtualFile ast
let names = S.unions $ M.elems spanIdMap
let localSemanticMap = mkLocalIdSemanticFromAst names (hieKindFunMasksKind hieKind) refMap
-- get imported name semantic map
let importedIdSemanticMap = M.mapMaybe id
$ M.fromSet (getTypeThing getTyThingMap) (names `S.difference` M.keysSet localSemanticMap)
let sMap = M.unionWith (<>) importedIdSemanticMap localSemanticMap
let rangeTokenType = extractSemanticTokensFromNames sMap spanIdMap
return $ RangeHsSemanticTokenTypes rangeTokenType
where
getTypeThing ::
NameEnv TyThing ->
Identifier ->
Maybe HsSemanticTokenType
getTypeThing tyThingMap n
| (Right name) <- n =
let tyThing = lookupNameEnv tyThingMap name
in (tyThing >>= tyThingSemantic)
| otherwise = Nothing
let hsFinder = idSemantic getTyThingMap (hieKindFunMasksKind hieKind) refMap
return $ computeRangeHsSemanticTokenTypeList hsFinder virtualFile ast

-- | Persistent rule to ensure that semantic tokens doesn't block on startup
persistentGetSemanticTokensRule :: Rules ()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,6 @@ import Language.LSP.VFS hiding (line)

-- * 0. Mapping name to Hs semantic token type.

idInfixOperator :: Identifier -> Maybe HsSemanticTokenType
idInfixOperator (Right name) = nameInfixOperator name
idInfixOperator _ = Nothing

nameInfixOperator :: Name -> Maybe HsSemanticTokenType
nameInfixOperator name | isSymOcc (nameOccName name) = Just TOperator
nameInfixOperator _ = Nothing
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@
-- The query module is used to query the semantic tokens from the AST
module Ide.Plugin.SemanticTokens.Query where

import Control.Applicative ((<|>))
import Data.Foldable (fold)
import qualified Data.Map.Strict as M
import Data.Maybe (listToMaybe, mapMaybe)
import Data.Set (Set)
import qualified Data.Set as Set
import Data.Text (Text)
import Development.IDE.Core.PositionMapping (PositionMapping,
Expand All @@ -17,8 +17,7 @@ import Development.IDE.GHC.Compat
import Ide.Plugin.SemanticTokens.Mappings
import Ide.Plugin.SemanticTokens.Types (HieFunMaskKind,
HsSemanticTokenType (TModule),
IdSemanticMap,
RangeIdSetMap,
RangeSemanticTokenTypeList,
SemanticTokensConfig)
import Language.LSP.Protocol.Types (Position (Position),
Range (Range),
Expand All @@ -30,44 +29,48 @@ import Prelude hiding (length, span)

---------------------------------------------------------

-- * extract semantic map from HieAst for local variables
-- * extract semantic

---------------------------------------------------------

mkLocalIdSemanticFromAst :: Set Identifier -> HieFunMaskKind a -> RefMap a -> IdSemanticMap
mkLocalIdSemanticFromAst names hieKind rm = M.mapMaybe (idIdSemanticFromHie hieKind rm) $ M.fromSet id names
idSemantic :: forall a. NameEnv TyThing -> HieFunMaskKind a -> RefMap a -> Identifier -> Maybe HsSemanticTokenType
idSemantic _ _ _ (Left _) = Just TModule
idSemantic tyThingMap hieKind rm (Right n) =
nameSemanticFromHie hieKind rm n -- local name
<|> (lookupNameEnv tyThingMap n >>= tyThingSemantic) -- global name

idIdSemanticFromHie :: forall a. HieFunMaskKind a -> RefMap a -> Identifier -> Maybe HsSemanticTokenType
idIdSemanticFromHie _ _ (Left _) = Just TModule
idIdSemanticFromHie hieKind rm ns = do
idSemanticFromRefMap rm ns

---------------------------------------------------------

-- * extract semantic from HieAst for local variables

---------------------------------------------------------

nameSemanticFromHie :: forall a. HieFunMaskKind a -> RefMap a -> Name -> Maybe HsSemanticTokenType
nameSemanticFromHie hieKind rm n = do
idSemanticFromRefMap rm (Right n)
where
idSemanticFromRefMap :: RefMap a -> Identifier -> Maybe HsSemanticTokenType
idSemanticFromRefMap rm' name' = do
spanInfos <- M.lookup name' rm'
let typeTokenType = foldMap (typeSemantic hieKind) $ listToMaybe $ mapMaybe (identType . snd) spanInfos
contextInfoTokenType <- foldMap (contextInfosMaybeTokenType . identInfo . snd) spanInfos
fold [typeTokenType, Just contextInfoTokenType, idInfixOperator ns]
fold [typeTokenType, Just contextInfoTokenType, nameInfixOperator n]

contextInfosMaybeTokenType :: Set.Set ContextInfo -> Maybe HsSemanticTokenType
contextInfosMaybeTokenType details = foldMap infoTokenType (Set.toList details)


-------------------------------------------------

-- * extract semantic tokens from IdSemanticMap
-- * extract lsp semantic tokens from RangeSemanticTokenTypeList

-------------------------------------------------

extractSemanticTokensFromNames :: IdSemanticMap -> RangeIdSetMap -> M.Map Range HsSemanticTokenType
extractSemanticTokensFromNames nsm = M.mapMaybe (foldMap (`M.lookup` nsm))

rangeSemanticMapSemanticTokens :: SemanticTokensConfig -> PositionMapping -> M.Map Range HsSemanticTokenType -> Either Text SemanticTokens
rangeSemanticMapSemanticTokens stc mapping =
rangeSemanticsSemanticTokens :: SemanticTokensConfig -> PositionMapping -> RangeSemanticTokenTypeList -> Either Text SemanticTokens
rangeSemanticsSemanticTokens stc mapping =
makeSemanticTokens defaultSemanticTokensLegend
. mapMaybe (\(range, ty) -> flip toAbsSemanticToken ty <$> range)
. M.toAscList
. M.mapKeys (toCurrentRange mapping)
. mapMaybe (\(ran, tk) -> toAbsSemanticToken <$> toCurrentRange mapping ran <*> return tk)
where
toAbsSemanticToken :: Range -> HsSemanticTokenType -> SemanticTokenAbsolute
toAbsSemanticToken (Range (Position startLine startColumn) (Position _endLine endColumn)) tokenType =
Expand Down
Original file line number Diff line number Diff line change
@@ -1,18 +1,17 @@
{-# LANGUAGE OverloadedRecordDot #-}
{-# LANGUAGE OverloadedStrings #-}

module Ide.Plugin.SemanticTokens.Tokenize (hieAstSpanIdentifiers) where
module Ide.Plugin.SemanticTokens.Tokenize (computeRangeHsSemanticTokenTypeList) where

import Control.Lens (Identity (runIdentity))
import Control.Monad (forM_, guard)
import Control.Monad.State.Strict (MonadState (get),
MonadTrans (lift),
execStateT, modify, put)
import Control.Monad.Trans.State.Strict (StateT)
import Control.Monad.Trans.State.Strict (StateT, modify')
import Data.Char (isAlphaNum)
import qualified Data.Map.Strict as M
import qualified Data.Map.Strict as Map
import qualified Data.Set as S
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.Rope as Char
Expand All @@ -22,42 +21,50 @@ import Data.Text.Utf16.Rope.Mixed (Rope)
import qualified Data.Text.Utf16.Rope.Mixed as Rope
import Development.IDE.GHC.Compat
import Development.IDE.GHC.Error (realSrcSpanToCodePointRange)
import Ide.Plugin.SemanticTokens.Types (RangeIdSetMap)
import Ide.Plugin.SemanticTokens.Types (HsSemanticTokenType (TModule),
RangeHsSemanticTokenTypes (..))
import Language.LSP.Protocol.Types (Position (Position),
Range (Range), UInt, mkRange)
import Language.LSP.VFS hiding (line)
import Prelude hiding (length, span)

type Tokenizer m a = StateT PTokenState m a
type HsSemanticLookup = Identifier -> Maybe HsSemanticTokenType


data PTokenState = PTokenState
{ rangeIdSetMap :: !RangeIdSetMap,
rope :: !Rope, -- the remains of rope we are working on
cursor :: !Char.Position, -- the cursor position of the current rope to the start of the original file in code point position
columnsInUtf16 :: !UInt -- the column of the start of the current rope in utf16
{
rope :: !Rope -- the remains of rope we are working on
, cursor :: !Char.Position -- the cursor position of the current rope to the start of the original file in code point position
, columnsInUtf16 :: !UInt -- the column of the start of the current rope in utf16
, rangeHsSemanticList :: [(Range, HsSemanticTokenType)] -- (range, token type) in reverse order
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can we move this out the state? It seems like all the other Tokenizer functions return (), instead we could just return [(Range, HsSemanticTokenType)]

Copy link
Collaborator Author

@soulomoon soulomoon Feb 5, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, maybe we can do a CPS to collect them one by one or switch to a structure with better concatenation time complexity.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

DList fit into it. I've switch to Dlist and remove rangeHsSemanticList out the state

}

runTokenizer :: (Monad m) => Tokenizer m a -> PTokenState -> m RangeIdSetMap
runTokenizer p st = rangeIdSetMap <$> execStateT p st
runTokenizer :: (Monad m) => Tokenizer m a -> PTokenState -> m [(Range, HsSemanticTokenType)]
runTokenizer p st = reverse . rangeHsSemanticList <$> execStateT p st

data SplitResult
= NoSplit (Text, Range) -- does not need to split, token text, token range
| Split (Text, Range, Range) -- token text, prefix range(module range), token range
deriving (Show)

getSplitTokenText :: SplitResult -> Text
getSplitTokenText (NoSplit (t, _)) = t
getSplitTokenText (Split (t, _, _)) = t


mkPTokenState :: VirtualFile -> PTokenState
mkPTokenState vf =
PTokenState
{ rangeIdSetMap = mempty,
{
rope = Rope.fromText $ toText vf._file_text,
cursor = Char.Position 0 0,
columnsInUtf16 = 0
columnsInUtf16 = 0,
rangeHsSemanticList = []
}

addRangeIdSetMap :: (Monad m) => Range -> Identifier -> Tokenizer m ()
addRangeIdSetMap r i = modify $ \s -> s {rangeIdSetMap = Map.insertWith (<>) r (S.singleton i) $ rangeIdSetMap s}
addRangeHsSemanticList :: (Monad m) => (Range, HsSemanticTokenType) -> Tokenizer m ()
addRangeHsSemanticList r = modify' $ \s -> s {rangeHsSemanticList = r : rangeHsSemanticList s}

-- lift a Tokenizer Maybe () to Tokenizer m (),
-- if the Maybe is Nothing, do nothing, recover the state
Expand All @@ -67,18 +74,19 @@ liftMaybeM p = do
st <- get
forM_ (execStateT p st) put

hieAstSpanIdentifiers :: VirtualFile -> HieAST a -> RangeIdSetMap
hieAstSpanIdentifiers vf ast = runIdentity $ runTokenizer (foldAst ast) (mkPTokenState vf)
computeRangeHsSemanticTokenTypeList :: HsSemanticLookup -> VirtualFile -> HieAST a -> RangeHsSemanticTokenTypes
computeRangeHsSemanticTokenTypeList lookupHsTokenType vf ast =
RangeHsSemanticTokenTypes $ runIdentity $ runTokenizer (foldAst lookupHsTokenType ast) (mkPTokenState vf)

-- | foldAst
-- visit every leaf node in the ast in depth first order
foldAst :: (Monad m) => HieAST t -> Tokenizer m ()
foldAst ast = if null (nodeChildren ast)
then liftMaybeM (visitLeafIds ast)
else mapM_ foldAst $ nodeChildren ast
foldAst :: (Monad m) => HsSemanticLookup -> HieAST t -> Tokenizer m ()
foldAst lookupHsTokenType ast = if null (nodeChildren ast)
then liftMaybeM (visitLeafIds lookupHsTokenType ast)
else mapM_ (foldAst lookupHsTokenType) $ nodeChildren ast

visitLeafIds :: HieAST t -> Tokenizer Maybe ()
visitLeafIds leaf = liftMaybeM $ do
visitLeafIds :: HsSemanticLookup -> HieAST t -> Tokenizer Maybe ()
visitLeafIds lookupHsTokenType leaf = liftMaybeM $ do
let span = nodeSpan leaf
(ran, token) <- focusTokenAt leaf
-- if `focusTokenAt` succeed, we can safely assume we have shift the cursor correctly
Expand All @@ -87,30 +95,37 @@ visitLeafIds leaf = liftMaybeM $ do
-- only handle the leaf node with single column token
guard $ srcSpanStartLine span == srcSpanEndLine span
splitResult <- lift $ splitRangeByText token ran
mapM_ (combineNodeIds ran splitResult) $ Map.filterWithKey (\k _ -> k == SourceInfo) $ getSourcedNodeInfo $ sourcedNodeInfo leaf
mapM_ (combineNodeIds lookupHsTokenType ran splitResult) $ Map.filterWithKey (\k _ -> k == SourceInfo) $ getSourcedNodeInfo $ sourcedNodeInfo leaf
where
combineNodeIds :: (Monad m) => Range -> SplitResult -> NodeInfo a -> Tokenizer m ()
combineNodeIds ran ranSplit (NodeInfo _ _ bd) = mapM_ (getIdentifier ran ranSplit) (M.keys bd)
getIdentifier :: (Monad m) => Range -> SplitResult -> Identifier -> Tokenizer m ()
getIdentifier ran ranSplit idt = liftMaybeM $ do
combineNodeIds :: (Monad m) => HsSemanticLookup -> Range -> SplitResult -> NodeInfo a -> Tokenizer m ()
combineNodeIds lookupHsTokenType ran ranSplit (NodeInfo _ _ bd) =
case (maybeTokenType, ranSplit) of
(Nothing, _) -> return ()
(Just TModule, _) -> addRangeHsSemanticList (ran, TModule)
(Just tokenType, NoSplit (_, tokenRan)) -> addRangeHsSemanticList (tokenRan, tokenType)
(Just tokenType, Split (_, ranPrefix, tokenRan)) -> do
addRangeHsSemanticList (ranPrefix, TModule)
addRangeHsSemanticList (tokenRan, tokenType)
where
maybeTokenType = foldMap (getIdentifier lookupHsTokenType ranSplit) (M.keys bd)

-- takeHsSemanticType :: HsSemanticLookup -> SplitResult -> Identifier -> Maybe HsSemanticTokenType

getIdentifier :: HsSemanticLookup -> SplitResult -> Identifier -> Maybe HsSemanticTokenType
getIdentifier lookupHsTokenType ranSplit idt = do
case idt of
Left _moduleName -> addRangeIdSetMap ran idt
Left _moduleName -> Just TModule
Right name -> do
occStr <- lift $ T.pack <$> case (occNameString . nameOccName) name of
occStr <- T.pack <$> case (occNameString . nameOccName) name of
-- the generated selector name with {-# LANGUAGE DuplicateRecordFields #-}
'$' : 's' : 'e' : 'l' : ':' : xs -> Just $ takeWhile (/= ':') xs
-- other generated names that should not be visible
'$' : c : _ | isAlphaNum c -> Nothing
c : ':' : _ | isAlphaNum c -> Nothing
ns -> Just ns
case ranSplit of
(NoSplit (tk, r)) -> do
guard $ tk == occStr
addRangeIdSetMap r idt
(Split (tk, r1, r2)) -> do
guard $ tk == occStr
addRangeIdSetMap r1 (Left $ mkModuleName "")
addRangeIdSetMap r2 idt
guard $ getSplitTokenText ranSplit == occStr
lookupHsTokenType idt


focusTokenAt ::
-- | leaf node we want to focus on
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
import qualified Data.Array as A
import Data.Default (Default (def))
import Data.Generics (Typeable)
import qualified Data.Map.Strict as M
import Development.IDE (Pretty (pretty), RuleResult)
import qualified Development.IDE.Core.Shake as Shake
import Development.IDE.GHC.Compat hiding (loc)
Expand All @@ -19,8 +18,8 @@
import GHC.Generics (Generic)
import Language.LSP.Protocol.Types
-- import template haskell
import Data.Map.Strict (Map)

Check warning on line 21 in plugins/hls-semantic-tokens-plugin/src/Ide/Plugin/SemanticTokens/Types.hs

View workflow job for this annotation

GitHub Actions / flags (9.8, ubuntu-latest)

The import of ‘Data.Map.Strict’ is redundant

Check warning on line 21 in plugins/hls-semantic-tokens-plugin/src/Ide/Plugin/SemanticTokens/Types.hs

View workflow job for this annotation

GitHub Actions / flags (9.2, ubuntu-latest)

The import of ‘Data.Map.Strict’ is redundant

Check warning on line 21 in plugins/hls-semantic-tokens-plugin/src/Ide/Plugin/SemanticTokens/Types.hs

View workflow job for this annotation

GitHub Actions / flags (9.4, ubuntu-latest)

The import of ‘Data.Map.Strict’ is redundant

Check warning on line 21 in plugins/hls-semantic-tokens-plugin/src/Ide/Plugin/SemanticTokens/Types.hs

View workflow job for this annotation

GitHub Actions / flags (9.6, ubuntu-latest)

The import of ‘Data.Map.Strict’ is redundant
import Data.Set (Set)

Check warning on line 22 in plugins/hls-semantic-tokens-plugin/src/Ide/Plugin/SemanticTokens/Types.hs

View workflow job for this annotation

GitHub Actions / flags (9.8, ubuntu-latest)

The import of ‘Data.Set’ is redundant

Check warning on line 22 in plugins/hls-semantic-tokens-plugin/src/Ide/Plugin/SemanticTokens/Types.hs

View workflow job for this annotation

GitHub Actions / flags (9.2, ubuntu-latest)

The import of ‘Data.Set’ is redundant

Check warning on line 22 in plugins/hls-semantic-tokens-plugin/src/Ide/Plugin/SemanticTokens/Types.hs

View workflow job for this annotation

GitHub Actions / flags (9.4, ubuntu-latest)

The import of ‘Data.Set’ is redundant

Check warning on line 22 in plugins/hls-semantic-tokens-plugin/src/Ide/Plugin/SemanticTokens/Types.hs

View workflow job for this annotation

GitHub Actions / flags (9.6, ubuntu-latest)

The import of ‘Data.Set’ is redundant
import Language.Haskell.TH.Syntax (Lift)


Expand Down Expand Up @@ -108,25 +107,28 @@
instance Show Loc where
show (Loc line startChar len) = show line <> ":" <> show startChar <> "-" <> show (startChar + len)

type RangeIdSetMap = Map Range (Set Identifier)

type IdSemanticMap = Map Identifier HsSemanticTokenType

data GetSemanticTokens = GetSemanticTokens
deriving (Eq, Show, Typeable, Generic)

instance Hashable GetSemanticTokens

instance NFData GetSemanticTokens

newtype RangeHsSemanticTokenTypes = RangeHsSemanticTokenTypes {rangeSemanticMap :: M.Map Range HsSemanticTokenType}
type RangeSemanticTokenTypeList = [(Range, HsSemanticTokenType)]

newtype RangeHsSemanticTokenTypes = RangeHsSemanticTokenTypes {rangeSemanticList :: RangeSemanticTokenTypeList}

instance NFData RangeHsSemanticTokenTypes where
rnf :: RangeHsSemanticTokenTypes -> ()
rnf (RangeHsSemanticTokenTypes a) = rwhnf a

instance Show RangeHsSemanticTokenTypes where
show = const "RangeHsSemanticTokenTypes"
show (RangeHsSemanticTokenTypes xs) = unlines $ map showRangeToken xs

showRangeToken :: (Range, HsSemanticTokenType) -> String
showRangeToken (ran, tk) = showRange ran <> " " <> show tk
showRange :: Range -> String
showRange (Range (Position l1 c1) (Position l2 c2)) = show l1 <> ":" <> show c1 <> "-" <> show l2 <> ":" <> show c2

type instance RuleResult GetSemanticTokens = RangeHsSemanticTokenTypes

Expand Down
Loading