Skip to content

Commit 3c511b0

Browse files
authored
Optimize semantic token extraction logic (#4050)
A follow up of #3958 , we have added a tokenizor to walk the hieAst along with the file rope, it means we no longer need to do the detour of storing temperal result as Map Range (Set identifier), instead we can optimize by fusing most of the logic into tokenizer and return [(Range, HsSemanticTokenType)] directly.
1 parent 2f33f8f commit 3c511b0

File tree

6 files changed

+90
-100
lines changed

6 files changed

+90
-100
lines changed

haskell-language-server.cabal

+1
Original file line numberDiff line numberDiff line change
@@ -1592,6 +1592,7 @@ library hls-semantic-tokens-plugin
15921592
, syb
15931593
, array
15941594
, deepseq
1595+
, dlist
15951596
, hls-graph == 2.6.0.0
15961597
, template-haskell
15971598
, data-default

plugins/hls-semantic-tokens-plugin/src/Ide/Plugin/SemanticTokens/Internal.hs

+5-23
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ import Ide.Plugin.Error (PluginError (PluginIn
4848
import Ide.Plugin.SemanticTokens.Mappings
4949
import Ide.Plugin.SemanticTokens.Query
5050
import Ide.Plugin.SemanticTokens.SemanticConfig (mkSemanticConfigFunctions)
51-
import Ide.Plugin.SemanticTokens.Tokenize (hieAstSpanIdentifiers)
51+
import Ide.Plugin.SemanticTokens.Tokenize (computeRangeHsSemanticTokenTypeList)
5252
import Ide.Plugin.SemanticTokens.Types
5353
import Ide.Types
5454
import qualified Language.LSP.Protocol.Lens as L
@@ -69,8 +69,8 @@ computeSemanticTokens :: Recorder (WithPriority SemanticLog) -> PluginId -> IdeS
6969
computeSemanticTokens recorder pid _ nfp = do
7070
config <- lift $ useSemanticConfigAction pid
7171
logWith recorder Debug (LogConfig config)
72-
(RangeHsSemanticTokenTypes {rangeSemanticMap}, mapping) <- useWithStaleE GetSemanticTokens nfp
73-
withExceptT PluginInternalError $ liftEither $ rangeSemanticMapSemanticTokens config mapping rangeSemanticMap
72+
(RangeHsSemanticTokenTypes {rangeSemanticList}, mapping) <- useWithStaleE GetSemanticTokens nfp
73+
withExceptT PluginInternalError $ liftEither $ rangeSemanticsSemanticTokens config mapping rangeSemanticList
7474

7575
semanticTokensFull :: Recorder (WithPriority SemanticLog) -> PluginMethodHandler IdeState 'Method_TextDocumentSemanticTokensFull
7676
semanticTokensFull recorder state pid param = do
@@ -96,26 +96,8 @@ getSemanticTokensRule recorder =
9696
(DKMap {getTyThingMap}, _) <- lift $ useWithStale_ GetDocMap nfp
9797
ast <- handleMaybe (LogNoAST $ show nfp) $ getAsts hieAst M.!? (HiePath . mkFastString . fromNormalizedFilePath) nfp
9898
virtualFile <- handleMaybeM LogNoVF $ getVirtualFile nfp
99-
-- get current location from the old ones
100-
let spanIdMap = M.filter (not . null) $ hieAstSpanIdentifiers virtualFile ast
101-
let names = S.unions $ M.elems spanIdMap
102-
let localSemanticMap = mkLocalIdSemanticFromAst names (hieKindFunMasksKind hieKind) refMap
103-
-- get imported name semantic map
104-
let importedIdSemanticMap = M.mapMaybe id
105-
$ M.fromSet (getTypeThing getTyThingMap) (names `S.difference` M.keysSet localSemanticMap)
106-
let sMap = M.unionWith (<>) importedIdSemanticMap localSemanticMap
107-
let rangeTokenType = extractSemanticTokensFromNames sMap spanIdMap
108-
return $ RangeHsSemanticTokenTypes rangeTokenType
109-
where
110-
getTypeThing ::
111-
NameEnv TyThing ->
112-
Identifier ->
113-
Maybe HsSemanticTokenType
114-
getTypeThing tyThingMap n
115-
| (Right name) <- n =
116-
let tyThing = lookupNameEnv tyThingMap name
117-
in (tyThing >>= tyThingSemantic)
118-
| otherwise = Nothing
99+
let hsFinder = idSemantic getTyThingMap (hieKindFunMasksKind hieKind) refMap
100+
return $ computeRangeHsSemanticTokenTypeList hsFinder virtualFile ast
119101

120102
-- | Persistent rule to ensure that semantic tokens doesn't block on startup
121103
persistentGetSemanticTokensRule :: Rules ()

plugins/hls-semantic-tokens-plugin/src/Ide/Plugin/SemanticTokens/Mappings.hs

-4
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,6 @@ import Language.LSP.VFS hiding (line)
3232

3333
-- * 0. Mapping name to Hs semantic token type.
3434

35-
idInfixOperator :: Identifier -> Maybe HsSemanticTokenType
36-
idInfixOperator (Right name) = nameInfixOperator name
37-
idInfixOperator _ = Nothing
38-
3935
nameInfixOperator :: Name -> Maybe HsSemanticTokenType
4036
nameInfixOperator name | isSymOcc (nameOccName name) = Just TOperator
4137
nameInfixOperator _ = Nothing

plugins/hls-semantic-tokens-plugin/src/Ide/Plugin/SemanticTokens/Query.hs

+23-20
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,10 @@
55
-- The query module is used to query the semantic tokens from the AST
66
module Ide.Plugin.SemanticTokens.Query where
77

8+
import Control.Applicative ((<|>))
89
import Data.Foldable (fold)
910
import qualified Data.Map.Strict as M
1011
import Data.Maybe (listToMaybe, mapMaybe)
11-
import Data.Set (Set)
1212
import qualified Data.Set as Set
1313
import Data.Text (Text)
1414
import Development.IDE.Core.PositionMapping (PositionMapping,
@@ -17,8 +17,7 @@ import Development.IDE.GHC.Compat
1717
import Ide.Plugin.SemanticTokens.Mappings
1818
import Ide.Plugin.SemanticTokens.Types (HieFunMaskKind,
1919
HsSemanticTokenType (TModule),
20-
IdSemanticMap,
21-
RangeIdSetMap,
20+
RangeSemanticTokenTypeList,
2221
SemanticTokensConfig)
2322
import Language.LSP.Protocol.Types (Position (Position),
2423
Range (Range),
@@ -30,44 +29,48 @@ import Prelude hiding (length, span)
3029

3130
---------------------------------------------------------
3231

33-
-- * extract semantic map from HieAst for local variables
32+
-- * extract semantic
3433

3534
---------------------------------------------------------
3635

37-
mkLocalIdSemanticFromAst :: Set Identifier -> HieFunMaskKind a -> RefMap a -> IdSemanticMap
38-
mkLocalIdSemanticFromAst names hieKind rm = M.mapMaybe (idIdSemanticFromHie hieKind rm) $ M.fromSet id names
36+
idSemantic :: forall a. NameEnv TyThing -> HieFunMaskKind a -> RefMap a -> Identifier -> Maybe HsSemanticTokenType
37+
idSemantic _ _ _ (Left _) = Just TModule
38+
idSemantic tyThingMap hieKind rm (Right n) =
39+
nameSemanticFromHie hieKind rm n -- local name
40+
<|> (lookupNameEnv tyThingMap n >>= tyThingSemantic) -- global name
3941

40-
idIdSemanticFromHie :: forall a. HieFunMaskKind a -> RefMap a -> Identifier -> Maybe HsSemanticTokenType
41-
idIdSemanticFromHie _ _ (Left _) = Just TModule
42-
idIdSemanticFromHie hieKind rm ns = do
43-
idSemanticFromRefMap rm ns
42+
43+
---------------------------------------------------------
44+
45+
-- * extract semantic from HieAst for local variables
46+
47+
---------------------------------------------------------
48+
49+
nameSemanticFromHie :: forall a. HieFunMaskKind a -> RefMap a -> Name -> Maybe HsSemanticTokenType
50+
nameSemanticFromHie hieKind rm n = do
51+
idSemanticFromRefMap rm (Right n)
4452
where
4553
idSemanticFromRefMap :: RefMap a -> Identifier -> Maybe HsSemanticTokenType
4654
idSemanticFromRefMap rm' name' = do
4755
spanInfos <- M.lookup name' rm'
4856
let typeTokenType = foldMap (typeSemantic hieKind) $ listToMaybe $ mapMaybe (identType . snd) spanInfos
4957
contextInfoTokenType <- foldMap (contextInfosMaybeTokenType . identInfo . snd) spanInfos
50-
fold [typeTokenType, Just contextInfoTokenType, idInfixOperator ns]
58+
fold [typeTokenType, Just contextInfoTokenType, nameInfixOperator n]
5159

5260
contextInfosMaybeTokenType :: Set.Set ContextInfo -> Maybe HsSemanticTokenType
5361
contextInfosMaybeTokenType details = foldMap infoTokenType (Set.toList details)
5462

5563

5664
-------------------------------------------------
5765

58-
-- * extract semantic tokens from IdSemanticMap
66+
-- * extract lsp semantic tokens from RangeSemanticTokenTypeList
5967

6068
-------------------------------------------------
6169

62-
extractSemanticTokensFromNames :: IdSemanticMap -> RangeIdSetMap -> M.Map Range HsSemanticTokenType
63-
extractSemanticTokensFromNames nsm = M.mapMaybe (foldMap (`M.lookup` nsm))
64-
65-
rangeSemanticMapSemanticTokens :: SemanticTokensConfig -> PositionMapping -> M.Map Range HsSemanticTokenType -> Either Text SemanticTokens
66-
rangeSemanticMapSemanticTokens stc mapping =
70+
rangeSemanticsSemanticTokens :: SemanticTokensConfig -> PositionMapping -> RangeSemanticTokenTypeList -> Either Text SemanticTokens
71+
rangeSemanticsSemanticTokens stc mapping =
6772
makeSemanticTokens defaultSemanticTokensLegend
68-
. mapMaybe (\(range, ty) -> flip toAbsSemanticToken ty <$> range)
69-
. M.toAscList
70-
. M.mapKeys (toCurrentRange mapping)
73+
. mapMaybe (\(ran, tk) -> toAbsSemanticToken <$> toCurrentRange mapping ran <*> return tk)
7174
where
7275
toAbsSemanticToken :: Range -> HsSemanticTokenType -> SemanticTokenAbsolute
7376
toAbsSemanticToken (Range (Position startLine startColumn) (Position _endLine endColumn)) tokenType =

plugins/hls-semantic-tokens-plugin/src/Ide/Plugin/SemanticTokens/Tokenize.hs

+52-46
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,19 @@
11
{-# LANGUAGE OverloadedRecordDot #-}
22
{-# LANGUAGE OverloadedStrings #-}
33

4-
module Ide.Plugin.SemanticTokens.Tokenize (hieAstSpanIdentifiers) where
4+
module Ide.Plugin.SemanticTokens.Tokenize (computeRangeHsSemanticTokenTypeList) where
55

66
import Control.Lens (Identity (runIdentity))
7-
import Control.Monad (forM_, guard)
7+
import Control.Monad (foldM, guard)
88
import Control.Monad.State.Strict (MonadState (get),
99
MonadTrans (lift),
10-
execStateT, modify, put)
11-
import Control.Monad.Trans.State.Strict (StateT)
10+
evalStateT, modify, put)
11+
import Control.Monad.Trans.State.Strict (StateT, runStateT)
1212
import Data.Char (isAlphaNum)
13+
import Data.DList (DList)
14+
import qualified Data.DList as DL
1315
import qualified Data.Map.Strict as M
1416
import qualified Data.Map.Strict as Map
15-
import qualified Data.Set as S
1617
import Data.Text (Text)
1718
import qualified Data.Text as T
1819
import qualified Data.Text.Rope as Char
@@ -22,95 +23,100 @@ import Data.Text.Utf16.Rope.Mixed (Rope)
2223
import qualified Data.Text.Utf16.Rope.Mixed as Rope
2324
import Development.IDE.GHC.Compat
2425
import Development.IDE.GHC.Error (realSrcSpanToCodePointRange)
25-
import Ide.Plugin.SemanticTokens.Types (RangeIdSetMap)
26+
import Ide.Plugin.SemanticTokens.Types (HsSemanticTokenType (TModule),
27+
RangeHsSemanticTokenTypes (..))
2628
import Language.LSP.Protocol.Types (Position (Position),
2729
Range (Range), UInt, mkRange)
2830
import Language.LSP.VFS hiding (line)
2931
import Prelude hiding (length, span)
3032

3133
type Tokenizer m a = StateT PTokenState m a
34+
type HsSemanticLookup = Identifier -> Maybe HsSemanticTokenType
3235

3336

3437
data PTokenState = PTokenState
35-
{ rangeIdSetMap :: !RangeIdSetMap,
36-
rope :: !Rope, -- the remains of rope we are working on
37-
cursor :: !Char.Position, -- the cursor position of the current rope to the start of the original file in code point position
38-
columnsInUtf16 :: !UInt -- the column of the start of the current rope in utf16
38+
{
39+
rope :: !Rope -- the remains of rope we are working on
40+
, cursor :: !Char.Position -- the cursor position of the current rope to the start of the original file in code point position
41+
, columnsInUtf16 :: !UInt -- the column of the start of the current rope in utf16
3942
}
4043

41-
runTokenizer :: (Monad m) => Tokenizer m a -> PTokenState -> m RangeIdSetMap
42-
runTokenizer p st = rangeIdSetMap <$> execStateT p st
43-
4444
data SplitResult
4545
= NoSplit (Text, Range) -- does not need to split, token text, token range
4646
| Split (Text, Range, Range) -- token text, prefix range(module range), token range
4747
deriving (Show)
4848

49+
getSplitTokenText :: SplitResult -> Text
50+
getSplitTokenText (NoSplit (t, _)) = t
51+
getSplitTokenText (Split (t, _, _)) = t
52+
4953

5054
mkPTokenState :: VirtualFile -> PTokenState
5155
mkPTokenState vf =
5256
PTokenState
53-
{ rangeIdSetMap = mempty,
57+
{
5458
rope = Rope.fromText $ toText vf._file_text,
5559
cursor = Char.Position 0 0,
5660
columnsInUtf16 = 0
5761
}
5862

59-
addRangeIdSetMap :: (Monad m) => Range -> Identifier -> Tokenizer m ()
60-
addRangeIdSetMap r i = modify $ \s -> s {rangeIdSetMap = Map.insertWith (<>) r (S.singleton i) $ rangeIdSetMap s}
61-
62-
-- lift a Tokenizer Maybe () to Tokenizer m (),
63-
-- if the Maybe is Nothing, do nothing, recover the state
64-
-- if the Maybe is Just (), do the action, and keep the state
65-
liftMaybeM :: (Monad m) => Tokenizer Maybe () -> Tokenizer m ()
63+
-- lift a Tokenizer Maybe a to Tokenizer m a,
64+
-- if the Maybe is Nothing, do nothing, recover the state, and return the mempty value
65+
-- if the Maybe is Just x, do the action, and keep the state, and return x
66+
liftMaybeM :: (Monad m, Monoid a) => Tokenizer Maybe a -> Tokenizer m a
6667
liftMaybeM p = do
6768
st <- get
68-
forM_ (execStateT p st) put
69+
maybe (return mempty) (\(ans, st') -> put st' >> return ans) $ runStateT p st
6970

70-
hieAstSpanIdentifiers :: VirtualFile -> HieAST a -> RangeIdSetMap
71-
hieAstSpanIdentifiers vf ast = runIdentity $ runTokenizer (foldAst ast) (mkPTokenState vf)
71+
foldMapM :: (Monad m, Monoid b, Foldable t) => (a -> m b) -> t a -> m b
72+
foldMapM f ta = foldM (\b a -> mappend b <$> f a) mempty ta
7273

74+
computeRangeHsSemanticTokenTypeList :: HsSemanticLookup -> VirtualFile -> HieAST a -> RangeHsSemanticTokenTypes
75+
computeRangeHsSemanticTokenTypeList lookupHsTokenType vf ast =
76+
RangeHsSemanticTokenTypes $ DL.toList $ runIdentity $ evalStateT (foldAst lookupHsTokenType ast) (mkPTokenState vf)
7377
-- | foldAst
7478
-- visit every leaf node in the ast in depth first order
75-
foldAst :: (Monad m) => HieAST t -> Tokenizer m ()
76-
foldAst ast = if null (nodeChildren ast)
77-
then liftMaybeM (visitLeafIds ast)
78-
else mapM_ foldAst $ nodeChildren ast
79+
foldAst :: (Monad m) => HsSemanticLookup -> HieAST t -> Tokenizer m (DList (Range, HsSemanticTokenType))
80+
foldAst lookupHsTokenType ast = if null (nodeChildren ast)
81+
then liftMaybeM (visitLeafIds lookupHsTokenType ast)
82+
else foldMapM (foldAst lookupHsTokenType) $ nodeChildren ast
7983

80-
visitLeafIds :: HieAST t -> Tokenizer Maybe ()
81-
visitLeafIds leaf = liftMaybeM $ do
84+
visitLeafIds :: HsSemanticLookup -> HieAST t -> Tokenizer Maybe (DList (Range, HsSemanticTokenType))
85+
visitLeafIds lookupHsTokenType leaf = liftMaybeM $ do
8286
let span = nodeSpan leaf
8387
(ran, token) <- focusTokenAt leaf
8488
-- if `focusTokenAt` succeed, we can safely assume we have shift the cursor correctly
8589
-- we do not need to recover the cursor state, even if the following computation failed
8690
liftMaybeM $ do
8791
-- only handle the leaf node with single column token
8892
guard $ srcSpanStartLine span == srcSpanEndLine span
89-
splitResult <- lift $ splitRangeByText token ran
90-
mapM_ (combineNodeIds ran splitResult) $ Map.filterWithKey (\k _ -> k == SourceInfo) $ getSourcedNodeInfo $ sourcedNodeInfo leaf
93+
splitResult <- lift $ splitRangeByText token ran
94+
foldMapM (combineNodeIds lookupHsTokenType ran splitResult) $ Map.filterWithKey (\k _ -> k == SourceInfo) $ getSourcedNodeInfo $ sourcedNodeInfo leaf
9195
where
92-
combineNodeIds :: (Monad m) => Range -> SplitResult -> NodeInfo a -> Tokenizer m ()
93-
combineNodeIds ran ranSplit (NodeInfo _ _ bd) = mapM_ (getIdentifier ran ranSplit) (M.keys bd)
94-
getIdentifier :: (Monad m) => Range -> SplitResult -> Identifier -> Tokenizer m ()
95-
getIdentifier ran ranSplit idt = liftMaybeM $ do
96+
combineNodeIds :: (Monad m) => HsSemanticLookup -> Range -> SplitResult -> NodeInfo a -> Tokenizer m (DList (Range, HsSemanticTokenType))
97+
combineNodeIds lookupHsTokenType ran ranSplit (NodeInfo _ _ bd) =
98+
case (maybeTokenType, ranSplit) of
99+
(Nothing, _) -> return mempty
100+
(Just TModule, _) -> return $ DL.singleton (ran, TModule)
101+
(Just tokenType, NoSplit (_, tokenRan)) -> return $ DL.singleton (tokenRan, tokenType)
102+
(Just tokenType, Split (_, ranPrefix, tokenRan)) -> return $ DL.fromList [(ranPrefix, TModule),(tokenRan, tokenType)]
103+
where maybeTokenType = foldMap (getIdentifier lookupHsTokenType ranSplit) (M.keys bd)
104+
105+
getIdentifier :: HsSemanticLookup -> SplitResult -> Identifier -> Maybe HsSemanticTokenType
106+
getIdentifier lookupHsTokenType ranSplit idt = do
96107
case idt of
97-
Left _moduleName -> addRangeIdSetMap ran idt
108+
Left _moduleName -> Just TModule
98109
Right name -> do
99-
occStr <- lift $ T.pack <$> case (occNameString . nameOccName) name of
110+
occStr <- T.pack <$> case (occNameString . nameOccName) name of
100111
-- the generated selector name with {-# LANGUAGE DuplicateRecordFields #-}
101112
'$' : 's' : 'e' : 'l' : ':' : xs -> Just $ takeWhile (/= ':') xs
102113
-- other generated names that should not be visible
103114
'$' : c : _ | isAlphaNum c -> Nothing
104115
c : ':' : _ | isAlphaNum c -> Nothing
105116
ns -> Just ns
106-
case ranSplit of
107-
(NoSplit (tk, r)) -> do
108-
guard $ tk == occStr
109-
addRangeIdSetMap r idt
110-
(Split (tk, r1, r2)) -> do
111-
guard $ tk == occStr
112-
addRangeIdSetMap r1 (Left $ mkModuleName "")
113-
addRangeIdSetMap r2 idt
117+
guard $ getSplitTokenText ranSplit == occStr
118+
lookupHsTokenType idt
119+
114120

115121
focusTokenAt ::
116122
-- | leaf node we want to focus on

plugins/hls-semantic-tokens-plugin/src/Ide/Plugin/SemanticTokens/Types.hs

+9-7
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ import Control.DeepSeq (NFData (rnf), rwhnf)
1111
import qualified Data.Array as A
1212
import Data.Default (Default (def))
1313
import Data.Generics (Typeable)
14-
import qualified Data.Map.Strict as M
1514
import Development.IDE (Pretty (pretty), RuleResult)
1615
import qualified Development.IDE.Core.Shake as Shake
1716
import Development.IDE.GHC.Compat hiding (loc)
@@ -108,25 +107,28 @@ data Loc = Loc
108107
instance Show Loc where
109108
show (Loc line startChar len) = show line <> ":" <> show startChar <> "-" <> show (startChar + len)
110109

111-
type RangeIdSetMap = Map Range (Set Identifier)
112-
113-
type IdSemanticMap = Map Identifier HsSemanticTokenType
114-
115110
data GetSemanticTokens = GetSemanticTokens
116111
deriving (Eq, Show, Typeable, Generic)
117112

118113
instance Hashable GetSemanticTokens
119114

120115
instance NFData GetSemanticTokens
121116

122-
newtype RangeHsSemanticTokenTypes = RangeHsSemanticTokenTypes {rangeSemanticMap :: M.Map Range HsSemanticTokenType}
117+
type RangeSemanticTokenTypeList = [(Range, HsSemanticTokenType)]
118+
119+
newtype RangeHsSemanticTokenTypes = RangeHsSemanticTokenTypes {rangeSemanticList :: RangeSemanticTokenTypeList}
123120

124121
instance NFData RangeHsSemanticTokenTypes where
125122
rnf :: RangeHsSemanticTokenTypes -> ()
126123
rnf (RangeHsSemanticTokenTypes a) = rwhnf a
127124

128125
instance Show RangeHsSemanticTokenTypes where
129-
show = const "RangeHsSemanticTokenTypes"
126+
show (RangeHsSemanticTokenTypes xs) = unlines $ map showRangeToken xs
127+
128+
showRangeToken :: (Range, HsSemanticTokenType) -> String
129+
showRangeToken (ran, tk) = showRange ran <> " " <> show tk
130+
showRange :: Range -> String
131+
showRange (Range (Position l1 c1) (Position l2 c2)) = show l1 <> ":" <> show c1 <> "-" <> show l2 <> ":" <> show c2
130132

131133
type instance RuleResult GetSemanticTokens = RangeHsSemanticTokenTypes
132134

0 commit comments

Comments
 (0)