Speed up fuzzy search #2639


Merged
8 commits, merged Feb 19, 2022
10 changes: 7 additions & 3 deletions ghcide/ghcide.cabal
@@ -50,7 +50,6 @@ library
dlist,
exceptions,
extra >= 1.7.4,
fuzzy,
filepath,
fingertree,
focus,
@@ -205,6 +204,7 @@ library
Development.IDE.Plugin.HLS.GhcIde
Development.IDE.Plugin.Test
Development.IDE.Plugin.TypeLenses
Text.Fuzzy.Parallel

other-modules:
Development.IDE.Core.FileExists
@@ -216,7 +216,6 @@ library
Development.IDE.Plugin.Completions.Logic
Development.IDE.Session.VersionCheck
Development.IDE.Types.Action
Text.Fuzzy.Parallel

ghc-options:
-Wall
@@ -371,6 +370,7 @@ test-suite ghcide-tests
directory,
extra,
filepath,
fuzzy,
--------------------------------------------------------------
-- The MIN_VERSION_ghc macro relies on MIN_VERSION pragmas
-- which require depending on ghc. So the tests need to depend
@@ -385,11 +385,13 @@ test-suite ghcide-tests
lsp,
lsp-types,
hls-plugin-api,
network-uri,
lens,
list-t,
lsp-test ^>= 0.14,
monoid-subclasses,
network-uri,
optparse-applicative,
parallel,
process,
QuickCheck,
quickcheck-instances,
@@ -410,6 +412,7 @@ test-suite ghcide-tests
tasty-rerun,
text,
unordered-containers,
vector,
if (impl(ghc >= 8.6) && impl(ghc < 9.2))
build-depends:
record-dot-preprocessor,
@@ -423,6 +426,7 @@ test-suite ghcide-tests
Development.IDE.Test.Runfiles
Experiments
Experiments.Types
FuzzySearch
Progress
HieDbRetry
default-extensions:
10 changes: 5 additions & 5 deletions ghcide/src/Development/IDE/Plugin/Completions/Logic.hs
@@ -63,7 +63,7 @@ import Ide.Types (CommandId (..),
import Language.LSP.Types
import Language.LSP.Types.Capabilities
import qualified Language.LSP.VFS as VFS
import Text.Fuzzy.Parallel (Scored (score_),
import Text.Fuzzy.Parallel (Scored (score),
original)

-- Chunk size used for parallelizing fuzzy matching
@@ -590,7 +590,7 @@ getCompletions plId ideOpts CC {allModNamesAsNS, anyQualCompls, unqualCompls, qu
$ (if T.null enteredQual then id else mapMaybe (T.stripPrefix enteredQual))
allModNamesAsNS

filtCompls = Fuzzy.filter chunkSize maxC prefixText ctxCompls "" "" (label . snd)
filtCompls = Fuzzy.filter chunkSize maxC prefixText ctxCompls (label . snd)
where

mcc = case maybe_parsed of
@@ -668,7 +668,7 @@ getCompletions plId ideOpts CC {allModNamesAsNS, anyQualCompls, unqualCompls, qu
return $
(fmap.fmap) snd $
sortBy (compare `on` lexicographicOrdering) $
mergeListsBy (flip compare `on` score_)
mergeListsBy (flip compare `on` score)
[ (fmap.fmap) (notQual,) filtModNameCompls
, (fmap.fmap) (notQual,) filtKeywordCompls
, (fmap.fmap.fmap) (toggleSnippets caps config) compls
@@ -681,11 +681,11 @@ getCompletions plId ideOpts CC {allModNamesAsNS, anyQualCompls, unqualCompls, qu
-- 3. In-scope completions rank next
-- 4. label alphabetical ordering next
-- 4. detail alphabetical ordering (proxy for module)
lexicographicOrdering Fuzzy.Scored{score_, original} =
lexicographicOrdering Fuzzy.Scored{score, original} =
case original of
(isQual, CompletionItem{_label,_detail}) -> do
let isLocal = maybe False (":" `T.isPrefixOf`) _detail
(Down isQual, Down score_, Down isLocal, _label, _detail)
(Down isQual, Down score, Down isLocal, _label, _detail)



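For context on the call-site change above: the new Text.Fuzzy.Parallel.filter drops the pre/post highlighting arguments and only scores candidates, so Logic.hs now passes just the chunk size, the result cap, the typed prefix, the candidate list, and a text extractor. Below is a minimal standalone sketch of that five-argument API, assuming only the signature shown in the Text.Fuzzy.Parallel diff further down; the main wrapper, the limits, and the candidate data are made up for illustration.

{-# LANGUAGE OverloadedStrings #-}
import qualified Data.Text as T
import qualified Text.Fuzzy.Parallel as Fuzzy

main :: IO ()
main = mapM_ (print . Fuzzy.original) results
  where
    -- Chunk size 1000 (chunks are scored in parallel), at most 10 results,
    -- pattern "fnt"; fst extracts the text to match from each candidate.
    results :: [Fuzzy.Scored (T.Text, Int)]
    results = Fuzzy.filter 1000 10 "fnt" candidates fst
    -- Hypothetical (label, payload) pairs standing in for completion items.
    candidates = [("infinite", 1 :: Int), ("function", 2), ("filename", 3)]
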
233 changes: 94 additions & 139 deletions ghcide/src/Text/Fuzzy/Parallel.hs
@@ -1,96 +1,91 @@
-- | Parallel versions of 'filter' and 'simpleFilter'

module Text.Fuzzy.Parallel
( filter,
simpleFilter,
Scored(..),
-- reexports
Fuzzy,
match,
Scored(..)
) where

import Control.Monad.ST (runST)
import Control.Parallel.Strategies (Eval, Strategy, evalTraversable,
parTraversable, rseq, using)
import Data.Monoid.Textual (TextualMonoid)
import Data.Vector (Vector, (!))
import qualified Data.Vector as V
-- need to use a stable sort
import Data.Bifunctor (second)
import Data.Char (toLower)
import Data.Maybe (fromMaybe)
import qualified Data.Monoid.Textual as T
import Control.Parallel.Strategies (rseq, using, parList, evalList)
import Data.Bits ((.|.))
import Data.Maybe (fromMaybe, mapMaybe)
import qualified Data.Text as T
import qualified Data.Text.Internal as T
import qualified Data.Text.Array as TA
import Prelude hiding (filter)
import Text.Fuzzy (Fuzzy (..))

data Scored a = Scored {score_ :: !Int, original:: !a}
deriving (Functor,Show)
data Scored a = Scored {score :: !Int, original:: !a}
deriving (Functor, Show)

-- | Returns the rendered output and the
-- matching score for a pattern and a text.
-- Two examples are given below:
--
-- >>> match "fnt" "infinite" "" "" id True
-- Just ("infinite",3)
-- >>> match "fnt" "infinite"
-- Just 3
--
-- >>> match "hsk" ("Haskell",1995) "<" ">" fst False
-- Just ("<h>a<s><k>ell",5)
-- >>> match "hsk" "Haskell"
-- Just 5
--
{-# INLINABLE match #-}

match :: (T.TextualMonoid s)
=> s -- ^ Pattern in lowercase except for first character
-> t -- ^ The value containing the text to search in.
-> s -- ^ The text to add before each match.
-> s -- ^ The text to add after each match.
-> (t -> s) -- ^ The function to extract the text from the container.
-> Maybe (Fuzzy t s) -- ^ The original value, rendered string and score.
match pattern t pre post extract =
if null pat then Just (Fuzzy t result totalScore) else Nothing
match :: T.Text -- ^ Pattern in lowercase except for first character
-> T.Text -- ^ The text to search in.
-> Maybe Int -- ^ The score
match (T.Text pArr pOff pLen) (T.Text sArr sOff sLen) = go 0 1 pOff sOff
where
null :: (T.TextualMonoid s) => s -> Bool
null = not . T.any (const True)

s = extract t
(totalScore, _currScore, result, pat, _) =
T.foldl'
undefined
(\(tot, cur, res, pat, isFirst) c ->
case T.splitCharacterPrefix pat of
Nothing -> (tot, 0, res <> T.singleton c, pat, isFirst)
Just (x, xs) ->
-- the case of the first character has to match
-- otherwise use lower case since the pattern is assumed lower
let !c' = if isFirst then c else toLower c in
if x == c' then
let cur' = cur * 2 + 1 in
(tot + cur', cur', res <> pre <> T.singleton c <> post, xs, False)
else (tot, 0, res <> T.singleton c, pat, isFirst)
) ( 0
, 1 -- matching at the start gives a bonus (cur = 1)
, mempty, pattern, True) s
pTotal = pOff + pLen
sDelta = sOff + sLen - pTotal

go !totalScore !currScore !currPOff !currSOff
-- If pattern has been matched in full
| currPOff >= pTotal
= Just totalScore
-- If there is not enough left to match the rest of the pattern, equivalent to
-- (sOff + sLen - currSOff) < (pOff + pLen - currPOff)
| currSOff > currPOff + sDelta
= Nothing
-- This is slightly broken for non-ASCII:
-- 1. If code units constituting a single pattern code point are found as parts
-- of different code points, it counts as a match. Unless you use a ton of emojis
-- as identifiers, such false positives should not be a big deal,
-- and anyway HLS does not currently support such use cases, because it uses
-- code point and UTF-16 code unit positions interchangeably.
-- 2. Case conversion is not applied to non-ASCII code points, because one has
-- to call T.toLower (not T.map toLower), reallocating the string in full, which
-- is too much of a performance penalty for fuzzy search. Again, HLS does not
-- attempt to do justice to Unicode: proper Unicode text matching requires
-- `unicode-transforms` and friends.
-- Altogether we sacrifice correctness for the sake of performance, which
-- is the right trade-off for fuzzy search.
| pByte <- TA.unsafeIndex pArr currPOff
, sByte <- TA.unsafeIndex sArr currSOff
-- First byte (currPOff == pOff) should match exactly, otherwise - up to case.
, pByte == sByte || (currPOff /= pOff && pByte == toLowerAscii sByte)
= let curr = currScore * 2 + 1 in
go (totalScore + curr) curr (currPOff + 1) (currSOff + 1)
| otherwise
Comment on lines +41 to +68

Collaborator: How do we know that the new implementation produces the same scores? I would like to see a unit test here, or a property test. It's ok for the test suite to depend on the fuzzy package.

Contributor (author): Well, even the old implementation was not fully matching the fuzzy package because of a different approach to case matching. What's the best place to add unit tests?

Collaborator: The ghcide test suite.

(A property-test sketch along these lines is given after this diff.)

= go totalScore 0 currPOff (currSOff + 1)

toLowerAscii w = if (w - 65) < 26 then w .|. 0x20 else w

-- | The function to filter a list of values by fuzzy search on the text extracted from them.
filter :: (TextualMonoid s)
=> Int -- ^ Chunk size. 1000 works well.
-> Int -- ^ Max. number of results wanted
-> s -- ^ Pattern.
-> [t] -- ^ The list of values containing the text to search in.
-> s -- ^ The text to add before each match.
-> s -- ^ The text to add after each match.
-> (t -> s) -- ^ The function to extract the text from the container.
-> [Scored t] -- ^ The list of results, sorted, highest score first.
filter chunkSize maxRes pattern ts pre post extract = runST $ do
let v = V.mapMaybe id
(V.map (\t -> match pattern' t pre post extract) (V.fromList ts)
`using`
parVectorChunk chunkSize (evalTraversable forceScore))
perfectScore = score $ fromMaybe (error $ T.toString undefined pattern) $
match pattern' pattern' "" "" id
return $ partialSortByAscScore maxRes perfectScore v
filter :: Int -- ^ Chunk size. 1000 works well.
-> Int -- ^ Max. number of results wanted
-> T.Text -- ^ Pattern.
-> [t] -- ^ The list of values containing the text to search in.
-> (t -> T.Text) -- ^ The function to extract the text from the container.
-> [Scored t] -- ^ The list of results, sorted, highest score first.
filter chunkSize maxRes pattern ts extract = partialSortByAscScore maxRes perfectScore (concat vss)
where
-- Preserve case for the first character, make all others lowercase
pattern' = case T.splitCharacterPrefix pattern of
Just (c, rest) -> T.singleton c <> T.map toLower rest
_ -> pattern
pattern' = case T.uncons pattern of
Just (c, rest) -> T.cons c (T.toLower rest)
_ -> pattern
vss = map (mapMaybe (\t -> flip Scored t <$> match pattern' (extract t))) (chunkList chunkSize ts)
`using` parList (evalList rseq)
perfectScore = fromMaybe (error $ T.unpack pattern) $ match pattern' pattern'

-- | Return all elements of the list that have a fuzzy
-- match against the pattern. Runs with default settings where
@@ -99,84 +94,44 @@ filter chunkSize maxRes pattern ts pre post extract = runST $ do
-- >>> simpleFilter "vm" ["vim", "emacs", "virtual machine"]
-- ["vim","virtual machine"]
{-# INLINABLE simpleFilter #-}
simpleFilter :: (TextualMonoid s)
=> Int -- ^ Chunk size. 1000 works well.
-> Int -- ^ Max. number of results wanted
-> s -- ^ Pattern to look for.
-> [s] -- ^ List of texts to check.
-> [Scored s] -- ^ The ones that match.
simpleFilter :: Int -- ^ Chunk size. 1000 works well.
-> Int -- ^ Max. number of results wanted
-> T.Text -- ^ Pattern to look for.
-> [T.Text] -- ^ List of texts to check.
-> [Scored T.Text] -- ^ The ones that match.
simpleFilter chunk maxRes pattern xs =
filter chunk maxRes pattern xs mempty mempty id

--------------------------------------------------------------------------------

-- | Evaluation that forces the 'score' field
forceScore :: TextualMonoid s => Fuzzy t s -> Eval(Fuzzy t s)
forceScore it@Fuzzy{score} = do
score' <- rseq score
return it{score = score'}
filter chunk maxRes pattern xs id

--------------------------------------------------------------------------------

-- | Divides a vector in chunks, applies the strategy in parallel to each chunk.
parVectorChunk :: Int -> Strategy a -> Vector a -> Eval (Vector a)
parVectorChunk chunkSize st v =
V.concat <$> parTraversable (evalTraversable st) (chunkVector chunkSize v)

-- >>> chunkVector 3 (V.fromList [0..10])
-- >>> chunkVector 3 (V.fromList [0..11])
-- >>> chunkVector 3 (V.fromList [0..12])
-- [[0,1,2],[3,4,5],[6,7,8],[9,10]]
-- [[0,1,2],[3,4,5],[6,7,8],[9,10,11]]
-- [[0,1,2],[3,4,5],[6,7,8],[9,10,11],[12]]
chunkVector :: Int -> Vector a -> [Vector a]
chunkVector chunkSize v = do
let indices = chunkIndices chunkSize (0,V.length v)
[V.slice l (h-l+1) v | (l,h) <- indices]

-- >>> chunkIndices 3 (0,9)
-- >>> chunkIndices 3 (0,10)
-- >>> chunkIndices 3 (0,11)
-- [(0,2),(3,5),(6,8)]
-- [(0,2),(3,5),(6,8),(9,9)]
-- [(0,2),(3,5),(6,8),(9,10)]
chunkIndices :: Int -> (Int,Int) -> [(Int,Int)]
chunkIndices chunkSize (from,to) =
map (second pred) $
pairwise $
[from, from+chunkSize .. to-1] ++ [to]

pairwise :: [a] -> [(a,a)]
pairwise [] = []
pairwise [_] = []
pairwise (x:y:xs) = (x,y) : pairwise (y:xs)
chunkList :: Int -> [a] -> [[a]]
chunkList chunkSize = go
where
go [] = []
go xs = ys : go zs
where
(ys, zs) = splitAt chunkSize xs

-- | A stable partial sort ascending by score. O(N) best case, O(wanted*N) worst case
partialSortByAscScore :: TextualMonoid s
=> Int -- ^ Number of items needed
partialSortByAscScore
:: Int -- ^ Number of items needed
-> Int -- ^ Value of a perfect score
-> Vector (Fuzzy t s)
-> [Scored t]
partialSortByAscScore wantedCount perfectScore v = loop 0 (SortState minBound perfectScore 0) [] where
l = V.length v
loop index st@SortState{..} acc
-> [Scored t]
partialSortByAscScore wantedCount perfectScore orig = loop orig (SortState minBound perfectScore 0) [] where
loop [] st@SortState{..} acc
| foundCount == wantedCount = reverse acc
| index == l
-- ProgressCancelledException
= if bestScoreSeen < scoreWanted
then loop 0 st{scoreWanted = bestScoreSeen, bestScoreSeen = minBound} acc
| otherwise = if bestScoreSeen < scoreWanted
then loop orig st{scoreWanted = bestScoreSeen, bestScoreSeen = minBound} acc
else reverse acc
| otherwise =
case v!index of
x | score x == scoreWanted
-> loop (index+1) st{foundCount = foundCount+1} (toScored x:acc)
| score x < scoreWanted && score x > bestScoreSeen
-> loop (index+1) st{bestScoreSeen = score x} acc
| otherwise
-> loop (index+1) st acc

toScored :: TextualMonoid s => Fuzzy t s -> Scored t
toScored Fuzzy{..} = Scored score original
loop (x : xs) st@SortState{..} acc
| foundCount == wantedCount = reverse acc
| score x == scoreWanted
= loop xs st{foundCount = foundCount+1} (x:acc)
| score x < scoreWanted && score x > bestScoreSeen
= loop xs st{bestScoreSeen = score x} acc
| otherwise
= loop xs st acc

data SortState a = SortState
{ bestScoreSeen :: !Int
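Following up on the review thread above: the FuzzySearch test module added to the ghcide test suite is not reproduced on this page, but a property test in the spirit the reviewer requested could look roughly like the sketch below. This is an illustration only; the module name, the properties, and the use of plain QuickCheck are assumptions, not the code merged in this PR. It checks two invariants of the new matcher (a pattern always matches itself, which filter relies on when computing the perfect score, and filter never returns more results than requested) plus the score value from the module's own doctest.

{-# LANGUAGE OverloadedStrings #-}
-- Illustrative sketch only; not the FuzzySearch module added by this PR.
module Main (main) where

import Data.Maybe (isJust)
import qualified Data.Text as T
import Test.QuickCheck
import Test.QuickCheck.Instances ()   -- Arbitrary instance for Text
import qualified Text.Fuzzy.Parallel as Fuzzy

-- A pattern always fuzzily matches itself; 'filter' assumes this when it
-- computes the perfect score for the pattern.
prop_selfMatch :: T.Text -> Bool
prop_selfMatch t = isJust (Fuzzy.match t t)

-- 'filter' never returns more results than asked for.
prop_capped :: Positive Int -> T.Text -> [T.Text] -> Bool
prop_capped (Positive maxRes) pat xs =
  length (Fuzzy.filter 1000 maxRes pat xs id) <= maxRes

-- The value claimed by the doctest in Text.Fuzzy.Parallel.
unit_doctest :: Bool
unit_doctest = Fuzzy.match "fnt" "infinite" == Just 3

main :: IO ()
main = do
  quickCheck prop_selfMatch
  quickCheck prop_capped
  quickCheck unit_doctest

A score-parity property against Text.Fuzzy.match from the fuzzy package would also be possible for ASCII-only inputs, though, as noted in the thread, the two matchers already differ in how they handle case.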