Skip to content

Commit

Permalink
Implement measure_off in pure Haskell
Browse files Browse the repository at this point in the history
  • Loading branch information
hsyl20 authored and Bodigrim committed Dec 13, 2023
1 parent 30637ae commit 0a72ea0
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 7 deletions.
8 changes: 2 additions & 6 deletions src/Data/Text.hs
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,7 @@ import Data.Monoid (Monoid(..))
import Data.Semigroup (Semigroup(..))
import Data.String (IsString(..))
import Data.Text.Internal.Reverse (reverse)
import Data.Text.Internal.Measure (measure_off)
import Data.Text.Internal.Encoding.Utf8 (utf8Length, utf8LengthByLeader, chr2, chr3, chr4, ord2, ord3, ord4)
import qualified Data.Text.Internal.Fusion as S
import Data.Text.Internal.Fusion.CaseMapping (foldMapping, lowerMapping, upperMapping)
Expand Down Expand Up @@ -1387,12 +1388,7 @@ take n t@(Text arr off len)
measureOff :: Int -> Text -> Int
measureOff !n (Text (A.ByteArray arr) off len) = if len == 0 then 0 else
cSsizeToInt $
c_measure_off arr (intToCSize off) (intToCSize len) (intToCSize n)

-- | The input buffer (arr :: ByteArray#, off :: CSize, len :: CSize)
-- must specify a valid UTF-8 sequence, this condition is not checked.
foreign import ccall unsafe "_hs_text_measure_off" c_measure_off
:: ByteArray# -> CSize -> CSize -> CSize -> CSsize
measure_off arr (intToCSize off) (intToCSize len) (intToCSize n)

-- | /O(n)/ 'takeEnd' @n@ @t@ returns the suffix remaining after
-- taking @n@ characters from the end of @t@.
Expand Down
53 changes: 53 additions & 0 deletions src/Data/Text/Internal/Measure.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
{-# LANGUAGE CPP #-}
{-# LANGUAGE MagicHash #-}

#if defined(PURE_HASKELL)
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE MultiWayIf #-}
#endif

#if !defined(PURE_HASKELL)
{-# LANGUAGE UnliftedFFITypes #-}
#endif

{-# OPTIONS_HADDOCK not-home #-}

-- | Implements 'measure_off', using efficient C routines by default.
module Data.Text.Internal.Measure
( measure_off
)
where

import GHC.Exts

#if defined(PURE_HASKELL)
import GHC.Word
import Data.Text.Internal.Encoding.Utf8 (utf8LengthByLeader)
#endif

import Foreign.C.Types (CSize(..))
import System.Posix.Types (CSsize(..))

#if defined(PURE_HASKELL)

measure_off :: ByteArray# -> CSize -> CSize -> CSize -> CSsize
measure_off ba off len cnt = go 0 0
where
go !cc !i
-- return the number of bytes for the first cnt codepoints,
| cc == cnt = fromIntegral i
-- return negated number of codepoints if there are fewer than cnt
| i >= len = negate (fromIntegral cc)
| otherwise =
let !(I# o) = fromIntegral (off+i)
!b = indexWord8Array# ba o
in go (cc+1) (i + fromIntegral (utf8LengthByLeader (W8# b)))

#else

-- | The input buffer (arr :: ByteArray#, off :: CSize, len :: CSize)
-- must specify a valid UTF-8 sequence, this condition is not checked.
foreign import ccall unsafe "_hs_text_measure_off" measure_off
:: ByteArray# -> CSize -> CSize -> CSize -> CSsize

#endif
2 changes: 1 addition & 1 deletion text.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,6 @@ library
if arch(javascript) || flag(pure-haskell)
cpp-options: -DPURE_HASKELL
c-sources: cbits/is_ascii.c
cbits/measure_off.c
cbits/utils.c
else
c-sources: cbits/is_ascii.c
Expand Down Expand Up @@ -207,6 +206,7 @@ library
other-modules:
Data.Text.Show
Data.Text.Internal.Reverse
Data.Text.Internal.Measure

build-depends:
array >= 0.3 && < 0.6,
Expand Down

0 comments on commit 0a72ea0

Please sign in to comment.