Skip to content

Commit

Permalink
Remove code duplication
Browse files Browse the repository at this point in the history
Use `unsafeDrop` and friends instead of explicitly constructing values
with `Data.ByteString.Internal.BS`.

- All those primitives are marked with `INLINE`.
- This does not change the generated core.
  • Loading branch information
sol committed Sep 30, 2023
1 parent 6c880f3 commit 4c33cfb
Show file tree
Hide file tree
Showing 8 changed files with 74 additions and 47 deletions.
6 changes: 6 additions & 0 deletions Changelog.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
[0.12.1.0]:

* API additions:
* New primitives `Data.ByteString.Unsafe.unsafeTakeEnd` and
`Data.ByteString.Unsafe.unsafeDropEnd`

[0.12.0.2] — August 2023

* Bug fixes:
Expand Down
58 changes: 30 additions & 28 deletions Data/ByteString.hs
Original file line number Diff line number Diff line change
Expand Up @@ -423,19 +423,19 @@ head (BS x l)
--
-- This is a partial function, consider using 'uncons' instead.
tail :: HasCallStack => ByteString -> ByteString
tail (BS p l)
| l <= 0 = errorEmptyList "tail"
| otherwise = BS (plusForeignPtr p 1) (l-1)
tail ps
| length ps <= 0 = errorEmptyList "tail"
| otherwise = unsafeDrop 1 ps
{-# INLINE tail #-}

-- | /O(1)/ Extract the 'head' and 'tail' of a ByteString, returning 'Nothing'
-- if it is empty.
uncons :: ByteString -> Maybe (Word8, ByteString)
uncons (BS x l)
uncons ps@(BS x l)
| l <= 0 = Nothing
| otherwise = Just (accursedUnutterablePerformIO $ unsafeWithForeignPtr x
$ \p -> peek p,
BS (plusForeignPtr x 1) (l-1))
unsafeDrop 1 ps)
{-# INLINE uncons #-}

-- | /O(1)/ Extract the last element of a ByteString, which must be finite and non-empty.
Expand All @@ -454,17 +454,17 @@ last ps@(BS x l)
--
-- This is a partial function, consider using 'unsnoc' instead.
init :: HasCallStack => ByteString -> ByteString
init ps@(BS p l)
init ps
| null ps = errorEmptyList "init"
| otherwise = BS p (l-1)
| otherwise = unsafeDropEnd 1 ps
{-# INLINE init #-}

-- | /O(1)/ Extract the 'init' and 'last' of a ByteString, returning 'Nothing'
-- if it is empty.
unsnoc :: ByteString -> Maybe (ByteString, Word8)
unsnoc (BS x l)
unsnoc ps@(BS x l)
| l <= 0 = Nothing
| otherwise = Just (BS x (l-1),
| otherwise = Just (unsafeDropEnd 1 ps,
accursedUnutterablePerformIO $
unsafeWithForeignPtr x $ \p -> peekByteOff p (l-1))
{-# INLINE unsnoc #-}
Expand Down Expand Up @@ -921,10 +921,10 @@ unfoldrN i f x0
-- | /O(1)/ 'take' @n@, applied to a ByteString @xs@, returns the prefix
-- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@.
take :: Int -> ByteString -> ByteString
take n ps@(BS x l)
take n ps@(BS _ l)
| n <= 0 = empty
| n >= l = ps
| otherwise = BS x n
| otherwise = unsafeTake n ps
{-# INLINE take #-}

-- | /O(1)/ @'takeEnd' n xs@ is equivalent to @'drop' ('length' xs - n) xs@.
Expand All @@ -939,19 +939,19 @@ take n ps@(BS x l)
--
-- @since 0.11.1.0
takeEnd :: Int -> ByteString -> ByteString
takeEnd n ps@(BS x len)
takeEnd n ps@(BS _ len)
| n >= len = ps
| n <= 0 = empty
| otherwise = BS (plusForeignPtr x (len - n)) n
| otherwise = unsafeTakeEnd n ps
{-# INLINE takeEnd #-}

-- | /O(1)/ 'drop' @n xs@ returns the suffix of @xs@ after the first @n@
-- elements, or 'empty' if @n > 'length' xs@.
drop :: Int -> ByteString -> ByteString
drop n ps@(BS x l)
drop n ps@(BS _ l)
| n <= 0 = ps
| n >= l = empty
| otherwise = BS (plusForeignPtr x n) (l-n)
| otherwise = unsafeDrop n ps
{-# INLINE drop #-}

-- | /O(1)/ @'dropEnd' n xs@ is equivalent to @'take' ('length' xs - n) xs@.
Expand All @@ -966,18 +966,18 @@ drop n ps@(BS x l)
--
-- @since 0.11.1.0
dropEnd :: Int -> ByteString -> ByteString
dropEnd n ps@(BS x len)
dropEnd n ps@(BS _ len)
| n <= 0 = ps
| n >= len = empty
| otherwise = BS x (len - n)
| otherwise = unsafeDropEnd n ps
{-# INLINE dropEnd #-}

-- | /O(1)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@.
splitAt :: Int -> ByteString -> (ByteString, ByteString)
splitAt n ps@(BS x l)
splitAt n ps@(BS _ l)
| n <= 0 = (empty, ps)
| n >= l = (ps, empty)
| otherwise = (BS x n, BS (plusForeignPtr x n) (l-n))
| otherwise = (unsafeTake n ps, unsafeDrop n ps)
{-# INLINE splitAt #-}

-- | Similar to 'Prelude.takeWhile',
Expand Down Expand Up @@ -1151,18 +1151,17 @@ splitWith _ (BS _ 0) = []
splitWith predicate (BS fp len) = splitWith0 0 len fp
where splitWith0 !off' !len' !fp' =
accursedUnutterablePerformIO $
splitLoop fp 0 off' len' fp'
splitLoop 0 off' len' fp'

splitLoop :: ForeignPtr Word8
-> Int -> Int -> Int
splitLoop :: Int -> Int -> Int
-> ForeignPtr Word8
-> IO [ByteString]
splitLoop p idx2 off' len' fp' = go idx2
splitLoop idx2 off' len' fp' = go idx2
where
go idx'
| idx' >= len' = return [BS (plusForeignPtr fp' off') idx']
| otherwise = do
w <- peekFpByteOff p (off'+idx')
w <- peekFpByteOff fp (off'+idx')
if predicate w
then return (BS (plusForeignPtr fp' off') idx' :
splitWith0 (off'+idx'+1) (len'-idx'-1) fp')
Expand All @@ -1188,19 +1187,22 @@ splitWith predicate (BS fp len) = splitWith0 0 len fp
--
split :: Word8 -> ByteString -> [ByteString]
split _ (BS _ 0) = []
split w (BS x l) = loop 0
split w ps@(BS x l) = loop 0
where
loop !n =
let q = accursedUnutterablePerformIO $ unsafeWithForeignPtr x $ \p ->
memchr (p `plusPtr` n)
w (fromIntegral (l-n))
in if q == nullPtr
then [BS (plusForeignPtr x n) (l-n)]
then [unsafeDrop n ps]
else let i = q `minusPtr` unsafeForeignPtrToPtr x
in BS (plusForeignPtr x n) (i-n) : loop (i+1)
in unsafeSlice n i ps : loop (i+1)

{-# INLINE split #-}

unsafeSlice :: Int -> Int -> ByteString -> ByteString
unsafeSlice a b (BS x _) = BS (plusForeignPtr x a) (b - a)
{-# INLINE unsafeSlice #-}

-- | The 'group' function takes a ByteString and returns a list of
-- ByteStrings such that the concatenation of the result is equal to the
Expand Down Expand Up @@ -1716,7 +1718,7 @@ inits bs = NE.toList $! initsNE bs
-- @since 0.11.4.0
initsNE :: ByteString -> NonEmpty ByteString
-- see Note [Avoid NonEmpty combinators]
initsNE (BS x len) = empty :| [BS x n | n <- [1..len]]
initsNE ps = empty :| [unsafeTake n ps | n <- [1..length ps]]

-- | /O(n)/ Returns all final segments of the given 'ByteString', longest first.
tails :: ByteString -> [ByteString]
Expand Down
12 changes: 6 additions & 6 deletions Data/ByteString/Char8.hs
Original file line number Diff line number Diff line change
Expand Up @@ -874,12 +874,12 @@ unzip ls = (pack (P.map fst ls), pack (P.map snd ls))
-- > break isSpace == breakSpace
--
breakSpace :: ByteString -> (ByteString,ByteString)
breakSpace (BS x l) = accursedUnutterablePerformIO $ unsafeWithForeignPtr x $ \p -> do
breakSpace ps@(BS x l) = accursedUnutterablePerformIO $ unsafeWithForeignPtr x $ \p -> do
i <- firstspace p 0 l
return $! case () of {_
| i == 0 -> (empty, BS x l)
| i == l -> (BS x l, empty)
| otherwise -> (BS x i, BS (plusForeignPtr x i) (l-i))
| i == 0 -> (empty, ps)
| i == l -> (ps, empty)
| otherwise -> B.splitAt i ps
}
{-# INLINE breakSpace #-}

Expand All @@ -897,9 +897,9 @@ firstspace !ptr !n !m
--
-- @since 0.10.12.0
dropSpace :: ByteString -> ByteString
dropSpace (BS x l) = accursedUnutterablePerformIO $ unsafeWithForeignPtr x $ \p -> do
dropSpace ps@(BS x l) = accursedUnutterablePerformIO $ unsafeWithForeignPtr x $ \p -> do
i <- firstnonspace p 0 l
return $! if i == l then empty else BS (plusForeignPtr x i) (l-i)
return $! if i == l then empty else B.unsafeDrop i ps
{-# INLINE dropSpace #-}

firstnonspace :: Ptr Word8 -> Int -> Int -> IO Int
Expand Down
4 changes: 2 additions & 2 deletions Data/ByteString/Lazy.hs
Original file line number Diff line number Diff line change
Expand Up @@ -1462,8 +1462,8 @@ initsNE = (Empty :|) . inits' id
inits' :: (ByteString -> ByteString) -> ByteString -> [ByteString]
-- inits' f bs === map f (tail (inits bs))
inits' _ Empty = []
inits' f (Chunk c@(S.BS x len) cs)
= [f (S.BS x n `Chunk` Empty) | n <- [1..len]]
inits' f (Chunk c cs)
= [f (S.unsafeTake n c `Chunk` Empty) | n <- [1..S.length c]]
++ inits' (f . Chunk c) cs

-- | /O(n)/ Returns all final segments of the given 'ByteString', longest first.
Expand Down
2 changes: 1 addition & 1 deletion Data/ByteString/Lazy/Internal.hs
Original file line number Diff line number Diff line change
Expand Up @@ -331,7 +331,7 @@ toStrict = \cs -> goLen0 cs cs
-- closures which would result in unnecessary closure allocation.
where
-- It's still possible that the result is empty
goLen0 _ Empty = S.BS S.nullForeignPtr 0
goLen0 _ Empty = S.empty
goLen0 cs0 (Chunk c cs) = goLen1 cs0 c cs

-- It's still possible that the result is a single chunk
Expand Down
11 changes: 6 additions & 5 deletions Data/ByteString/Lazy/ReadInt.hs
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,15 @@ module Data.ByteString.Lazy.ReadInt
) where

import qualified Data.ByteString.Internal as BI
import Data.ByteString.Unsafe
#ifdef BYTESTRING_STRICT
import Data.ByteString
#else
import Data.ByteString.Lazy
import Data.ByteString.Lazy.Internal
#endif
import Data.Bits (FiniteBits, isSigned)
import Data.ByteString.Internal (pattern BS, plusForeignPtr)
import Data.ByteString.Internal (pattern BS)
import Data.Int
import Data.Word
import Foreign.ForeignPtr (ForeignPtr)
Expand Down Expand Up @@ -177,18 +178,18 @@ _readDecimal !r = consume
where
consume :: ByteString -> Word64 -> Maybe (a, ByteString)
#ifdef BYTESTRING_STRICT
consume (BS fp len) a = case _digits q r fp len a of
consume ps@(BS fp len) a = case _digits q r fp len a of
Result used acc
| used == len
-> convert acc empty
| otherwise
-> convert acc $ BS (fp `plusForeignPtr` used) (len - used)
-> convert acc $ unsafeDrop used ps
_ -> Nothing
#else
-- All done
consume Empty acc = convert acc Empty
-- Process next chunk
consume (Chunk (BS fp len) cs) acc
consume (Chunk ps@(BS fp len) cs) acc
= case _digits q r fp len acc of
Result used acc'
| used == len
Expand All @@ -197,7 +198,7 @@ _readDecimal !r = consume
| otherwise
-- ran into a non-digit
-> convert acc' $
Chunk (BS (fp `plusForeignPtr` used) (len - used)) cs
Chunk (unsafeDrop used ps) cs
_ -> Nothing
#endif
convert :: Word64 -> ByteString -> Maybe (a, ByteString)
Expand Down
11 changes: 6 additions & 5 deletions Data/ByteString/Lazy/ReadNat.hs
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,15 @@ module Data.ByteString.Lazy.ReadNat
) where

import qualified Data.ByteString.Internal as BI
import Data.ByteString.Unsafe
#ifdef BYTESTRING_STRICT
import Data.ByteString
#else
import Data.ByteString.Lazy
import Data.ByteString.Lazy.Internal
#endif
import Data.Bits (finiteBitSize)
import Data.ByteString.Internal (pattern BS, plusForeignPtr)
import Data.ByteString.Internal (pattern BS)
import Data.Word
import Foreign.ForeignPtr (ForeignPtr)
import Foreign.Ptr (Ptr, minusPtr, plusPtr)
Expand Down Expand Up @@ -127,7 +128,7 @@ _readDecimal =
consume :: [Natural] -> Int -> Word -> ByteString
-> (Natural, ByteString)
#ifdef BYTESTRING_STRICT
consume ns cnt acc (BS fp len) =
consume ns cnt acc ps@(BS fp len) =
-- Having read one digit, we're about to read the 2nd
-- So the digit count up to 'safeLog' starts at 2.
case natdigits fp len acc cnt ns of
Expand All @@ -136,18 +137,18 @@ _readDecimal =
-> convert acc' cnt' ns' $ empty
| otherwise
-> convert acc' cnt' ns' $
BS (fp `plusForeignPtr` used) (len - used)
unsafeDrop used ps
#else
-- All done
consume ns cnt acc Empty = convert acc cnt ns Empty
-- Process next chunk
consume ns cnt acc (Chunk (BS fp len) cs)
consume ns cnt acc (Chunk ps@(BS fp len) cs)
= case natdigits fp len acc cnt ns of
Result used acc' cnt' ns'
| used == len -- process more chunks
-> consume ns' cnt' acc' cs
| otherwise -- ran into a non-digit
-> let c = Chunk (BS (fp `plusForeignPtr` used) (len - used)) cs
-> let c = Chunk (unsafeDrop used ps) cs
in convert acc' cnt' ns' c
#endif
convert !acc !cnt !ns rest =
Expand Down
17 changes: 17 additions & 0 deletions Data/ByteString/Unsafe.hs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,9 @@ module Data.ByteString.Unsafe (
unsafeLast,
unsafeIndex,
unsafeTake,
unsafeTakeEnd,
unsafeDrop,
unsafeDropEnd,

-- * Low level interaction with CStrings
-- ** Using ByteStrings with functions for CStrings
Expand Down Expand Up @@ -113,12 +115,27 @@ unsafeTake :: Int -> ByteString -> ByteString
unsafeTake n (BS x l) = assert (0 <= n && n <= l) $ BS x n
{-# INLINE unsafeTake #-}

-- | A variety of 'takeEnd' which omits the checks on @n@ so there is an
-- obligation on the programmer to provide a proof that @0 <= n <= 'length' xs@.
--
-- @since 0.12.1.0
unsafeTakeEnd :: Int -> ByteString -> ByteString
unsafeTakeEnd n (BS x l) = assert (0 <= n && n <= l) $ BS (plusForeignPtr x (l-n)) n
{-# INLINE unsafeTakeEnd #-}

-- | A variety of 'drop' which omits the checks on @n@ so there is an
-- obligation on the programmer to provide a proof that @0 <= n <= 'length' xs@.
unsafeDrop :: Int -> ByteString -> ByteString
unsafeDrop n (BS x l) = assert (0 <= n && n <= l) $ BS (plusForeignPtr x n) (l-n)
{-# INLINE unsafeDrop #-}

-- | A variety of 'dropEnd' which omits the checks on @n@ so there is an
-- obligation on the programmer to provide a proof that @0 <= n <= 'length' xs@.
--
-- @since 0.12.1.0
unsafeDropEnd :: Int -> ByteString -> ByteString
unsafeDropEnd n (BS x l) = assert (0 <= n && n <= l) $ BS x (l-n)
{-# INLINE unsafeDropEnd #-}

-- | /O(1)/ 'unsafePackAddressLen' provides constant-time construction of
-- 'ByteString's, which is ideal for string literals. It packs a sequence
Expand Down

0 comments on commit 4c33cfb

Please sign in to comment.