Skip to content

Commit

Permalink
Streamly prototype
Browse files Browse the repository at this point in the history
  • Loading branch information
hasufell committed Apr 6, 2021
1 parent 8a53ab5 commit bd21bf9
Show file tree
Hide file tree
Showing 7 changed files with 238 additions and 74 deletions.
2 changes: 2 additions & 0 deletions Codec/Archive/Tar.hs
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ module Codec.Archive.Tar (
-- This is because 'read' accepts common format variations while 'write'
-- produces the standard format.
read,
read',
write,

-- * Packing and unpacking files to\/from internal representation
Expand All @@ -108,6 +109,7 @@ module Codec.Archive.Tar (
-- device files.
pack,
unpack,
unpack',

-- * Types
-- ** Tar entry type
Expand Down
147 changes: 106 additions & 41 deletions Codec/Archive/Tar/Read.hs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{-# LANGUAGE CPP, DeriveDataTypeable, BangPatterns #-}
{-# LANGUAGE CPP, DeriveDataTypeable, BangPatterns, MultiWayIf #-}
-----------------------------------------------------------------------------
-- |
-- Module : Codec.Archive.Tar.Read
Expand All @@ -12,7 +12,7 @@
-- Portability : portable
--
-----------------------------------------------------------------------------
module Codec.Archive.Tar.Read (read, FormatError(..)) where
module Codec.Archive.Tar.Read (read, read', FormatError(..)) where

import Codec.Archive.Tar.Types

Expand All @@ -23,60 +23,32 @@ import Control.Exception (Exception(..))
import Data.Typeable (Typeable)
import Control.Applicative
import Control.Monad
import Control.Monad.Trans.Except
import Control.Monad.IO.Class (liftIO)
import Control.DeepSeq
import Streamly
import Streamly.Prelude (cons, consM)

import qualified Data.ByteString as BS
import qualified Data.ByteString.Char8 as BS.Char8
import qualified Data.ByteString.Unsafe as BS
import qualified Data.ByteString.Lazy as LBS
import qualified Streamly.Prelude as S
import qualified Streamly.FileSystem.Handle as FH
import qualified Streamly.Internal.Data.Unfold as SU
import qualified Streamly.Internal.FileSystem.Handle as IFH
import qualified Streamly.Internal.Memory.ArrayStream as AS
import qualified Streamly.Memory.Array as MA

import Prelude hiding (read)
import Data.Word (Word8)

#if !MIN_VERSION_bytestring(0,10,0)
import Data.Monoid (Monoid(..))
import qualified Data.ByteString.Lazy.Internal as LBS
#endif

-- | Errors that can be encountered when parsing a Tar archive.
data FormatError
= TruncatedArchive
| ShortTrailer
| BadTrailer
| TrailingJunk
| ChecksumIncorrect
| NotTarFormat
| UnrecognisedTarFormat
| HeaderBadNumericEncoding
#if MIN_VERSION_base(4,8,0)
deriving (Eq, Show, Typeable)

instance Exception FormatError where
displayException TruncatedArchive = "truncated tar archive"
displayException ShortTrailer = "short tar trailer"
displayException BadTrailer = "bad tar trailer"
displayException TrailingJunk = "tar file has trailing junk"
displayException ChecksumIncorrect = "tar checksum error"
displayException NotTarFormat = "data is not in tar format"
displayException UnrecognisedTarFormat = "tar entry not in a recognised format"
displayException HeaderBadNumericEncoding = "tar header is malformed (bad numeric encoding)"
#else
deriving (Eq, Typeable)

instance Show FormatError where
show TruncatedArchive = "truncated tar archive"
show ShortTrailer = "short tar trailer"
show BadTrailer = "bad tar trailer"
show TrailingJunk = "tar file has trailing junk"
show ChecksumIncorrect = "tar checksum error"
show NotTarFormat = "data is not in tar format"
show UnrecognisedTarFormat = "tar entry not in a recognised format"
show HeaderBadNumericEncoding = "tar header is malformed (bad numeric encoding)"

instance Exception FormatError
#endif

instance NFData FormatError where
rnf !_ = () -- enumerations are fully strict by construction

-- | Convert a data stream in the tar file format into an internal data
-- structure. Decoding errors are reported by the 'Fail' constructor of the
Expand All @@ -87,6 +59,99 @@ instance NFData FormatError where
read :: LBS.ByteString -> Entries FormatError
read = unfoldEntries getEntry

read' :: SerialT IO Word8 -> SerialT (ExceptT FormatError IO) Entry
read' = S.unfoldrM getEntry'

-- unfoldrM :: (IsStream t, MonadAsync m) => (b -> m (Maybe (a, b))) -> b -> t m a

getEntry' :: SerialT IO Word8
-> ExceptT FormatError IO (Maybe (Entry, SerialT IO Word8))
getEntry' stream = do
liftIO $ putStrLn $ "start"
header <- fmap BS.pack $ liftIO $ S.toList $ S.take 512 stream

let name = getString 0 100 header
mode_ = partial $ getOct 100 8 header
uid_ = partial $ getOct 108 8 header
gid_ = partial $ getOct 116 8 header
size_ = partial $ getOct 124 12 header
mtime_ = partial $ getOct 136 12 header
chksum_ = partial $ getOct 148 8 header
typecode = getByte 156 header
linkname = getString 157 100 header
magic = getChars 257 8 header
uname = getString 265 32 header
gname = getString 297 32 header
devmajor_ = partial $ getOct 329 8 header
devminor_ = partial $ getOct 337 8 header
prefix = getString 345 155 header
format_
| magic == ustarMagic = Right UstarFormat
| magic == gnuMagic = Right GnuFormat
| magic == v7Magic = Right V7Format
| otherwise = Left UnrecognisedTarFormat


liftIO $ putStrLn $ "mid"
head' <- liftIO $ S.head stream
when (BS.length header < 512) $ throwE TruncatedArchive
if | head' == Just 0 -> do
let (end, trailing) = splitAt' 1024 stream
liftIO (S.length end) >>= \lEnd -> when (lEnd /= 1024) $ throwE ShortTrailer
-- liftIO (S.all (== 0) end) >>= \b -> when (not b) $ throwE BadTrailer
-- liftIO (S.all (== 0) trailing) >>= \b -> when (not b) $ throwE TrailingJunk
pure Nothing
| otherwise -> do
case (chksum_, format_) of
(Right chksum, _ ) | correctChecksum header chksum -> return ()
(Right _, Right _) -> throwE ChecksumIncorrect
_ -> throwE NotTarFormat

-- These fields are partial, have to check them
format <- except format_; mode <- except mode_;
uid <- except uid_; gid <- except gid_;
size <- except size_; mtime <- except mtime_;
devmajor <- except devmajor_; devminor <- except devminor_;

liftIO $ putStrLn $ show (TarPath name prefix)

let content = S.take (fromIntegral size) (S.drop 512 stream)
padding = (512 - size) `mod` 512
restStream = S.drop (512 + size + padding) stream


entry = Entry {
entryTarPath = TarPath name prefix,
entryContent = case typecode of
'\0' -> NormalFileS content (fromIntegral size)
'0' -> NormalFileS content (fromIntegral size)
'1' -> HardLink (LinkTarget linkname)
'2' -> SymbolicLink (LinkTarget linkname)
_ | format == V7Format
-> OtherEntryTypeS typecode content (fromIntegral size)
'3' -> CharacterDevice devmajor devminor
'4' -> BlockDevice devmajor devminor
'5' -> Directory
'6' -> NamedPipe
'7' -> NormalFileS content (fromIntegral size)
_ -> OtherEntryTypeS typecode content (fromIntegral size),
entryPermissions = mode,
entryOwnership = Ownership (BS.Char8.unpack uname)
(BS.Char8.unpack gname) uid gid,
entryTime = mtime,
entryFormat = format
}

return (Just (entry, restStream))
where
splitAt' :: (Monad m, IsStream t) => Int -> t m a -> (t m a, t m a)
splitAt' i s =
let head' = S.take i s
tail' = S.drop i s
in (head', tail')
{-# NOINLINE getEntry' #-}


getEntry :: LBS.ByteString -> Either FormatError (Maybe (Entry, LBS.ByteString))
getEntry bs
| BS.length header < 512 = Left TruncatedArchive
Expand Down
66 changes: 61 additions & 5 deletions Codec/Archive/Tar/Types.hs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{-# LANGUAGE CPP, GeneralizedNewtypeDeriving, BangPatterns #-}
{-# LANGUAGE CPP, LambdaCase, GeneralizedNewtypeDeriving, BangPatterns #-}
-----------------------------------------------------------------------------
-- |
-- Module : Codec.Archive.Tar.Types
Expand Down Expand Up @@ -28,6 +28,7 @@ module Codec.Archive.Tar.Types (
DevMinor,
Format(..),
These(..),
FormatError(..),
these,

simpleEntry,
Expand Down Expand Up @@ -60,6 +61,7 @@ module Codec.Archive.Tar.Types (
foldEntries,
foldlEntries,
unfoldEntries,
unfoldEntriesM,

#ifdef TESTS
limitToV7FormatCompat
Expand All @@ -69,9 +71,11 @@ module Codec.Archive.Tar.Types (
import Data.Int (Int64)
import Data.Monoid (Monoid(..))
import Data.Semigroup as Sem
import Streamly
import qualified Data.ByteString as BS
import qualified Data.ByteString.Char8 as BS.Char8
import qualified Data.ByteString.Lazy as LBS
import qualified Streamly.Memory.Array as MA
import Control.DeepSeq

import qualified System.FilePath as FilePath.Native
Expand All @@ -84,11 +88,14 @@ import qualified System.FilePath.Windows as FilePath.Windows
import System.Posix.Types
( FileMode )

import Data.Word (Word8)
#ifdef TESTS
import Test.QuickCheck
import Control.Applicative ((<$>), (<*>), pure)
import Data.Word (Word16)
#endif
import Data.Data (Typeable(..))
import Control.Exception (Exception(..))

type FileSize = Int64
-- | The number of seconds since the UNIX epoch
Expand Down Expand Up @@ -122,7 +129,6 @@ data Entry = Entry {
-- | The tar format the archive is using.
entryFormat :: !Format
}
deriving (Eq, Show)

-- | Native 'FilePath' of the file or directory within the archive.
--
Expand All @@ -134,6 +140,7 @@ entryPath = fromTarPath . entryTarPath
-- Portable archives should contain only 'NormalFile' and 'Directory'.
--
data EntryContent = NormalFile LBS.ByteString {-# UNPACK #-} !FileSize
| NormalFileS (SerialT IO Word8) {-# UNPACK #-} !FileSize
| Directory
| SymbolicLink !LinkTarget
| HardLink !LinkTarget
Expand All @@ -144,7 +151,49 @@ data EntryContent = NormalFile LBS.ByteString {-# UNPACK #-} !FileSize
| NamedPipe
| OtherEntryType {-# UNPACK #-} !TypeCode LBS.ByteString
{-# UNPACK #-} !FileSize
deriving (Eq, Ord, Show)
| OtherEntryTypeS {-# UNPACK #-} !TypeCode (SerialT IO Word8)
{-# UNPACK #-} !FileSize

-- | Errors that can be encountered when parsing a Tar archive.
data FormatError
= TruncatedArchive
| ShortTrailer
| BadTrailer
| TrailingJunk
| ChecksumIncorrect
| NotTarFormat
| UnrecognisedTarFormat
| HeaderBadNumericEncoding
#if MIN_VERSION_base(4,8,0)
deriving (Eq, Show, Typeable)

instance Exception FormatError where
displayException TruncatedArchive = "truncated tar archive"
displayException ShortTrailer = "short tar trailer"
displayException BadTrailer = "bad tar trailer"
displayException TrailingJunk = "tar file has trailing junk"
displayException ChecksumIncorrect = "tar checksum error"
displayException NotTarFormat = "data is not in tar format"
displayException UnrecognisedTarFormat = "tar entry not in a recognised format"
displayException HeaderBadNumericEncoding = "tar header is malformed (bad numeric encoding)"
#else
deriving (Eq, Typeable)

instance Show FormatError where
show TruncatedArchive = "truncated tar archive"
show ShortTrailer = "short tar trailer"
show BadTrailer = "bad tar trailer"
show TrailingJunk = "tar file has trailing junk"
show ChecksumIncorrect = "tar checksum error"
show NotTarFormat = "data is not in tar format"
show UnrecognisedTarFormat = "tar entry not in a recognised format"
show HeaderBadNumericEncoding = "tar header is malformed (bad numeric encoding)"

instance Exception FormatError
#endif

instance NFData FormatError where
rnf !_ = () -- enumerations are fully strict by construction

data Ownership = Ownership {
-- | The owner user name. Should be set to @\"\"@ if unknown.
Expand Down Expand Up @@ -515,7 +564,6 @@ fromLinkTargetToWindowsPath (LinkTarget pathbs) = adjustDirectory $
data Entries e = Next Entry (Entries e)
| Done
| Fail e
deriving (Eq, Show)

infixr 5 `Next`

Expand All @@ -534,6 +582,15 @@ unfoldEntries f = unfold
Right Nothing -> Done
Right (Just (e, x')) -> Next e (unfold x')

unfoldEntriesM :: Monad m => (a -> m (Either e (Maybe (Entry, a)))) -> a -> m (Entries e)
unfoldEntriesM f = unfold
where
unfold x = f x >>= \case
Left err -> pure $ Fail err
Right Nothing -> pure $ Done
Right (Just (e, x')) -> Next e <$> (unfold x')
{-# INLINABLE unfoldEntriesM #-}

-- | This is like the standard 'foldr' function on lists, but for 'Entries'.
-- Compared to 'foldr' it takes an extra function to account for the
-- possibility of failure.
Expand Down Expand Up @@ -752,4 +809,3 @@ limitToV7FormatCompat entry@Entry { entryFormat = V7Format } =
limitToV7FormatCompat entry = entry

#endif

Loading

0 comments on commit bd21bf9

Please sign in to comment.