From 96d84e8d90c9794ea9f68c3ba536be55a336e11b Mon Sep 17 00:00:00 2001 From: Kirill Zaborsky Date: Tue, 12 Feb 2019 10:51:14 +0300 Subject: [PATCH] Retrieve (and cache) archive once when doing location completion --- subs/pantry/src/Pantry.hs | 42 +++++++++++++------------------ subs/pantry/src/Pantry/Archive.hs | 42 ++++++++++++++++++++----------- subs/pantry/src/Pantry/Hackage.hs | 2 +- subs/pantry/src/Pantry/Repo.hs | 2 +- 4 files changed, 47 insertions(+), 41 deletions(-) diff --git a/subs/pantry/src/Pantry.hs b/subs/pantry/src/Pantry.hs index d6d08fcf7d..a27bda97d4 100644 --- a/subs/pantry/src/Pantry.hs +++ b/subs/pantry/src/Pantry.hs @@ -694,7 +694,7 @@ loadPackage => PackageLocationImmutable -> RIO env Package loadPackage (PLIHackage ident cfHash tree) = getHackageTarball (pirForHash ident cfHash) (Just tree) -loadPackage pli@(PLIArchive archive pm) = getArchive (toRawPLI pli) (toRawArchive archive) (toRawPM pm) +loadPackage pli@(PLIArchive archive pm) = getArchivePackage (toRawPLI pli) (toRawArchive archive) (toRawPM pm) loadPackage (PLIRepo repo pm) = getRepo repo (toRawPM pm) -- | Load a 'Package' from a 'RawPackageLocationImmutable'. @@ -705,7 +705,7 @@ loadPackageRaw => RawPackageLocationImmutable -> RIO env Package loadPackageRaw (RPLIHackage pir mtree) = getHackageTarball pir mtree -loadPackageRaw rpli@(RPLIArchive archive pm) = getArchive rpli archive pm +loadPackageRaw rpli@(RPLIArchive archive pm) = getArchivePackage rpli archive pm loadPackageRaw (RPLIRepo repo rpm) = getRepo repo rpm -- | Fill in optional fields in a 'PackageLocationImmutable' for more reproducible builds. @@ -732,24 +732,17 @@ completePackageLocation (RPLIHackage pir0@(PackageIdentifierRevision name versio pure (pir, BlobKey sha size) treeKey <- getHackageTarballKey pir pure $ PLIHackage (PackageIdentifier name version) cfKey treeKey -completePackageLocation pl@(RPLIArchive archive pm) = - PLIArchive <$> completeArchive archive <*> completePM pl pm +completePackageLocation pl@(RPLIArchive archive rpm) = do + -- getArchive checks archive and package metadata + (sha, size, package) <- getArchive pl archive rpm + let RawArchive loc _ _ subdir = archive + pure $ PLIArchive (Archive loc sha size subdir) (packagePM package) completePackageLocation pl@(RPLIRepo repo rpm) = do unless (isSHA1 (repoCommit repo)) $ throwIO $ CannotCompleteRepoNonSHA1 repo PLIRepo repo <$> completePM pl rpm where isSHA1 t = T.length t == 40 && T.all isHexDigit t -completeArchive - :: (HasPantryConfig env, HasLogFunc env) - => RawArchive - -> RIO env Archive -completeArchive (RawArchive loc (Just sha) (Just size) subdir) = - pure $ Archive loc sha size subdir -completeArchive a@(RawArchive loc _ _ subdir) = - withArchiveLoc a $ \_fp sha size -> - pure $ Archive loc sha size subdir - completePM :: (HasPantryConfig env, HasLogFunc env, HasProcessContext env) => RawPackageLocationImmutable @@ -759,16 +752,8 @@ completePM plOrig rpm@(RawPackageMetadata mn mv mtk mc) | Just n <- mn, Just v <- mv, Just tk <- mtk, Just c <- mc = pure $ PackageMetadata (PackageIdentifier n v) tk c | otherwise = do - package <- loadPackageRaw plOrig - let pm = PackageMetadata - { pmIdent = packageIdent package - , pmTreeKey = packageTreeKey package - , pmCabal = teBlob $ case packageCabalEntry package of - PCCabalFile cfile -> cfile - PCHpack hfile -> phGenerated hfile - } - - isSame x (Just y) = x == y + pm <- packagePM <$> loadPackageRaw plOrig + let isSame x (Just y) = x == y isSame _ _ = True allSame = @@ -780,6 +765,15 @@ completePM plOrig rpm@(RawPackageMetadata mn mv mtk mc) then pure pm else throwIO $ CompletePackageMetadataMismatch plOrig pm +packagePM :: Package -> PackageMetadata +packagePM package = PackageMetadata + { pmIdent = packageIdent package + , pmTreeKey = packageTreeKey package + , pmCabal = teBlob $ case packageCabalEntry package of + PCCabalFile cfile -> cfile + PCHpack hfile -> phGenerated hfile + } + -- | Add in hashes to make a 'SnapshotLocation' reproducible. -- -- @since 0.1.0.0 diff --git a/subs/pantry/src/Pantry/Archive.hs b/subs/pantry/src/Pantry/Archive.hs index d341210887..f847c75ce2 100644 --- a/subs/pantry/src/Pantry/Archive.hs +++ b/subs/pantry/src/Pantry/Archive.hs @@ -4,11 +4,11 @@ {-# LANGUAGE ScopedTypeVariables #-} -- | Logic for loading up trees from HTTPS archives. module Pantry.Archive - ( getArchive + ( getArchivePackage + , getArchive , getArchiveKey , fetchArchivesRaw , fetchArchives - , withArchiveLoc ) where import RIO @@ -64,20 +64,32 @@ getArchiveKey -> RawArchive -> RawPackageMetadata -> RIO env TreeKey -getArchiveKey rpli archive rpm = packageTreeKey <$> getArchive rpli archive rpm -- potential optimization +getArchiveKey rpli archive rpm = + packageTreeKey <$> getArchivePackage rpli archive rpm -- potential optimization -getArchive - :: forall env. (HasPantryConfig env, HasLogFunc env, HasProcessContext env) +thd3 :: (a, b, c) -> c +thd3 (_, _, z) = z + +getArchivePackage + :: forall env. (HasPantryConfig env, HasLogFunc env, HasProcessContext env, HasCallStack) => RawPackageLocationImmutable -- ^ for exceptions -> RawArchive -> RawPackageMetadata -> RIO env Package +getArchivePackage rpli archive rpm = thd3 <$> getArchive rpli archive rpm + +getArchive + :: forall env. (HasPantryConfig env, HasLogFunc env, HasProcessContext env, HasCallStack) + => RawPackageLocationImmutable -- ^ for exceptions + -> RawArchive + -> RawPackageMetadata + -> RIO env (SHA256, FileSize, Package) getArchive rpli archive rpm = do -- Check if the value is in the archive, and use it if possible - mpa <- loadCache rpli archive - pa <- - case mpa of - Just pa -> pure pa + mcached <- loadCache rpli archive + cached@(_, _, pa) <- + case mcached of + Just stored -> pure stored -- Not in the archive. Load the archive. Completely ignore the -- PackageMetadata for now, we'll check that the Package -- info matches next. @@ -86,9 +98,9 @@ getArchive rpli archive rpm = do -- Storing in the cache exclusively uses information we have -- about the archive itself, not metadata from the user. storeCache archive sha size pa - pure pa + pure (sha, size, pa) - either throwIO pure $ checkPackageMetadata rpli rpm pa + either throwIO (\_ -> pure cached) $ checkPackageMetadata rpli rpm pa storeCache :: forall env. (HasPantryConfig env, HasLogFunc env) @@ -106,7 +118,7 @@ loadCache :: forall env. (HasPantryConfig env, HasLogFunc env, HasProcessContext env) => RawPackageLocationImmutable -> RawArchive - -> RIO env (Maybe Package) + -> RIO env (Maybe (SHA256, FileSize, Package)) loadCache rpli archive = case loc of ALFilePath _ -> pure Nothing -- TODO can we do something intelligent here? @@ -132,7 +144,7 @@ loadCache rpli archive = logWarn $ "Cached hash is " <> display sha <> ", file size " <> display size logWarn "For security and reproducibility, please add a hash and file size to your configuration" ALFilePath _ -> pure () - loadFromCache tid + fmap (sha, size,) <$> loadFromCache tid Just sha' | sha == sha' -> case msize of @@ -142,9 +154,9 @@ loadCache rpli archive = logWarn $ "Archive from " <> display url <> " does not specify a size" logWarn $ "To avoid an overflow attack, please add the file size to your configuration: " <> display size ALFilePath _ -> pure () - loadFromCache tid + fmap (sha, size,) <$> loadFromCache tid Just size' - | size == size' -> loadFromCache tid + | size == size' -> fmap (sha, size,) <$> loadFromCache tid | otherwise -> do logWarn $ "Archive from " <> display loc <> " has a matching hash but mismatched size" diff --git a/subs/pantry/src/Pantry/Hackage.hs b/subs/pantry/src/Pantry/Hackage.hs index 1c6c218f60..4286c7da75 100644 --- a/subs/pantry/src/Pantry/Hackage.hs +++ b/subs/pantry/src/Pantry/Hackage.hs @@ -482,7 +482,7 @@ getHackageTarball pir@(PackageIdentifierRevision name ver _cfi) mtreeKey = do , T.pack $ Distribution.Text.display ver , ".tar.gz" ] - package <- getArchive + package <- getArchivePackage rpli RawArchive { raLocation = ALUrl url diff --git a/subs/pantry/src/Pantry/Repo.hs b/subs/pantry/src/Pantry/Repo.hs index d036fe2a6c..95e989288e 100644 --- a/subs/pantry/src/Pantry/Repo.hs +++ b/subs/pantry/src/Pantry/Repo.hs @@ -115,7 +115,7 @@ getRepo' repo@(Repo url commit repoType' subdir) rpm = when osIsWindows $ void $ liftIO $ hSupportsANSIWithoutEmulation stdout runCommand archiveArgs abs' <- resolveFile' tarball - getArchive + getArchivePackage (RPLIRepo repo rpm) RawArchive { raLocation = ALFilePath $ ResolvedPath