From 42373e53086719a8c73ea52ef8e848c8bd5a0cda Mon Sep 17 00:00:00 2001 From: John Ericson Date: Thu, 18 Jan 2024 23:57:26 -0500 Subject: [PATCH] Avoid creating temporary store object for git over the wire Instead, serialize as NAR and send that over, then rehash sever side. This is alorithmically simpler, but comes at the cost of a newer parameter to `Store::addToStoreFromDump`. --- src/libexpr/primops.cc | 2 +- src/libstore/binary-cache-store.cc | 27 +++++--- src/libstore/binary-cache-store.hh | 3 +- src/libstore/build/local-derivation-goal.cc | 5 +- src/libstore/daemon.cc | 59 ++++++++-------- src/libstore/derivations.cc | 2 +- src/libstore/dummy-store.cc | 3 +- src/libstore/legacy-ssh-store.hh | 3 +- src/libstore/local-store.cc | 77 ++++++++------------- src/libstore/local-store.hh | 3 +- src/libstore/remote-store.cc | 20 +++++- src/libstore/remote-store.hh | 3 +- src/libstore/store-api.cc | 47 ++++--------- src/libstore/store-api.hh | 17 +++-- src/nix-env/user-env.cc | 2 +- src/nix/develop.cc | 2 +- 16 files changed, 137 insertions(+), 138 deletions(-) diff --git a/src/libexpr/primops.cc b/src/libexpr/primops.cc index 9ea266cf9f37..78f7f71ed847 100644 --- a/src/libexpr/primops.cc +++ b/src/libexpr/primops.cc @@ -2092,7 +2092,7 @@ static void prim_toFile(EvalState & state, const PosIdx pos, Value * * args, Val }) : ({ StringSource s { contents }; - state.store->addToStoreFromDump(s, name, TextIngestionMethod {}, HashAlgorithm::SHA256, refs, state.repair); + state.store->addToStoreFromDump(s, name, FileSerialisationMethod::Flat, TextIngestionMethod {}, HashAlgorithm::SHA256, refs, state.repair); }); /* Note: we don't need to add `context' to the context of the diff --git a/src/libstore/binary-cache-store.cc b/src/libstore/binary-cache-store.cc index d6047dd7e67d..25bb291d1412 100644 --- a/src/libstore/binary-cache-store.cc +++ b/src/libstore/binary-cache-store.cc @@ -305,7 +305,8 @@ void BinaryCacheStore::addToStore(const ValidPathInfo & info, Source & narSource StorePath BinaryCacheStore::addToStoreFromDump( Source & dump, std::string_view name, - ContentAddressMethod method, + FileSerialisationMethod dumpMethod, + ContentAddressMethod hashMethod, HashAlgorithm hashAlgo, const StorePathSet & references, RepairFlag repair) @@ -313,17 +314,26 @@ StorePath BinaryCacheStore::addToStoreFromDump( std::optional caHash; std::string nar; + // Calculating Git hash from NAR stream not yet implemented. May not + // be possible to implement in single-pass in NAR is in an + // inconvenient order. Could fetch after uploading, however. + if (hashMethod.getFileIngestionMethod() == FileIngestionMethod::Git) + unsupported("addToStoreFromDump"); + if (auto * dump2p = dynamic_cast(&dump)) { auto & dump2 = *dump2p; // Hack, this gives us a "replayable" source so we can compute // multiple hashes more easily. - caHash = hashString(HashAlgorithm::SHA256, dump2.s); - switch (method.getFileIngestionMethod()) { - case FileIngestionMethod::Recursive: + // + // Only calculate if the dump is in the right format, however. + if (static_cast(dumpMethod) == hashMethod.getFileIngestionMethod()) + caHash = hashString(HashAlgorithm::SHA256, dump2.s); + switch (dumpMethod) { + case FileSerialisationMethod::Recursive: // The dump is already NAR in this case, just use it. nar = dump2.s; break; - case FileIngestionMethod::Flat: + case FileSerialisationMethod::Flat: { // The dump is Flat, so we need to convert it to NAR with a // single file. @@ -332,14 +342,11 @@ StorePath BinaryCacheStore::addToStoreFromDump( nar = std::move(s.s); break; } - case FileIngestionMethod::Git: - unsupported("addToStoreFromDump"); - break; } } else { // Otherwise, we have to do th same hashing as NAR so our single // hash will suffice for both purposes. - if (method != FileIngestionMethod::Recursive || hashAlgo != HashAlgorithm::SHA256) + if (dumpMethod != FileSerialisationMethod::Recursive || hashAlgo != HashAlgorithm::SHA256) unsupported("addToStoreFromDump"); } StringSource narDump { nar }; @@ -354,7 +361,7 @@ StorePath BinaryCacheStore::addToStoreFromDump( *this, name, ContentAddressWithReferences::fromParts( - method, + hashMethod, caHash ? *caHash : nar.first, { .others = references, diff --git a/src/libstore/binary-cache-store.hh b/src/libstore/binary-cache-store.hh index 76de2d11addf..7c282830933f 100644 --- a/src/libstore/binary-cache-store.hh +++ b/src/libstore/binary-cache-store.hh @@ -125,7 +125,8 @@ public: StorePath addToStoreFromDump( Source & dump, std::string_view name, - ContentAddressMethod method, + FileSerialisationMethod dumpMethod, + ContentAddressMethod hashMethod, HashAlgorithm hashAlgo, const StorePathSet & references, RepairFlag repair) override; diff --git a/src/libstore/build/local-derivation-goal.cc b/src/libstore/build/local-derivation-goal.cc index d92966a7432d..a9b8de123603 100644 --- a/src/libstore/build/local-derivation-goal.cc +++ b/src/libstore/build/local-derivation-goal.cc @@ -1312,12 +1312,13 @@ struct RestrictedStore : public virtual RestrictedStoreConfig, public virtual In StorePath addToStoreFromDump( Source & dump, std::string_view name, - ContentAddressMethod method, + FileSerialisationMethod dumpMethod, + ContentAddressMethod hashMethod, HashAlgorithm hashAlgo, const StorePathSet & references, RepairFlag repair) override { - auto path = next->addToStoreFromDump(dump, name, method, hashAlgo, references, repair); + auto path = next->addToStoreFromDump(dump, name, dumpMethod, hashMethod, hashAlgo, references, repair); goal.addDependency(path); return path; } diff --git a/src/libstore/daemon.cc b/src/libstore/daemon.cc index 873065e14744..e1337f51df3c 100644 --- a/src/libstore/daemon.cc +++ b/src/libstore/daemon.cc @@ -401,11 +401,23 @@ static void performOp(TunnelLogger * logger, ref store, logger->startWork(); auto pathInfo = [&]() { // NB: FramedSource must be out of scope before logger->stopWork(); - auto [contentAddressMethod, hashAlgo_] = ContentAddressMethod::parseWithAlgo(camStr); - auto hashAlgo = hashAlgo_; // work around clang bug + auto [contentAddressMethod, hashAlgo] = ContentAddressMethod::parseWithAlgo(camStr); FramedSource source(from); + FileSerialisationMethod dumpMethod; + switch (contentAddressMethod.getFileIngestionMethod()) { + case FileIngestionMethod::Flat: + dumpMethod = FileSerialisationMethod::Flat; + break; + case FileIngestionMethod::Recursive: + dumpMethod = FileSerialisationMethod::Recursive; + break; + case FileIngestionMethod::Git: + // Use NAR; Git is not a serialization method + dumpMethod = FileSerialisationMethod::Recursive; + break; + } // TODO these two steps are essentially RemoteStore::addCAToStore. Move it up to Store. - auto path = store->addToStoreFromDump(source, name, contentAddressMethod, hashAlgo, refs, repair); + auto path = store->addToStoreFromDump(source, name, dumpMethod, contentAddressMethod, hashAlgo, refs, repair); return store->queryPathInfo(path); }(); logger->stopWork(); @@ -431,34 +443,23 @@ static void performOp(TunnelLogger * logger, ref store, hashAlgo = parseHashAlgo(hashAlgoRaw); } + // Old protocol always sends NAR, regardless of hashing method auto dumpSource = sinkToSource([&](Sink & saved) { - if (method == FileIngestionMethod::Recursive) { - /* We parse the NAR dump through into `saved` unmodified, - so why all this extra work? We still parse the NAR so - that we aren't sending arbitrary data to `saved` - unwittingly`, and we know when the NAR ends so we don't - consume the rest of `from` and can't parse another - command. (We don't trust `addToStoreFromDump` to not - eagerly consume the entire stream it's given, past the - length of the Nar. */ - TeeSource savedNARSource(from, saved); - NullFileSystemObjectSink sink; /* just parse the NAR */ - parseDump(sink, savedNARSource); - } else if (method == FileIngestionMethod::Flat) { - /* Incrementally parse the NAR file, stripping the - metadata, and streaming the sole file we expect into - `saved`. */ - RegularFileSink savedRegular { saved }; - parseDump(savedRegular, from); - if (!savedRegular.regular) throw Error("regular file expected"); - } else { - /* Should have validated above that no other file ingestion - method was used. */ - assert(false); - } + /* We parse the NAR dump through into `saved` unmodified, + so why all this extra work? We still parse the NAR so + that we aren't sending arbitrary data to `saved` + unwittingly`, and we know when the NAR ends so we don't + consume the rest of `from` and can't parse another + command. (We don't trust `addToStoreFromDump` to not + eagerly consume the entire stream it's given, past the + length of the Nar. */ + TeeSource savedNARSource(from, saved); + NullFileSystemObjectSink sink; /* just parse the NAR */ + parseDump(sink, savedNARSource); }); logger->startWork(); - auto path = store->addToStoreFromDump(*dumpSource, baseName, method, hashAlgo); + auto path = store->addToStoreFromDump( + *dumpSource, baseName, FileSerialisationMethod::Recursive, method, hashAlgo); logger->stopWork(); to << store->printStorePath(path); @@ -490,7 +491,7 @@ static void performOp(TunnelLogger * logger, ref store, logger->startWork(); auto path = ({ StringSource source { s }; - store->addToStoreFromDump(source, suffix, TextIngestionMethod {}, HashAlgorithm::SHA256, refs, NoRepair); + store->addToStoreFromDump(source, suffix, FileSerialisationMethod::Flat, TextIngestionMethod {}, HashAlgorithm::SHA256, refs, NoRepair); }); logger->stopWork(); to << store->printStorePath(path); diff --git a/src/libstore/derivations.cc b/src/libstore/derivations.cc index 305ed5b4291d..df14e979fb86 100644 --- a/src/libstore/derivations.cc +++ b/src/libstore/derivations.cc @@ -150,7 +150,7 @@ StorePath writeDerivation(Store & store, }) : ({ StringSource s { contents }; - store.addToStoreFromDump(s, suffix, TextIngestionMethod {}, HashAlgorithm::SHA256, references, repair); + store.addToStoreFromDump(s, suffix, FileSerialisationMethod::Flat, TextIngestionMethod {}, HashAlgorithm::SHA256, references, repair); }); } diff --git a/src/libstore/dummy-store.cc b/src/libstore/dummy-store.cc index e4f13b8f4100..30f23cff91a4 100644 --- a/src/libstore/dummy-store.cc +++ b/src/libstore/dummy-store.cc @@ -61,7 +61,8 @@ struct DummyStore : public virtual DummyStoreConfig, public virtual Store virtual StorePath addToStoreFromDump( Source & dump, std::string_view name, - ContentAddressMethod method = FileIngestionMethod::Recursive, + FileSerialisationMethod dumpMethod = FileSerialisationMethod::Recursive, + ContentAddressMethod hashMethod = FileIngestionMethod::Recursive, HashAlgorithm hashAlgo = HashAlgorithm::SHA256, const StorePathSet & references = StorePathSet(), RepairFlag repair = NoRepair) override diff --git a/src/libstore/legacy-ssh-store.hh b/src/libstore/legacy-ssh-store.hh index ae890177b3d5..ca2f115d25e4 100644 --- a/src/libstore/legacy-ssh-store.hh +++ b/src/libstore/legacy-ssh-store.hh @@ -72,7 +72,8 @@ struct LegacySSHStore : public virtual LegacySSHStoreConfig, public virtual Stor virtual StorePath addToStoreFromDump( Source & dump, std::string_view name, - ContentAddressMethod method = FileIngestionMethod::Recursive, + FileSerialisationMethod dumpMethod = FileSerialisationMethod::Recursive, + ContentAddressMethod hashMethod = FileIngestionMethod::Recursive, HashAlgorithm hashAlgo = HashAlgorithm::SHA256, const StorePathSet & references = StorePathSet(), RepairFlag repair = NoRepair) override diff --git a/src/libstore/local-store.cc b/src/libstore/local-store.cc index 5f35cf3a856b..56f8c5dd85c9 100644 --- a/src/libstore/local-store.cc +++ b/src/libstore/local-store.cc @@ -1148,7 +1148,8 @@ void LocalStore::addToStore(const ValidPathInfo & info, Source & source, StorePath LocalStore::addToStoreFromDump( Source & source0, std::string_view name, - ContentAddressMethod method, + FileSerialisationMethod dumpMethod, + ContentAddressMethod hashMethod, HashAlgorithm hashAlgo, const StorePathSet & references, RepairFlag repair) @@ -1201,7 +1202,13 @@ StorePath LocalStore::addToStoreFromDump( Path tempDir; AutoCloseFD tempDirFd; - if (!inMemory) { + bool methodsMatch = (FileIngestionMethod) dumpMethod == hashMethod; + + /* If the methods don't match, our streaming hash of the dump is the + wrong sort, and we need to rehash. */ + bool inMemoryAndDontNeedRestore = inMemory && methodsMatch; + + if (!inMemoryAndDontNeedRestore) { /* Drain what we pulled so far, and then keep on pulling */ StringSource dumpSource { dump }; ChainSource bothSource { dumpSource, source }; @@ -1210,40 +1217,23 @@ StorePath LocalStore::addToStoreFromDump( delTempDir = std::make_unique(tempDir); tempPath = tempDir + "/x"; - auto fim = method.getFileIngestionMethod(); - switch (fim) { - case FileIngestionMethod::Flat: - case FileIngestionMethod::Recursive: - restorePath(tempPath, bothSource, (FileSerialisationMethod) fim); - break; - case FileIngestionMethod::Git: { - RestoreSink sink; - sink.dstPath = tempPath; - auto accessor = getFSAccessor(); - git::restore(sink, bothSource, [&](Hash childHash) { - return std::pair { - &*accessor, - CanonPath { - printStorePath(this->makeFixedOutputPath("git", FixedOutputInfo { - .method = FileIngestionMethod::Git, - .hash = childHash, - })) - }, - }; - }); - break; - } - } + restorePath(tempPath, bothSource, dumpMethod); dumpBuffer.reset(); dump = {}; } - auto [hash, size] = hashSink->finish(); + auto [dumpHash, size] = hashSink->finish(); + + PosixSourceAccessor accessor; auto desc = ContentAddressWithReferences::fromParts( - method, - hash, + hashMethod, + methodsMatch + ? dumpHash + : hashPath( + accessor, CanonPath { tempPath }, + hashMethod.getFileIngestionMethod(), hashAlgo), { .others = references, // caller is not capable of creating a self-reference, because this is content-addressed without modulus @@ -1269,32 +1259,19 @@ StorePath LocalStore::addToStoreFromDump( autoGC(); - if (inMemory) { + if (inMemoryAndDontNeedRestore) { StringSource dumpSource { dump }; /* Restore from the buffer in memory. */ - auto fim = method.getFileIngestionMethod(); + auto fim = hashMethod.getFileIngestionMethod(); switch (fim) { case FileIngestionMethod::Flat: case FileIngestionMethod::Recursive: restorePath(realPath, dumpSource, (FileSerialisationMethod) fim); break; - case FileIngestionMethod::Git: { - RestoreSink sink; - sink.dstPath = realPath; - auto accessor = getFSAccessor(); - git::restore(sink, dumpSource, [&](Hash childHash) { - return std::pair { - &*accessor, - CanonPath { - printStorePath(this->makeFixedOutputPath("git", FixedOutputInfo { - .method = FileIngestionMethod::Git, - .hash = childHash, - })) - }, - }; - }); - break; - } + case FileIngestionMethod::Git: + // doesn't correspond to serialization method, so + // this should be unreachable + assert(false); } } else { /* Move the temporary path we restored above. */ @@ -1303,8 +1280,8 @@ StorePath LocalStore::addToStoreFromDump( /* For computing the nar hash. In recursive SHA-256 mode, this is the same as the store hash, so no need to do it again. */ - auto narHash = std::pair { hash, size }; - if (method != FileIngestionMethod::Recursive || hashAlgo != HashAlgorithm::SHA256) { + auto narHash = std::pair { dumpHash, size }; + if (dumpMethod != FileSerialisationMethod::Recursive || hashAlgo != HashAlgorithm::SHA256) { HashSink narSink { HashAlgorithm::SHA256 }; dumpPath(realPath, narSink); narHash = narSink.finish(); diff --git a/src/libstore/local-store.hh b/src/libstore/local-store.hh index ba56d3ead125..7eff1d6905ea 100644 --- a/src/libstore/local-store.hh +++ b/src/libstore/local-store.hh @@ -180,7 +180,8 @@ public: StorePath addToStoreFromDump( Source & dump, std::string_view name, - ContentAddressMethod method, + FileSerialisationMethod dumpMethod, + ContentAddressMethod hashMethod, HashAlgorithm hashAlgo, const StorePathSet & references, RepairFlag repair) override; diff --git a/src/libstore/remote-store.cc b/src/libstore/remote-store.cc index 0cae84828d37..8dfe8addab79 100644 --- a/src/libstore/remote-store.cc +++ b/src/libstore/remote-store.cc @@ -509,12 +509,28 @@ ref RemoteStore::addCAToStore( StorePath RemoteStore::addToStoreFromDump( Source & dump, std::string_view name, - ContentAddressMethod method, + FileSerialisationMethod dumpMethod, + ContentAddressMethod hashMethod, HashAlgorithm hashAlgo, const StorePathSet & references, RepairFlag repair) { - return addCAToStore(dump, name, method, hashAlgo, references, repair)->path; + FileSerialisationMethod fsm; + switch (hashMethod.getFileIngestionMethod()) { + case FileIngestionMethod::Flat: + fsm = FileSerialisationMethod::Flat; + break; + case FileIngestionMethod::Recursive: + fsm = FileSerialisationMethod::Recursive; + break; + case FileIngestionMethod::Git: + // Use NAR; Git is not a serialization method + fsm = FileSerialisationMethod::Recursive; + break; + } + if (fsm != dumpMethod) + unsupported("RemoteStore::addToStoreFromDump doesn't support this `dumpMethod` `hashMethod` combination"); + return addCAToStore(dump, name, hashMethod, hashAlgo, references, repair)->path; } diff --git a/src/libstore/remote-store.hh b/src/libstore/remote-store.hh index c51a21375628..d630adc08b79 100644 --- a/src/libstore/remote-store.hh +++ b/src/libstore/remote-store.hh @@ -87,7 +87,8 @@ public: StorePath addToStoreFromDump( Source & dump, std::string_view name, - ContentAddressMethod method = FileIngestionMethod::Recursive, + FileSerialisationMethod dumpMethod = FileSerialisationMethod::Recursive, + ContentAddressMethod hashMethod = FileIngestionMethod::Recursive, HashAlgorithm hashAlgo = HashAlgorithm::SHA256, const StorePathSet & references = StorePathSet(), RepairFlag repair = NoRepair) override; diff --git a/src/libstore/store-api.cc b/src/libstore/store-api.cc index c44612ec51ac..4356296d45a8 100644 --- a/src/libstore/store-api.cc +++ b/src/libstore/store-api.cc @@ -197,40 +197,23 @@ StorePath Store::addToStore( PathFilter & filter, RepairFlag repair) { + FileSerialisationMethod fsm; + switch (method.getFileIngestionMethod()) { + case FileIngestionMethod::Flat: + fsm = FileSerialisationMethod::Flat; + break; + case FileIngestionMethod::Recursive: + fsm = FileSerialisationMethod::Recursive; + break; + case FileIngestionMethod::Git: + // Use NAR; Git is not a serialization method + fsm = FileSerialisationMethod::Recursive; + break; + } auto source = sinkToSource([&](Sink & sink) { - auto fim = method.getFileIngestionMethod(); - switch (fim) { - case FileIngestionMethod::Flat: - case FileIngestionMethod::Recursive: - { - dumpPath(accessor, path, sink, (FileSerialisationMethod) fim, filter); - break; - } - case FileIngestionMethod::Git: - { - git::dump( - accessor, path, - sink, - // recursively add to store if path is a directory - [&](const CanonPath & path) -> git::TreeEntry { - auto storePath = addToStore("git", accessor, path, method, hashAlgo, references, filter, repair); - auto info = queryPathInfo(storePath); - assert(info->ca); - assert(info->ca->method == FileIngestionMethod::Git); - auto stat = getFSAccessor()->lstat(CanonPath(printStorePath(storePath))); - auto gitModeOpt = git::convertMode(stat.type); - assert(gitModeOpt); - return { - .mode = *gitModeOpt, - .hash = info->ca->hash, - }; - }, - filter); - break; - } - } + dumpPath(accessor, path, sink, fsm, filter); }); - return addToStoreFromDump(*source, name, method, hashAlgo, references, repair); + return addToStoreFromDump(*source, name, fsm, method, hashAlgo, references, repair); } void Store::addMultipleToStore( diff --git a/src/libstore/store-api.hh b/src/libstore/store-api.hh index 5163070b2e6d..5f683a21139f 100644 --- a/src/libstore/store-api.hh +++ b/src/libstore/store-api.hh @@ -466,14 +466,23 @@ public: * in `dump`, which is either a NAR serialisation (if recursive == * true) or simply the contents of a regular file (if recursive == * false). - * `dump` may be drained * - * \todo remove? + * `dump` may be drained. + * + * @param dumpMethod What serialisation format is `dump`, i.e. how + * to deserialize it. Must either match hashMethod or be + * `FileSerialisationMethod::Recursive`. + * + * @param hashMethod How content addressing? Need not match be the + * same as `dumpMethod`. + * + * @todo remove? */ virtual StorePath addToStoreFromDump( Source & dump, std::string_view name, - ContentAddressMethod method = FileIngestionMethod::Recursive, + FileSerialisationMethod dumpMethod = FileSerialisationMethod::Recursive, + ContentAddressMethod hashMethod = FileIngestionMethod::Recursive, HashAlgorithm hashAlgo = HashAlgorithm::SHA256, const StorePathSet & references = StorePathSet(), RepairFlag repair = NoRepair) = 0; @@ -772,7 +781,7 @@ protected: * Helper for methods that are not unsupported: this is used for * default definitions for virtual methods that are meant to be overriden. * - * \todo Using this should be a last resort. It is better to make + * @todo Using this should be a last resort. It is better to make * the method "virtual pure" and/or move it to a subclass. */ [[noreturn]] void unsupported(const std::string & op) diff --git a/src/nix-env/user-env.cc b/src/nix-env/user-env.cc index 2f9c988d5f20..8bebe2b9eeee 100644 --- a/src/nix-env/user-env.cc +++ b/src/nix-env/user-env.cc @@ -113,7 +113,7 @@ bool createUserEnv(EvalState & state, PackageInfos & elems, std::string str2 = str.str(); StringSource source { str2 }; state.store->addToStoreFromDump( - source, "env-manifest.nix", TextIngestionMethod {}, HashAlgorithm::SHA256, references); + source, "env-manifest.nix", FileSerialisationMethod::Flat, TextIngestionMethod {}, HashAlgorithm::SHA256, references); }); /* Get the environment builder expression. */ diff --git a/src/nix/develop.cc b/src/nix/develop.cc index 403178a5d96e..c1842f2d57aa 100644 --- a/src/nix/develop.cc +++ b/src/nix/develop.cc @@ -226,7 +226,7 @@ static StorePath getDerivationEnvironment(ref store, ref evalStore auto getEnvShPath = ({ StringSource source { getEnvSh }; evalStore->addToStoreFromDump( - source, "get-env.sh", TextIngestionMethod {}, HashAlgorithm::SHA256, {}); + source, "get-env.sh", FileSerialisationMethod::Flat, TextIngestionMethod {}, HashAlgorithm::SHA256, {}); }); drv.args = {store->printStorePath(getEnvShPath)};