Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a Git-based content-addressed tarball cache #9485

Merged
merged 12 commits into from
Feb 15, 2024
Merged
5 changes: 5 additions & 0 deletions src/libfetchers/attrs.cc
Original file line number Diff line number Diff line change
Expand Up @@ -104,4 +104,9 @@ std::map<std::string, std::string> attrsToQuery(const Attrs & attrs)
return query;
}

/**
 * Fetch the string attribute `name` from `attrs` and parse it as a
 * hash, passing SHA-1 as the hash algorithm to `Hash::parseAny`.
 */
Hash getRevAttr(const Attrs & attrs, const std::string & name)
{
    auto revText = getStrAttr(attrs, name);
    return Hash::parseAny(revText, HashAlgorithm::SHA1);
}

}
2 changes: 2 additions & 0 deletions src/libfetchers/attrs.hh
Original file line number Diff line number Diff line change
Expand Up @@ -39,4 +39,6 @@ bool getBoolAttr(const Attrs & attrs, const std::string & name);

std::map<std::string, std::string> attrsToQuery(const Attrs & attrs);

/**
 * Read the string attribute `name` from `attrs` and parse it as a
 * hash, using SHA-1 as the hash algorithm.
 */
Hash getRevAttr(const Attrs & attrs, const std::string & name);

}
166 changes: 164 additions & 2 deletions src/libfetchers/git-utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
#include "finally.hh"
#include "processes.hh"
#include "signals.hh"

#include <boost/core/span.hpp>
#include "users.hh"
#include "fs-sink.hh"

#include <git2/attr.h>
#include <git2/blob.h>
Expand All @@ -28,6 +28,7 @@
#include <unordered_set>
#include <queue>
#include <regex>
#include <span>

namespace std {

Expand Down Expand Up @@ -356,6 +357,8 @@ struct GitRepoImpl : GitRepo, std::enable_shared_from_this<GitRepoImpl>

ref<InputAccessor> getAccessor(const WorkdirInfo & wd, bool exportIgnore, MakeNotAllowedError e) override;

ref<GitFileSystemObjectSink> getFileSystemObjectSink() override;

static int sidebandProgressCallback(const char * str, int len, void * payload)
{
auto act = (Activity *) payload;
Expand Down Expand Up @@ -770,6 +773,154 @@ struct GitExportIgnoreInputAccessor : CachingFilteringInputAccessor {

};

/**
 * A file system object sink that stores everything it receives as Git
 * blob and tree objects in `repo`.
 *
 * Directories are assembled with a stack of tree builders
 * (`pendingDirs`). The bottom entry is the root tree (its name is the
 * empty string); every entry above it is a subdirectory of the entry
 * directly below. When an incoming path leaves a directory, that
 * directory's tree object is written and inserted into its parent.
 *
 * NOTE(review): a directory that is left and later re-entered gets a
 * fresh builder, so callers presumably have to deliver members grouped
 * by directory -- confirm against the callers.
 */
struct GitFileSystemObjectSinkImpl : GitFileSystemObjectSink
{
    ref<GitRepoImpl> repo;

    /** A directory whose tree object has not been written yet. */
    struct PendingDir
    {
        std::string name;
        TreeBuilder builder;
    };

    /** Stack of open directories; index 0 is the root tree. */
    std::vector<PendingDir> pendingDirs;

    /**
     * Number of leading path components that are dropped from every
     * incoming path. Paths with no more components than this are
     * ignored entirely (see `prepareDirs`).
     */
    size_t componentsToStrip = 1;

    /**
     * Open a new, empty tree builder called `name` on top of the stack.
     *
     * @throws Error if libgit2 cannot create the tree builder.
     */
    void pushBuilder(std::string name)
    {
        git_treebuilder * b;
        if (git_treebuilder_new(&b, *repo, nullptr))
            throw Error("creating a tree builder: %s", git_error_last()->message);
        pendingDirs.push_back({ .name = std::move(name), .builder = TreeBuilder(b) });
    }

    GitFileSystemObjectSinkImpl(ref<GitRepoImpl> repo) : repo(repo)
    {
        // The root directory has no name of its own.
        pushBuilder("");
    }

    /**
     * Write the tree object of the topmost pending directory and
     * remove it from the stack.
     *
     * @return the object ID of the written tree and the directory's name.
     * @throws Error if libgit2 cannot write the tree object.
     */
    std::pair<git_oid, std::string> popBuilder()
    {
        assert(!pendingDirs.empty());
        auto pending = std::move(pendingDirs.back());
        git_oid oid;
        if (git_treebuilder_write(&oid, pending.builder.get()))
            throw Error("creating a tree object: %s", git_error_last()->message);
        pendingDirs.pop_back();
        return {oid, std::move(pending.name)};
    }

    /**
     * Insert the entry `name` with object ID `oid` and file mode `mode`
     * into the topmost pending directory.
     *
     * @throws Error if libgit2 rejects the entry.
     */
    void addToTree(const std::string & name, const git_oid & oid, git_filemode_t mode)
    {
        assert(!pendingDirs.empty());
        auto & pending = pendingDirs.back();
        if (git_treebuilder_insert(nullptr, pending.builder.get(), name.c_str(), &oid, mode))
            throw Error("adding a file to a tree builder: %s", git_error_last()->message);
    }

    /**
     * Make the stack of pending directories (excluding the root) equal
     * to `names`: directories that are no longer on the path are
     * written and added to their parents, and builders are opened for
     * the directories that are new.
     */
    void updateBuilders(std::span<const std::string> names)
    {
        // Find the common prefix of pendingDirs and names. The root
        // entry at index 0 has no counterpart in `names`, hence the
        // offset of one.
        size_t prefixLen = 0;
        for (; prefixLen < names.size() && prefixLen + 1 < pendingDirs.size(); ++prefixLen)
            if (names[prefixLen] != pendingDirs[prefixLen + 1].name)
                break;

        // Finish the builders that are not part of the common prefix,
        // innermost first, so each tree lands in its parent.
        for (auto n = pendingDirs.size(); n > prefixLen + 1; --n) {
            auto [oid, name] = popBuilder();
            addToTree(name, oid, GIT_FILEMODE_TREE);
        }

        // Create builders for the new directories.
        for (auto n = prefixLen; n < names.size(); ++n)
            pushBuilder(names[n]);
    }

    /**
     * Strip the leading `componentsToStrip` components from
     * `pathComponents` and bring the builder stack in line with the
     * directory part of what remains.
     *
     * @param isDir whether the path itself denotes a directory; if
     * not, its last component is treated as a file name and only the
     * preceding components are opened as directories.
     * @return false if nothing is left after stripping (the member
     * should be skipped), true otherwise.
     */
    bool prepareDirs(const std::vector<std::string> & pathComponents, bool isDir)
    {
        std::span<const std::string> pathComponents2{pathComponents};

        if (pathComponents2.size() <= componentsToStrip) return false;
        pathComponents2 = pathComponents2.subspan(componentsToStrip);

        updateBuilders(
            isDir
            ? pathComponents2
            : pathComponents2.first(pathComponents2.size() - 1));

        return true;
    }

    /**
     * Store a regular file as a blob. `func` is handed a sink through
     * which it streams the file contents and may mark the file as
     * executable. Paths that are empty after stripping are ignored.
     *
     * @throws Error if libgit2 fails to create, write or commit the
     * blob; exceptions thrown by `func` are propagated.
     */
    void createRegularFile(
        const Path & path,
        std::function<void(CreateRegularFileSink &)> func) override
    {
        auto pathComponents = tokenizeString<std::vector<std::string>>(path, "/");
        if (!prepareDirs(pathComponents, false)) return;

        git_writestream * stream = nullptr;
        if (git_blob_create_from_stream(&stream, *repo, nullptr))
            throw Error("creating a blob stream object: %s", git_error_last()->message);

        struct CRF : CreateRegularFileSink {
            const Path & path;
            git_writestream * stream;
            bool executable = false;
            CRF(const Path & path, git_writestream * stream)
                : path(path), stream(stream)
            {}
            void operator () (std::string_view data) override
            {
                if (stream->write(stream, data.data(), data.size()))
                    throw Error("writing a blob for tarball member '%s': %s", path, git_error_last()->message);
            }
            void isExecutable() override
            {
                executable = true;
            }
        } crf { path, stream };

        // The stream is only released by
        // git_blob_create_from_stream_commit() below. If the callback
        // throws we never get there, so free the stream here to avoid
        // leaking it.
        try {
            func(crf);
        } catch (...) {
            stream->free(stream);
            throw;
        }

        git_oid oid;
        if (git_blob_create_from_stream_commit(&oid, stream))
            throw Error("creating a blob object for tarball member '%s': %s", path, git_error_last()->message);

        // prepareDirs() succeeded, so there is at least one component
        // left after stripping and back() is the file name.
        addToTree(pathComponents.back(), oid,
            crf.executable
            ? GIT_FILEMODE_BLOB_EXECUTABLE
            : GIT_FILEMODE_BLOB);
    }

    /**
     * Open tree builders for the directory `path` (and its parents).
     * Paths that are empty after stripping are ignored.
     */
    void createDirectory(const Path & path) override
    {
        auto pathComponents = tokenizeString<std::vector<std::string>>(path, "/");
        (void) prepareDirs(pathComponents, true);
    }

    /**
     * Store a symlink as a blob holding `target`, entered into its
     * directory with the link file mode. Paths that are empty after
     * stripping are ignored.
     *
     * @throws Error if libgit2 cannot create the blob.
     */
    void createSymlink(const Path & path, const std::string & target) override
    {
        auto pathComponents = tokenizeString<std::vector<std::string>>(path, "/");
        if (!prepareDirs(pathComponents, false)) return;

        git_oid oid;
        if (git_blob_create_from_buffer(&oid, *repo, target.c_str(), target.size()))
            throw Error("creating a blob object for tarball symlink member '%s': %s", path, git_error_last()->message);

        addToTree(pathComponents.back(), oid, GIT_FILEMODE_LINK);
    }

    /**
     * Write all pending directories, including the root, and return
     * the hash of the root tree. This empties `pendingDirs`, so the
     * sink must not be given further members afterwards (the helpers
     * assert that the stack is non-empty).
     */
    Hash sync() override {
        // Close every subdirectory, leaving only the root builder.
        updateBuilders({});

        auto [oid, _name] = popBuilder();

        return toHash(oid);
    }
};

ref<GitInputAccessor> GitRepoImpl::getRawAccessor(const Hash & rev)
{
auto self = ref<GitRepoImpl>(shared_from_this());
Expand Down Expand Up @@ -804,6 +955,11 @@ ref<InputAccessor> GitRepoImpl::getAccessor(const WorkdirInfo & wd, bool exportI
}
}

/**
 * Create a sink that writes file system objects into this repository.
 * The sink holds a reference to the repository.
 */
ref<GitFileSystemObjectSink> GitRepoImpl::getFileSystemObjectSink()
{
    auto self = ref<GitRepoImpl>(shared_from_this());
    return make_ref<GitFileSystemObjectSinkImpl>(self);
}

std::vector<std::tuple<GitRepoImpl::Submodule, Hash>> GitRepoImpl::getSubmodules(const Hash & rev, bool exportIgnore)
{
/* Read the .gitmodules files from this revision. */
Expand All @@ -830,5 +986,11 @@ std::vector<std::tuple<GitRepoImpl::Submodule, Hash>> GitRepoImpl::getSubmodules
return result;
}

/**
 * Open the repository at `<cache dir>/nix/tarball-cache`. The
 * directory path is computed once, on the first call.
 */
ref<GitRepo> getTarballCache()
{
    static auto repoDir = [] {
        std::filesystem::path dir(getCacheDir());
        dir /= "nix";
        dir /= "tarball-cache";
        return dir;
    }();

    return GitRepo::openRepo(repoDir, true, true);
}

}
19 changes: 13 additions & 6 deletions src/libfetchers/git-utils.hh
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,20 @@

#include "filtering-input-accessor.hh"
#include "input-accessor.hh"
#include "fs-sink.hh"

namespace nix {

namespace fetchers { struct PublicKey; }

/**
 * A `FileSystemObjectSink` that additionally lets the caller finish
 * writing and obtain a Git hash for what was written.
 */
struct GitFileSystemObjectSink : FileSystemObjectSink
{
/**
 * Flush builder and return a final Git hash.
 *
 * NOTE(review): implementations may not accept further members
 * after this has been called -- confirm against the implementation
 * in use.
 */
virtual Hash sync() = 0;
};

struct GitRepo
{
virtual ~GitRepo()
Expand Down Expand Up @@ -64,18 +73,14 @@ struct GitRepo
const std::string & url,
const std::string & base) = 0;

/**
 * NOTE(review): presumably describes an imported tarball -- confirm
 * against the code that produces and consumes this struct.
 */
struct TarballInfo
{
/* Presumably the hash of the tarball's Git tree object -- TODO confirm. */
Hash treeHash;
/* Presumably the tarball's last-modified timestamp -- TODO confirm. */
time_t lastModified;
};

virtual bool hasObject(const Hash & oid) = 0;

virtual ref<InputAccessor> getAccessor(const Hash & rev, bool exportIgnore) = 0;

virtual ref<InputAccessor> getAccessor(const WorkdirInfo & wd, bool exportIgnore, MakeNotAllowedError makeNotAllowedError) = 0;

virtual ref<GitFileSystemObjectSink> getFileSystemObjectSink() = 0;

virtual void fetch(
const std::string & url,
const std::string & refspec,
Expand All @@ -90,4 +95,6 @@ struct GitRepo
const std::vector<fetchers::PublicKey> & publicKeys) = 0;
};

ref<GitRepo> getTarballCache();

}
Loading
Loading