diff --git a/lib/datadog/ci/git/local_repository.rb b/lib/datadog/ci/git/local_repository.rb index f002c7d5..02dca051 100644 --- a/lib/datadog/ci/git/local_repository.rb +++ b/lib/datadog/ci/git/local_repository.rb @@ -159,17 +159,34 @@ def self.git_generate_packfiles(included_commits:, excluded_commits:, path:) nil end + def self.git_shallow_clone? + exec_git_command("git rev-parse --is-shallow-repository") == "true" + rescue => e + log_failure(e, "git shallow clone") + false + end + + def self.git_unshallow + exec_git_command( + "git fetch " \ + "--shallow-since=\"1 month ago\" " \ + "--update-shallow " \ + "--filter=\"blob:none\" " \ + "--recurse-submodules=no " \ + "$(git config --default origin --get clone.defaultRemoteName) $(git rev-parse HEAD)" + ) + rescue => e + log_failure(e, "git unshallow") + nil + end + # makes .exec_git_command private to make sure that this method # is not called from outside of this module with insecure parameters class << self private def filter_invalid_commits(commits) - commits.filter_map do |commit| - next unless Utils::Git.valid_commit_sha?(commit) - - commit - end + commits.filter { |commit| Utils::Git.valid_commit_sha?(commit) } end def exec_git_command(cmd, stdin: nil) diff --git a/lib/datadog/ci/git/tree_uploader.rb b/lib/datadog/ci/git/tree_uploader.rb index cb2a117d..36ea0142 100644 --- a/lib/datadog/ci/git/tree_uploader.rb +++ b/lib/datadog/ci/git/tree_uploader.rb @@ -26,8 +26,6 @@ def call(repository_url) Datadog.logger.debug { "Uploading git tree for repository #{repository_url}" } - # 2. Check if the repository clone is shallow and unshallow if appropriate - # TO BE ADDED IN CIVIS-2863 latest_commits = LocalRepository.git_commits head_commit = latest_commits&.first if head_commit.nil? @@ -36,23 +34,36 @@ def call(repository_url) end begin - excluded_commits, included_commits = split_known_commits(repository_url, latest_commits) - if included_commits.empty? 
+ # ask the backend for the list of commits it already has + known_commits, new_commits = fetch_known_commits_and_split(repository_url, latest_commits) + # if all commits are present in the backend, we don't need to upload anything + if new_commits.empty? Datadog.logger.debug("No new commits to upload") return end + + # quite often we deal with shallow clones in CI environments + if LocalRepository.git_shallow_clone? && LocalRepository.git_unshallow + Datadog.logger.debug("Detected shallow clone and unshallowed the repository, repeating commits search") + + # re-run the search with the updated commit list after unshallowing + known_commits, new_commits = fetch_known_commits_and_split( + repository_url, + LocalRepository.git_commits + ) + end rescue SearchCommits::ApiError => e Datadog.logger.debug("SearchCommits failed with #{e}, aborting git upload") return end - Datadog.logger.debug { "Uploading packfiles for commits: #{included_commits}" } + Datadog.logger.debug { "Uploading packfiles for commits: #{new_commits}" } uploader = UploadPackfile.new( api: api, head_commit_sha: head_commit, repository_url: repository_url ) - Packfiles.generate(included_commits: included_commits, excluded_commits: excluded_commits) do |filepath| + Packfiles.generate(included_commits: new_commits, excluded_commits: known_commits) do |filepath| uploader.call(filepath: filepath) rescue UploadPackfile::ApiError => e Datadog.logger.debug("Packfile upload failed with #{e}") @@ -62,7 +73,9 @@ def call(repository_url) private - def split_known_commits(repository_url, latest_commits) + # Split the latest commits list into known and new commits + # based on the backend response provided by the /search_commits endpoint + def fetch_known_commits_and_split(repository_url, latest_commits) Datadog.logger.debug { "Checking the latest commits list with backend: #{latest_commits}" } backend_commits = SearchCommits.new(api: api).call(repository_url, latest_commits) latest_commits.partition do |commit| diff 
--git a/sig/datadog/ci/git/local_repository.rbs b/sig/datadog/ci/git/local_repository.rbs index add6cb97..6354161c 100644 --- a/sig/datadog/ci/git/local_repository.rbs +++ b/sig/datadog/ci/git/local_repository.rbs @@ -33,6 +33,10 @@ module Datadog def self.git_generate_packfiles: (included_commits: Enumerable[String], excluded_commits: Enumerable[String], path: String) -> String? + def self.git_shallow_clone?: () -> bool + + def self.git_unshallow: () -> String? + private def self.filter_invalid_commits: (Enumerable[String] commits) -> Array[String] diff --git a/sig/datadog/ci/git/tree_uploader.rbs b/sig/datadog/ci/git/tree_uploader.rbs index 5ef2c7dd..43eeb649 100644 --- a/sig/datadog/ci/git/tree_uploader.rbs +++ b/sig/datadog/ci/git/tree_uploader.rbs @@ -11,7 +11,7 @@ module Datadog private - def split_known_commits: (String repository_url, Array[String] latest_commits) -> [Array[String], Array[String]] + def fetch_known_commits_and_split: (String repository_url, Array[String] latest_commits) -> [Array[String], Array[String]] end end end diff --git a/spec/datadog/ci/git/local_repository_spec.rb b/spec/datadog/ci/git/local_repository_spec.rb index cdb8a166..9ad07089 100644 --- a/spec/datadog/ci/git/local_repository_spec.rb +++ b/spec/datadog/ci/git/local_repository_spec.rb @@ -293,4 +293,82 @@ def with_custom_git_environment it { is_expected.to eq("first-tag") } end end + + context "with shallow clone" do + let(:tmpdir) { Dir.mktmpdir } + after { FileUtils.remove_entry(tmpdir) } + + before do + # shallow clone datadog-ci-rb repository + `cd #{tmpdir} && git clone --depth 1 https://github.com/DataDog/datadog-ci-rb` + end + + def with_shallow_clone_git_dir + ClimateControl.modify("GIT_DIR" => File.join(tmpdir, "datadog-ci-rb/.git")) do + yield + end + end + + describe ".git_shallow_clone?" do + subject do + with_shallow_clone_git_dir { described_class.git_shallow_clone? 
} + end + + it { is_expected.to be_truthy } + end + + describe ".git_commits" do + subject do + with_shallow_clone_git_dir { described_class.git_commits } + end + + it "returns a list of single git commit sha" do + expect(subject).to be_kind_of(Array) + expect(subject).not_to be_empty + expect(subject).to have(1).item + expect(subject.first).to match(/^\h{40}$/) + end + end + + describe ".git_unshallow" do + # skip for jruby for now - old git version in the DD docker image + before { skip if PlatformHelpers.jruby? } + + subject do + with_shallow_clone_git_dir { described_class.git_unshallow } + end + let(:commits) do + with_shallow_clone_git_dir { described_class.git_commits } + end + + it "unshallows the repository" do + expect(subject).to be_truthy + expect(commits.size).to be > 1 + end + end + end + + context "with full clone" do + let(:tmpdir) { Dir.mktmpdir } + after { FileUtils.remove_entry(tmpdir) } + + before do + # full clone of the datadog-ci-rb repository + `cd #{tmpdir} && git clone https://github.com/DataDog/datadog-ci-rb` + end + + def with_full_clone_git_dir + ClimateControl.modify("GIT_DIR" => File.join(tmpdir, "datadog-ci-rb/.git")) do + yield + end + end + + describe ".git_shallow_clone?" do + subject do + with_full_clone_git_dir { described_class.git_shallow_clone? 
} + end + + it { is_expected.to be_falsey } + end + end end diff --git a/spec/datadog/ci/git/tree_uploader_spec.rb b/spec/datadog/ci/git/tree_uploader_spec.rb index 7af2d61a..19e9c55a 100644 --- a/spec/datadog/ci/git/tree_uploader_spec.rb +++ b/spec/datadog/ci/git/tree_uploader_spec.rb @@ -19,7 +19,6 @@ let(:search_commits) { double("search_commits", call: backend_commits) } before do - allow(Datadog::CI::Git::LocalRepository).to receive(:git_commits).and_return(latest_commits) allow(Datadog::CI::Git::SearchCommits).to receive(:new).with(api: api).and_return(search_commits) end @@ -33,73 +32,122 @@ end end - context "when the latest commits list is empty" do - let(:latest_commits) { [] } - - it "logs a debug message and aborts the git upload" do - expect(Datadog.logger).to receive(:debug).with("Got empty latest commits list, aborting git upload") - - tree_uploader.call(repository_url) - end - end - - context "when the backend commits search fails" do + context "when API is configured" do before do - expect(search_commits).to receive(:call).and_raise(Datadog::CI::Git::SearchCommits::ApiError, "test error") + expect(Datadog::CI::Git::LocalRepository).to receive(:git_commits).and_return(latest_commits) end - it "logs a debug message and aborts the git upload" do - expect(Datadog.logger).to receive(:debug).with("SearchCommits failed with test error, aborting git upload") - - tree_uploader.call(repository_url) - end - end - - context "when all commits are known to the backend" do - let(:backend_commits) { latest_commits } - - it "logs a debug message and aborts the git upload" do - expect(Datadog.logger).to receive(:debug).with("No new commits to upload") - - tree_uploader.call(repository_url) - end - end + context "when the latest commits list is empty" do + let(:latest_commits) { [] } - context "when some commits are new" do - let(:upload_packfile) { double("upload_packfile", call: nil) } + it "logs a debug message and aborts the git upload" do + 
expect(Datadog.logger).to receive(:debug).with("Got empty latest commits list, aborting git upload") - before do - expect(Datadog::CI::Git::Packfiles).to receive(:generate).with( - included_commits: latest_commits - backend_commits.to_a, - excluded_commits: backend_commits - ).and_yield("packfile_path") - - expect(Datadog::CI::Git::UploadPackfile).to receive(:new).with( - api: api, - head_commit_sha: head_commit, - repository_url: repository_url - ).and_return(upload_packfile) + tree_uploader.call(repository_url) + end end - context "when the packfile upload fails" do + context "when the backend commits search fails" do before do - expect(upload_packfile).to receive(:call).and_raise(Datadog::CI::Git::UploadPackfile::ApiError, "test error") + expect(search_commits).to receive(:call).and_raise(Datadog::CI::Git::SearchCommits::ApiError, "test error") end it "logs a debug message and aborts the git upload" do - expect(Datadog.logger).to receive(:debug).with("Packfile upload failed with test error") + expect(Datadog.logger).to receive(:debug).with("SearchCommits failed with test error, aborting git upload") tree_uploader.call(repository_url) end end - context "when the packfile upload succeeds" do - it "uploads the new commits" do - expect(upload_packfile).to receive(:call).with(filepath: "packfile_path").and_return(nil) + context "when all commits are known to the backend" do + let(:backend_commits) { latest_commits } + + it "logs a debug message and aborts the git upload" do + expect(Datadog.logger).to receive(:debug).with("No new commits to upload") tree_uploader.call(repository_url) end end + + context "when some commits are new" do + let(:upload_packfile) { double("upload_packfile", call: nil) } + + context "when the repository is shallow cloned" do + before do + expect(Datadog::CI::Git::LocalRepository).to receive(:git_shallow_clone?).and_return(true) + end + + context "when the unshallowing fails" do + before do + expect(Datadog::CI::Git::LocalRepository).to 
receive(:git_unshallow).and_return(nil) + end + + it "uploads what we can upload" do + expect(Datadog::CI::Git::Packfiles).to receive(:generate).with( + included_commits: %w[13c988d4f15e06bcdd0b0af290086a3079cdadb0], + excluded_commits: backend_commits + ).and_yield("packfile_path") + + tree_uploader.call(repository_url) + end + end + + context "when the unshallowing succeeds" do + before do + expect(Datadog::CI::Git::LocalRepository).to receive(:git_unshallow).and_return("unshallow_result") + expect(Datadog::CI::Git::LocalRepository).to receive(:git_commits).and_return( + latest_commits + %w[782d09e3fbfd8cf1b5c13f3eb9621362f9089ed5] + ) + end + + it "uploads the new commits" do + expect(Datadog::CI::Git::Packfiles).to receive(:generate).with( + included_commits: %w[13c988d4f15e06bcdd0b0af290086a3079cdadb0 782d09e3fbfd8cf1b5c13f3eb9621362f9089ed5], + excluded_commits: backend_commits + ).and_yield("packfile_path") + + tree_uploader.call(repository_url) + end + end + end + + context "when the repository is not shallow cloned" do + before do + expect(Datadog::CI::Git::LocalRepository).to receive(:git_shallow_clone?).and_return(false) + + expect(Datadog::CI::Git::Packfiles).to receive(:generate).with( + included_commits: latest_commits - backend_commits.to_a, + excluded_commits: backend_commits + ).and_yield("packfile_path") + + expect(Datadog::CI::Git::UploadPackfile).to receive(:new).with( + api: api, + head_commit_sha: head_commit, + repository_url: repository_url + ).and_return(upload_packfile) + end + + context "when the packfile upload fails" do + before do + expect(upload_packfile).to receive(:call).and_raise(Datadog::CI::Git::UploadPackfile::ApiError, "test error") + end + + it "logs a debug message and aborts the git upload" do + expect(Datadog.logger).to receive(:debug).with("Packfile upload failed with test error") + + tree_uploader.call(repository_url) + end + end + + context "when the packfile upload succeeds" do + it "uploads the new commits" do + 
expect(upload_packfile).to receive(:call).with(filepath: "packfile_path").and_return(nil) + + tree_uploader.call(repository_url) + end + end + end + end end end end