From 59b029c342381b89a5df9c249eac1e9582e578c8 Mon Sep 17 00:00:00 2001 From: Tianon Gravi Date: Fri, 19 Jan 2024 16:15:36 -0800 Subject: [PATCH] Update sources.sh to use bashbrew 0.1.10's `ArchGitChecksum` This makes a *very* dramatic difference in the speed of `sources.sh`. Before: ```console $ time .scripts/sources.sh $(cat subset.txt) > sources.json ... real 6m13.444s user 2m51.334s sys 3m29.256s ``` After: ```console $ time .scripts/sources.sh $(cat subset.txt) > sources.json ... real 0m31.238s user 0m32.769s sys 0m1.375s ``` --- .github/workflows/ci.yml | 4 +- .gitignore | 1 - go.mod | 2 +- go.sum | 4 +- sources.sh | 138 ++++++++++++--------------------------- tar-scrubber.go | 52 --------------- 6 files changed, 48 insertions(+), 153 deletions(-) delete mode 100644 tar-scrubber.go diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6dc9e9f..f582ca3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -25,8 +25,8 @@ jobs: run: | # not doing "uses: docker-library/bashbrew@xxx" because it'll build which is slow and we don't need more than just bashbrew here mkdir .bin - wget -O .bin/bashbrew 'https://github.com/docker-library/bashbrew/releases/download/v0.1.9/bashbrew-amd64' - echo '8cdd7adc707b972040577006f7a05b8e9d4dd362be5069f862fd1885f2eb107a *.bin/bashbrew' | sha256sum --strict --check - + wget -O .bin/bashbrew 'https://github.com/docker-library/bashbrew/releases/download/v0.1.11/bashbrew-amd64' + echo '6203635644d0efef2886f8ea9c487995a7abc4166db7a4773e94f89c943a4b04 *.bin/bashbrew' | sha256sum --strict --check - chmod +x .bin/bashbrew .bin/bashbrew --version echo "$PWD/.bin" >> "$GITHUB_PATH" diff --git a/.gitignore b/.gitignore index 16a4a81..d5905e3 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1 @@ builds -tar-scrubber diff --git a/go.mod b/go.mod index 20c3649..beaa74f 100644 --- a/go.mod +++ b/go.mod @@ -4,7 +4,7 @@ go 1.21 require ( github.com/containerd/containerd v1.6.19 - github.com/docker-library/bashbrew v0.1.9 + github.com/docker-library/bashbrew v0.1.11 github.com/opencontainers/image-spec v1.1.0-rc2.0.20221013174636-8159c8264e2e github.com/sirupsen/logrus v1.9.0 golang.org/x/time v0.5.0 diff --git a/go.sum b/go.sum index 2ed05f0..d1160f4 100644 --- a/go.sum +++ b/go.sum @@ -3,8 +3,8 @@ github.com/containerd/containerd v1.6.19/go.mod h1:HZCDMn4v/Xl2579/MvtOC2M206i+J github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/docker-library/bashbrew v0.1.9 h1:mDRel5sLJzpWbbnacZ2Y0RL6pRsCD/DI57vwEIX0uHQ= -github.com/docker-library/bashbrew v0.1.9/go.mod h1:fp+ljAv22z5OK3k7gutU/1eZz0lIRnM6SSvfJd9ABEE= +github.com/docker-library/bashbrew v0.1.11 h1:9S6jYFu0+RaqEAfvS2lh7jcaDkcvFi2maB2aU3yb0TM= +github.com/docker-library/bashbrew v0.1.11/go.mod h1:6fyRRSm4vgBAgTw87EsfOT7wXKsc4JA9I5cdQJmwOm8= github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= github.com/golang/protobuf v1.5.2 h1:ROPKBNFfQgOUMifHyP+KYbvpjbdoFNs+aK7DXlji0Tw= github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= diff --git a/sources.sh b/sources.sh index 2ae41f8..c34359c 100755 --- a/sources.sh +++ b/sources.sh @@ -24,18 +24,6 @@ defaultArchNamespaces=' : "${BASHBREW_ARCH_NAMESPACES=$defaultArchNamespaces}" export BASHBREW_ARCH_NAMESPACES -dir="$(dirname "$BASH_SOURCE")" -dir="$(readlink -ve "$dir")" -if [ "$dir/tar-scrubber.go" -nt "$dir/tar-scrubber" ] || [ "$dir/.go-env.sh" -nt "$dir/tar-scrubber" ]; then - { - echo "building '$dir/tar-scrubber' from 'tar-scrubber.go'" - "$dir/.go-env.sh" go build -v -o tar-scrubber tar-scrubber.go - ls -l "$dir/tar-scrubber" - } >&2 -fi -[ -x "$dir/tar-scrubber" ] -export tarScrubber="$dir/tar-scrubber" - # let's resolve all the external pins so we can inject those too libraryDir="${BASHBREW_LIBRARY:-"$HOME/docker/official-images/library"}" libraryDir="$(readlink -ve "$libraryDir")" @@ -51,99 +39,58 @@ for tag in $externalPins; do externalPinsJson="$(jq <<<"$externalPinsJson" -c --arg tag "${tag#library/}" --arg digest "$digest" '.[$tag] = $digest')" done -_sha256() { - sha256sum "$@" | cut -d' ' -f1 -} - -json="$( - bashbrew cat --build-order --format ' - {{- range $e := .SortedEntries false -}} - {{- range $a := $e.Architectures -}} - {{- $archNs := archNamespace $a -}} - {{- with $e -}} - { - "repo": {{ $.RepoName | json }}, - "arch": {{ $a | json }}, - "platformString": {{ (ociPlatform $a).String | json }}, - "platform": {{ ociPlatform $a | json }}, - "gitCache": {{ gitCache | json }}, - "tags": {{ $.Tags namespace false . | json }}, - "archTags": {{ if $archNs -}} {{ $.Tags $archNs false . | json }} {{- else -}} [] {{- end }}, +bashbrew cat --build-order --format ' + {{- range $e := .SortedEntries false -}} + {{- range $a := $e.Architectures -}} + {{- $archNs := archNamespace $a -}} + {{- with $e -}} + {{- $sum := $.ArchGitChecksum $a . -}} + {{- $file := .ArchFile $a -}} + {{- $builder := .ArchBuilder $a -}} + { + "sourceId": {{ join "\n" $sum $file $builder "" | sha256sum | json }}, + "reproducibleGitChecksum": {{ $sum | json }}, + "allTags": [], + "entry": { "GitRepo": {{ .ArchGitRepo $a | json }}, "GitFetch": {{ .ArchGitFetch $a | json }}, "GitCommit": {{ .ArchGitCommit $a | json }}, "Directory": {{ .ArchDirectory $a | json }}, - "File": {{ .ArchFile $a | json }}, - "Builder": {{ .ArchBuilder $a | json }}, - "froms": {{ $.ArchDockerFroms $a . | json }} + "File": {{ $file | json }}, + "Builder": {{ $builder | json }}, + "SOURCE_DATE_EPOCH": {{ ($.ArchGitTime $a .).Unix | json }} + }, + "arches": { + {{ $a | json }}: { + "tags": {{ $.Tags namespace false . | json }}, + "archTags": {{ if $archNs -}} {{ $.Tags $archNs false . | json }} {{- else -}} [] {{- end }}, + "froms": {{ $.ArchDockerFroms $a . | json }}, + "platformString": {{ (ociPlatform $a).String | json }}, + "platform": {{ ociPlatform $a | json }}, + "parents": { } + } } - {{- end -}} + } {{- end -}} {{- end -}} - ' "$@" -)" - -shell="$( - jq <<<"$json" -r ' - . as $e - | { - reproducibleGitChecksum: ( - [ - # TODO do this inside bashbrew? (could then use go-git to make an even more determistic tarball instead of munging Git afterwards, and could even do things like munge the Dockerfile to remove no-rebuild variance like comments and non-COPY-ed files) - "git -C \(.gitCache | @sh) archive --format=tar \(.GitCommit + ":" + (.Directory | if . == "." then "" else . + "/" end) | @sh)", - "\(env.tarScrubber | @sh) --sha256", - empty - ] | join(" | ") - ), - sourceId: "printf \("%s\\n" | @sh) \"$reproducibleGitChecksum\" \(.File | @sh) \(.Builder | @sh) | _sha256", # the combination of things that might cause a rebuild # TODO consider making this a compressed JSON object like buildId - SOURCE_DATE_EPOCH: "git -C \(.gitCache | @sh) show --no-patch --format=format:%ct \(.GitCommit | @sh)", - } - | to_entries - | [ - "printf >&2 \("%s (%s): " | @sh) \($e.tags[0]) \($e.arch)", - empty - ] - + map(.key + "=\"$(" + .value + ")\"") - + [ - "export \(map(.key) | join(" "))", - "printf >&2 \("%s\\n" | @sh) \"$sourceId\"", - ( - $e - | { - allTags: (.tags + .archTags), - entry: { - GitRepo: .GitRepo, - GitFetch: .GitFetch, - GitCommit: .GitCommit, - Directory: .Directory, - File: .File, - Builder: .Builder, - }, - arches: { - (.arch): { - tags: .tags, - archTags: .archTags, - froms: .froms, - platformString: .platformString, - platform: .platform, - }, - }, - } as $obj - | "jq <<<\($obj | tojson | @sh) -c \("{ sourceId: env.sourceId, reproducibleGitChecksum: env.reproducibleGitChecksum } + . | .entry.SOURCE_DATE_EPOCH = (env.SOURCE_DATE_EPOCH | tonumber)" | @sh)" - ), - empty - ] - | join("\n") - ' -)" -json="$(set -Eeuo pipefail; eval "$shell")" -jq <<<"$json" -s --argjson pins "$externalPinsJson" ' + {{- end -}} +' "$@" | jq 3>&1 1>&2 2>&3- -r ' + # https://github.com/jqlang/jq/issues/2063 - "stderr" cannot functionally output a string correctly until jq 1.7+ (which is very very recent), so we hack around it to get some progress output by using Bash to swap stdout and stderr so we can output our objects to stderr and our progress text to stdout and "fix it in post" + # TODO balk / error at multiple arches entries + first(.arches[].tags[], .arches[].archTags[]) as $tag + | first(.arches | keys_unsorted[]) as $arch + | stderr + | "\($tag) (\($arch)): \(.sourceId)" + # TODO if we could get jq 1.7+ for sure, we can drop this entire "jq" invocation and instead have the reduce loop of the following invocation print status strings directly to "stderr" +' | jq -n --argjson pins "$externalPinsJson" ' def unique_unsorted: # https://unix.stackexchange.com/a/738744/153467 reduce .[] as $a ([]; if IN(.[]; $a) then . else . += [$a] end) ; - reduce .[] as $in ({}; - .[$in.sourceId] |= ( + reduce inputs as $in ({}; + # TODO if we can get support for joining two arrays directly in bashbrew (sprig?) then we can avoid this .arches[] nonsense + ($in | .allTags = ([ $in.arches[].tags[], $in.arches[].archTags[] ] | unique_unsorted)) as $in + | .[$in.sourceId] |= if . == null then $in else @@ -167,8 +114,9 @@ jq <<<"$json" -s --argjson pins "$externalPinsJson" ' .entry = $in.entry else . end end - ) ) + # TODO a lot of this could be removed/parsed during the above reduce, since it has to parse things in build order anyhow + # TODO actually, instead, this bit should be a totally separate script so the use case of "combine sources.json files together" works better 👀 | ( reduce to_entries[] as $e ({}; $e.key as $sourceId diff --git a/tar-scrubber.go b/tar-scrubber.go deleted file mode 100644 index 4f5d89a..0000000 --- a/tar-scrubber.go +++ /dev/null @@ -1,52 +0,0 @@ -package main - -// TODO this should probably be part of bashbrew itself - -import ( - "archive/tar" - "crypto/sha256" - "fmt" - "io" - "os" -) - -func main() { - tr := tar.NewReader(os.Stdin) - - var out io.Writer = os.Stdout - if len(os.Args) >= 2 && os.Args[1] == "--sha256" { - h := sha256.New() - out = h - defer func() { - fmt.Printf("%x\n", h.Sum(nil)) - }() - } - - tw := tar.NewWriter(out) - defer tw.Flush() // note: flush instead of close to avoid the empty block at EOF - - for { - hdr, err := tr.Next() - if err == io.EOF { - break - } - if err != nil { - panic(err) - } - newHdr := &tar.Header{ - Typeflag: hdr.Typeflag, - Name: hdr.Name, - Linkname: hdr.Linkname, - Size: hdr.Size, - Mode: hdr.Mode, - Devmajor: hdr.Devmajor, - Devminor: hdr.Devminor, - } - if err := tw.WriteHeader(newHdr); err != nil { - panic(err) - } - if _, err := io.Copy(tw, tr); err != nil { - panic(err) - } - } -}