diff --git a/pkg/executor/build.go b/pkg/executor/build.go
index 889da2a5b1..794e0a9d1d 100644
--- a/pkg/executor/build.go
+++ b/pkg/executor/build.go
@@ -44,6 +44,7 @@ import (
 	"github.com/GoogleContainerTools/kaniko/pkg/filesystem"
 	image_util "github.com/GoogleContainerTools/kaniko/pkg/image"
 	"github.com/GoogleContainerTools/kaniko/pkg/image/remote"
+	"github.com/GoogleContainerTools/kaniko/pkg/imagefs"
 	"github.com/GoogleContainerTools/kaniko/pkg/snapshot"
 	"github.com/GoogleContainerTools/kaniko/pkg/timing"
 	"github.com/GoogleContainerTools/kaniko/pkg/util"
@@ -731,9 +732,12 @@ func (s *stageBuilder) saveLayerToImage(layer v1.Layer, createdBy string) error
 	return err
 }
 
-func CalculateDependencies(stages []config.KanikoStage, opts *config.KanikoOptions, stageNameToIdx map[string]string) (map[int][]string, error) {
+func CalculateDependencies(stages []config.KanikoStage, opts *config.KanikoOptions, stageNameToIdx map[string]string) (map[int][]string, map[string][]string, error) {
 	images := []v1.Image{}
-	depGraph := map[int][]string{}
+	stageDepGraph := map[int][]string{}
+	// imageDepGraph tracks dependencies on non-stage images so that
+	// imagefs can serve them without extracting the image to disk.
+	imageDepGraph := map[string][]string{}
 	for _, s := range stages {
 		ba := dockerfile.NewBuildArgs(opts.BuildArgs)
 		ba.AddMetaArgs(s.MetaArgs)
@@ -746,12 +750,12 @@ func CalculateDependencies(stages []config.KanikoStage, opts *config.KanikoOptio
 		} else {
 			image, err = image_util.RetrieveSourceImage(s, opts)
 			if err != nil {
-				return nil, err
+				return nil, nil, err
 			}
 		}
 		cfg, err := initializeConfig(image, opts)
 		if err != nil {
-			return nil, err
+			return nil, nil, err
 		}
 
 		cmds, err := dockerfile.GetOnBuildInstructions(&cfg.Config, stageNameToIdx)
@@ -761,29 +765,30 @@ func CalculateDependencies(stages []config.KanikoStage, opts *config.KanikoOptio
 			switch cmd := c.(type) {
 			case *instructions.CopyCommand:
 				if cmd.From != "" {
-					i, err := strconv.Atoi(cmd.From)
-					if err != nil {
-						continue
-					}
 					resolved, err := util.ResolveEnvironmentReplacementList(cmd.SourcesAndDest.SourcePaths, ba.ReplacementEnvs(cfg.Config.Env), true)
 					if err != nil {
-						return nil, err
+						return nil, nil, err
+					}
+					i, err := strconv.Atoi(cmd.From)
+					if err == nil {
+						stageDepGraph[i] = append(stageDepGraph[i], resolved...)
+					} else {
+						imageDepGraph[cmd.From] = append(imageDepGraph[cmd.From], resolved...)
 					}
-					depGraph[i] = append(depGraph[i], resolved...)
} case *instructions.EnvCommand: if err := util.UpdateConfigEnv(cmd.Env, &cfg.Config, ba.ReplacementEnvs(cfg.Config.Env)); err != nil { - return nil, err + return nil, nil, err } image, err = mutate.Config(image, cfg.Config) if err != nil { - return nil, err + return nil, nil, err } case *instructions.ArgCommand: for _, arg := range cmd.Args { k, v, err := commands.ParseArg(arg.Key, arg.Value, cfg.Config.Env, ba) if err != nil { - return nil, err + return nil, nil, err } ba.AddArg(k, v) } @@ -791,7 +796,7 @@ func CalculateDependencies(stages []config.KanikoStage, opts *config.KanikoOptio } images = append(images, image) } - return depGraph, nil + return stageDepGraph, imageDepGraph, nil } // DoBuild executes building the Dockerfile @@ -816,15 +821,17 @@ func DoBuild(opts *config.KanikoOptions) (v1.Image, error) { return nil, err } - // Some stages may refer to other random images, not previous stages - if err := fetchExtraStages(kanikoStages, opts); err != nil { - return nil, err - } - crossStageDependencies, err := CalculateDependencies(kanikoStages, opts, stageNameToIdx) + crossStageDependencies, imageDependencies, err := CalculateDependencies(kanikoStages, opts, stageNameToIdx) if err != nil { return nil, err } logrus.Infof("Built cross stage deps: %v", crossStageDependencies) + logrus.Infof("Built image deps: %v", imageDependencies) + + // Some stages may refer to other random images, not previous stages + if err := fetchExtraStages(kanikoStages, opts, false, imageDependencies); err != nil { + return nil, errors.Wrap(err, "fetch extra stages failed") + } var args *dockerfile.BuildArgs @@ -940,6 +947,12 @@ func DoBuild(opts *config.KanikoOptions) (v1.Image, error) { // cache without modifying the filesystem. // Returns an error if any layers are missing from build cache. func DoCacheProbe(opts *config.KanikoOptions) (v1.Image, error) { + // Restore the filesystem after we're done since we're using imagefs. 
+ origFS := filesystem.FS + defer func() { + filesystem.SetFS(origFS) + }() + digestToCacheKey := make(map[string]string) stageIdxToDigest := make(map[string]string) @@ -959,15 +972,16 @@ func DoCacheProbe(opts *config.KanikoOptions) (v1.Image, error) { return nil, err } - // Some stages may refer to other random images, not previous stages - if err := fetchExtraStages(kanikoStages, opts); err != nil { - return nil, err - } - crossStageDependencies, err := CalculateDependencies(kanikoStages, opts, stageNameToIdx) + crossStageDependencies, imageDependencies, err := CalculateDependencies(kanikoStages, opts, stageNameToIdx) if err != nil { return nil, err } logrus.Infof("Built cross stage deps: %v", crossStageDependencies) + logrus.Infof("Built image deps: %v", imageDependencies) + // Some stages may refer to other random images, not previous stages + if err := fetchExtraStages(kanikoStages, opts, true, imageDependencies); err != nil { + return nil, errors.Wrap(err, "fetch extra stages failed") + } var args *dockerfile.BuildArgs @@ -1021,6 +1035,19 @@ func DoCacheProbe(opts *config.KanikoOptions) (v1.Image, error) { digestToCacheKey[d.String()] = sb.finalCacheKey logrus.Infof("Mapping digest %v to cachekey %v", d.String(), sb.finalCacheKey) + if filesToCache, ok := crossStageDependencies[sb.stage.Index]; ok { + ifs, err := imagefs.New( + filesystem.FS, + filepath.Join(config.KanikoDir, strconv.Itoa(sb.stage.Index)), + sourceImage, + filesToCache, + ) + if err != nil { + return nil, errors.Wrap(err, "could not create image filesystem") + } + filesystem.SetFS(ifs) + } + if stage.Final { sourceImage, err = mutateCanonicalWithoutLayerEdit(sourceImage) if err != nil { @@ -1143,7 +1170,7 @@ func deduplicatePaths(paths []string) []string { return deduped } -func fetchExtraStages(stages []config.KanikoStage, opts *config.KanikoOptions) error { +func fetchExtraStages(stages []config.KanikoStage, opts *config.KanikoOptions, cacheProbe bool, imageDependencies map[string][]string) error { t := timing.Start("Fetching Extra Stages") defer timing.DefaultRun.Stop(t) @@ -1177,8 +1204,21 @@ func fetchExtraStages(stages []config.KanikoStage, opts *config.KanikoOptions) e if err := saveStageAsTarball(c.From, sourceImage); err != nil { return err } - if err := extractImageToDependencyDir(c.From, sourceImage); err != nil { - return err + if !cacheProbe { + if err := extractImageToDependencyDir(c.From, sourceImage); err != nil { + return err + } + } else { + ifs, err := imagefs.New( + filesystem.FS, + filepath.Join(config.KanikoDir, c.From), + sourceImage, + imageDependencies[c.From], + ) + if err != nil { + return errors.Wrap(err, "could not create image filesystem") + } + filesystem.SetFS(ifs) } } // Store the name of the current stage in the list with names, if applicable. 
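The cacheProbe branch of fetchExtraStages above swaps extractImageToDependencyDir for an imagefs overlay. Below is a minimal sketch of that wiring, using only the APIs visible in this diff (imagefs.New, filesystem.FS/SetFS, config.KanikoDir); the package and the helper probeImageDeps are hypothetical, not part of the patch:

    package probe

    import (
    	"path/filepath"

    	v1 "github.com/google/go-containerregistry/pkg/v1"

    	"github.com/GoogleContainerTools/kaniko/pkg/config"
    	"github.com/GoogleContainerTools/kaniko/pkg/filesystem"
    	"github.com/GoogleContainerTools/kaniko/pkg/imagefs"
    )

    // probeImageDeps (hypothetical) mirrors the cacheProbe branch of
    // fetchExtraStages: rather than extracting sourceImage under
    // config.KanikoDir, it overlays the files a later COPY --from will
    // need, so CacheHasher can hash them in place.
    func probeImageDeps(from string, sourceImage v1.Image, imageDependencies map[string][]string) error {
    	ifs, err := imagefs.New(
    		filesystem.FS,                         // current filesystem becomes the read-only parent
    		filepath.Join(config.KanikoDir, from), // where extraction would have put the image
    		sourceImage,
    		imageDependencies[from], // only these paths are walked, hashed, and cached
    	)
    	if err != nil {
    		return err
    	}
    	// Subsequent Stat/Lstat/Open/ReadDir calls try the parent FS first,
    	// then fall back to the cached image entries (see pkg/imagefs below).
    	filesystem.SetFS(ifs)
    	return nil
    }

Since imagefs.New merges a new root into an existing *imageFS parent rather than wrapping it, calling this once per extra stage adds roots to a single overlay instead of nesting filesystems.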
diff --git a/pkg/executor/build_test.go b/pkg/executor/build_test.go index fadf66ad7c..02db6e76cd 100644 --- a/pkg/executor/build_test.go +++ b/pkg/executor/build_test.go @@ -216,9 +216,10 @@ func TestCalculateDependencies(t *testing.T) { mockInitConfig func(partial.WithConfigFile, *config.KanikoOptions) (*v1.ConfigFile, error) } tests := []struct { - name string - args args - want map[int][]string + name string + args args + want map[int][]string + wantImage map[string][]string }{ { name: "no deps", @@ -359,9 +360,27 @@ COPY --from=second /bar /bat 1: {"/bar"}, }, }, + { + name: "dependency from image", + args: args{ + dockerfile: ` +FROM scratch as target +COPY --from=alpine /etc/alpine-release /etc/alpine-release +`, + }, + wantImage: map[string][]string{ + "alpine": {"/etc/alpine-release"}, + }, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { + if tt.want == nil { + tt.want = map[int][]string{} + } + if tt.wantImage == nil { + tt.wantImage = map[string][]string{} + } if tt.args.mockInitConfig != nil { original := initializeConfig defer func() { initializeConfig = original }() @@ -385,14 +404,18 @@ COPY --from=second /bar /bat } stageNameToIdx := ResolveCrossStageInstructions(kanikoStages) - got, err := CalculateDependencies(kanikoStages, opts, stageNameToIdx) + got, gotImage, err := CalculateDependencies(kanikoStages, opts, stageNameToIdx) if err != nil { t.Errorf("got error: %s,", err) } if !reflect.DeepEqual(got, tt.want) { diff := cmp.Diff(got, tt.want) - t.Errorf("CalculateDependencies() = %v, want %v, diff %v", got, tt.want, diff) + t.Errorf("CalculateDependencies() crossStageDependencies = %v, want %v, diff %v", got, tt.want, diff) + } + if !reflect.DeepEqual(gotImage, tt.wantImage) { + diff := cmp.Diff(gotImage, tt.wantImage) + t.Errorf("CalculateDependencies() imageDependencies = %v, wantImage %v, diff %v", gotImage, tt.wantImage, diff) } }) } diff --git a/pkg/executor/cache_probe_test.go b/pkg/executor/cache_probe_test.go index ce13a16c71..e028db9813 100644 --- a/pkg/executor/cache_probe_test.go +++ b/pkg/executor/cache_probe_test.go @@ -165,8 +165,6 @@ COPY foo/baz.txt copied/ }) t.Run("MultiStage", func(t *testing.T) { - t.Skip("TODO: https://github.com/coder/envbuilder/issues/230") - // Share cache between both builds. regCache := setupCacheRegistry(t) @@ -175,10 +173,12 @@ COPY foo/baz.txt copied/ dockerFile := ` FROM scratch as first COPY foo/bam.txt copied/ + COPY foo/bam.link copied/ ENV test test From scratch as second - COPY --from=first copied/bam.txt output/bam.txt` + COPY --from=first copied/bam.txt output/bam.txt + COPY --from=first copied/bam.link output/bam.link` err := filesystem.WriteFile(filepath.Join(testDir, "workspace", "Dockerfile"), []byte(dockerFile), 0o755) testutil.CheckNoError(t, err) opts := &config.KanikoOptions{ diff --git a/pkg/imagefs/imagefs.go b/pkg/imagefs/imagefs.go new file mode 100644 index 0000000000..05fe13896c --- /dev/null +++ b/pkg/imagefs/imagefs.go @@ -0,0 +1,297 @@ +/* +Copyright 2018 Google LLC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package imagefs
+
+import (
+	"archive/tar"
+	"crypto/md5"
+	"fmt"
+	"io"
+	"io/fs"
+	"os"
+	"path/filepath"
+	"strconv"
+	"strings"
+	"sync"
+	"syscall"
+	"time"
+
+	v1 "github.com/google/go-containerregistry/pkg/v1"
+	"github.com/pkg/errors"
+	"github.com/sirupsen/logrus"
+	"github.com/twpayne/go-vfs/v5"
+
+	"github.com/GoogleContainerTools/kaniko/pkg/util"
+)
+
+type imageFS struct {
+	vfs.FS
+
+	mu    sync.RWMutex // Protects following.
+	image map[string]v1.Image
+	dirs  map[string]*cachedDir
+	files map[string]imageFSFile
+}
+
+type imageFSFile interface {
+	fs.File
+	fs.FileInfo
+	fs.DirEntry
+}
+
+func New(parent vfs.FS, root string, image v1.Image, filesToCache []string) (vfs.FS, error) {
+	var ifs *imageFS
+
+	// Avoid nesting imageFS instances; merge additional roots into the existing one.
+	if pfs, ok := parent.(*imageFS); ok {
+		pfs.mu.Lock()
+		defer pfs.mu.Unlock()
+
+		if _, ok := pfs.image[root]; ok {
+			return nil, fmt.Errorf("imagefs: root already exists: %s", root)
+		}
+		pfs.image[root] = image
+		ifs = pfs
+	} else {
+		ifs = &imageFS{
+			FS:    vfs.NewReadOnlyFS(parent),
+			image: map[string]v1.Image{root: image},
+			dirs:  make(map[string]*cachedDir),
+			files: make(map[string]imageFSFile),
+		}
+	}
+
+	// Walk the image and cache the file info and hashes of the requested files.
+	_, err := util.GetFSFromImage(root, image, func(dest string, hdr *tar.Header, cleanedName string, tr io.Reader) error {
+		// Trim prefix for consistent path.
+		cleanedName = strings.TrimPrefix(cleanedName, "/")
+
+		for _, f := range filesToCache {
+			dest := filepath.Join(root, cleanedName)
+			f = strings.TrimPrefix(f, "/")
+
+			// Check if the file matches the requested file.
+			if ok, err := filepath.Match(f, cleanedName); ok && err == nil {
+				logrus.Debugf("imagefs: Found cacheable file %q (%s) (%d:%d)", f, dest, hdr.Uid, hdr.Gid)
+
+				sum, err := hashFile(hdr, tr)
+				if err != nil {
+					return errors.Wrap(err, "imagefs: hash file failed")
+				}
+
+				f := newCachedFileInfo(dest, hdr)
+				ifs.files[dest] = newCachedFileInfoWithMD5Sum(f, sum)
+
+				return nil
+			}
+
+			// Parent directories are needed for lookup.
+ if cleanedName == "" || strings.HasPrefix(f, cleanedName+"/") { + logrus.Debugf("imagefs: Found cacheable file parent %q (%s)", f, dest) + + ifs.files[dest] = newCachedFileInfo(dest, hdr) + } + } + return nil + }) + if err != nil { + return nil, errors.Wrap(err, "imagefs: walk image failed") + } + + for dir, d := range ifs.files { + if !d.IsDir() { + continue + } + ifs.dirs[dir] = &cachedDir{FileInfo: d} + for name, fi := range ifs.files { + if filepath.Dir(name) == dir { + ifs.dirs[dir].entry = append(ifs.dirs[dir].entry, fi) + } + } + } + + return ifs, nil +} + +func (ifs *imageFS) Open(name string) (fs.File, error) { + logrus.Debugf("imagefs: Open file %s", name) + if f, err := ifs.FS.Open(name); err == nil { + return f, nil + } + + ifs.mu.RLock() + defer ifs.mu.RUnlock() + if ifs.files[name] != nil { + logrus.Debugf("imagefs: Open cached file %s", name) + return ifs.files[name], nil + } + return nil, fs.ErrNotExist +} + +func (ifs *imageFS) Lstat(name string) (fs.FileInfo, error) { + logrus.Debugf("imagefs: Lstat file %s", name) + if fi, err := ifs.FS.Lstat(name); err == nil { + return fi, nil + } + + ifs.mu.RLock() + defer ifs.mu.RUnlock() + if ifs.files[name] != nil { + logrus.Debugf("imagefs: Lstat cached file %s", name) + return ifs.files[name], nil + } + return nil, fs.ErrNotExist +} + +func (ifs *imageFS) Stat(name string) (fs.FileInfo, error) { + logrus.Debugf("imagefs: Stat file %s", name) + if fi, err := ifs.FS.Stat(name); err == nil { + return fi, nil + } + + ifs.mu.RLock() + defer ifs.mu.RUnlock() + if ifs.files[name] != nil { + logrus.Debugf("imagefs: Stat cached file %s", name) + return ifs.files[name], nil + } + return nil, fs.ErrNotExist +} + +func (ifs *imageFS) ReadDir(name string) ([]fs.DirEntry, error) { + logrus.Debugf("imagefs: Reading directory %s", name) + if de, err := ifs.FS.ReadDir(name); err == nil { + return de, nil + } + + ifs.mu.RLock() + defer ifs.mu.RUnlock() + for dir, d := range ifs.dirs { + if ok, err := filepath.Match(name, dir); ok && err == nil { + logrus.Debugf("imagefs: Reading cached directory %s", name) + return d.entry, nil + } + } + return nil, fs.ErrNotExist +} + +type cachedDir struct { + fs.FileInfo + entry []fs.DirEntry +} + +type cachedFileInfo struct { + fs.FileInfo + path string + hdr *tar.Header + sys *syscall.Stat_t +} + +func newCachedFileInfo(path string, hdr *tar.Header) *cachedFileInfo { + return &cachedFileInfo{ + FileInfo: hdr.FileInfo(), + path: path, + hdr: hdr, + sys: tarHeaderToStat_t(hdr), + } +} + +func (cf *cachedFileInfo) Sys() interface{} { + logrus.Debugf("imagefs: Sys cached file: %s", cf.path) + return cf.sys +} + +func (cf *cachedFileInfo) Stat() (fs.FileInfo, error) { + logrus.Debugf("imagefs: Stat cached file: %s", cf.path) + return cf, nil +} + +func (cf *cachedFileInfo) Read(p []byte) (n int, err error) { + return 0, fmt.Errorf("imagefs: Read cached file is not allowed: %s", cf.path) +} + +func (cf *cachedFileInfo) Type() fs.FileMode { + logrus.Debugf("imagefs: Type cached file: %s", cf.path) + return cf.Mode() +} + +func (cf *cachedFileInfo) Info() (fs.FileInfo, error) { + logrus.Debugf("imagefs: Info cached file: %s", cf.path) + return cf, nil +} + +func (cf *cachedFileInfo) Close() error { + logrus.Debugf("imagefs: Close cached file: %s", cf.path) + return nil +} + +type cachedFileInfoWithMD5Sum struct { + *cachedFileInfo + md5sum []byte +} + +func newCachedFileInfoWithMD5Sum(fi *cachedFileInfo, md5sum []byte) *cachedFileInfoWithMD5Sum { + return &cachedFileInfoWithMD5Sum{ + cachedFileInfo: fi, + md5sum: 
md5sum,
+	}
+}
+
+// Ensure that cachedFileInfoWithMD5Sum implements the CacheHasherFileInfoSum interface.
+var _ util.CacheHasherFileInfoSum = &cachedFileInfoWithMD5Sum{}
+
+func (cf *cachedFileInfoWithMD5Sum) MD5Sum() ([]byte, error) {
+	logrus.Debugf("imagefs: MD5Sum cached file: %s", cf.path)
+	return cf.md5sum, nil
+}
+
+// tarHeaderToStat_t converts a tar.Header to a syscall.Stat_t.
+func tarHeaderToStat_t(hdr *tar.Header) *syscall.Stat_t {
+	fi := hdr.FileInfo()
+	return &syscall.Stat_t{
+		Mode: uint32(fi.Mode()),
+		Uid:  uint32(hdr.Uid),
+		Gid:  uint32(hdr.Gid),
+		Size: fi.Size(),
+		Atim: timespec(hdr.AccessTime),
+		Ctim: timespec(hdr.ChangeTime),
+		Mtim: timespec(fi.ModTime()),
+	}
+}
+
+func timespec(t time.Time) syscall.Timespec {
+	return syscall.Timespec{Sec: t.Unix(), Nsec: int64(t.Nanosecond())}
+}
+
+// hashFile hashes the given file; the implementation must match util.CacheHasher.
+func hashFile(hdr *tar.Header, r io.Reader) ([]byte, error) {
+	fi := hdr.FileInfo()
+
+	h := md5.New()
+	h.Write([]byte(fi.Mode().String()))
+	h.Write([]byte(strconv.FormatUint(uint64(hdr.Uid), 36)))
+	h.Write([]byte(","))
+	h.Write([]byte(strconv.FormatUint(uint64(hdr.Gid), 36)))
+	if fi.Mode().IsRegular() {
+		if _, err := io.Copy(h, r); err != nil {
+			return nil, errors.Wrap(err, "imagefs: copy file content failed")
+		}
+	} else if fi.Mode()&os.ModeSymlink == os.ModeSymlink {
+		h.Write([]byte(hdr.Linkname))
+	}
+	return h.Sum(nil), nil
+}
diff --git a/pkg/util/util.go b/pkg/util/util.go
index 6c6acc4ee9..7501984fbe 100644
--- a/pkg/util/util.go
+++ b/pkg/util/util.go
@@ -88,6 +88,13 @@ func Hasher() func(string) (string, error) {
 	return hasher
 }
 
+// CacheHasherFileInfoSum is an interface for getting the MD5 sum of a file.
+// This can be implemented by a concrete fs.FileInfo type to avoid reading the
+// file contents.
+type CacheHasherFileInfoSum interface {
+	MD5Sum() ([]byte, error)
+}
+
 // CacheHasher takes into account everything the regular hasher does except for mtime
 func CacheHasher() func(string) (string, error) {
 	hasher := func(p string) (string, error) {
@@ -96,6 +103,15 @@ func CacheHasher() func(string) (string, error) {
 		if err != nil {
 			return "", err
 		}
+
+		if fh, ok := fi.(CacheHasherFileInfoSum); ok {
+			b, err := fh.MD5Sum()
+			if err != nil {
+				return "", err
+			}
+			return hex.EncodeToString(b), nil
+		}
+
 		h.Write([]byte(fi.Mode().String()))
 		// Cian: this is a disgusting hack, but it removes the need for the