Skip to content

Commit

Permalink
os: Add CopySparse function
Browse files Browse the repository at this point in the history
Its API is similar to io.Copy, but it detects runs of consecutive zeros and
replaces them with a seek. This should be enough to punch a hole in the
file being written, so that it does not occupy unneeded disk space.
This works by copying 4 KiB chunks at a time, and checking whether each 4 KiB
chunk is all 0s or not. The chunk size is fairly arbitrary, and can be
tweaked if needed.
If a run of consecutive 0s is not aligned on a 4 KiB chunk boundary, then it
won't be detected.

This API is needed since we started using raw images on macOS. This will
avoid using unnecessary disk space when the 31GB disk image does not
contain a lot of data and is mostly 0s. It needs to be used in 2 different
places, when extracting the bundle from an archive, and when copying the
disk image from the bundle to the machine directory.
  • Loading branch information
cfergeau authored and praveenkumar committed May 4, 2022
1 parent 3e7c5f7 commit 615d4f0
Showing 1 changed file with 77 additions and 3 deletions.
80 changes: 77 additions & 3 deletions pkg/os/copy.go
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
package os

import (
"bytes"
"io"
"os"
)

func CopyFile(src, dst string) error {
func copyFile(src, dst string, sparse bool) error {
in, err := os.Open(src)
if err != nil {
return err
Expand All @@ -20,8 +21,14 @@ func CopyFile(src, dst string) error {

defer out.Close()

if _, err = io.Copy(out, in); err != nil {
return err
if sparse {
if _, err = CopySparse(out, in); err != nil {
return err
}
} else {
if _, err = io.Copy(out, in); err != nil {
return err
}
}

fi, err := os.Stat(src)
Expand All @@ -35,3 +42,70 @@ func CopyFile(src, dst string) error {

return out.Close()
}

// CopyFile copies the file at src to dst by delegating to copyFile with
// sparse detection disabled, so the data is transferred with io.Copy.
func CopyFile(src, dst string) error {
	const sparse = false
	return copyFile(src, dst, sparse)
}

// CopyFileSparse copies the file at src to dst by delegating to copyFile with
// sparse detection enabled, so the data is transferred with CopySparse and
// all-zero chunks are skipped instead of being written.
func CopyFileSparse(src, dst string) error {
	const sparse = true
	return copyFile(src, dst, sparse)
}

// CopySparse copies from src to dst until EOF or an error occurs, like
// io.Copy, except that chunks of copyChunkSize bytes containing only zeros
// are not written: dst is seeked forward by the chunk length instead, which
// lets the destination keep a hole at that position.
//
// It returns the number of bytes consumed from src (skipped zero bytes are
// counted as written) and the first error encountered while copying or while
// finalizing the destination.
func CopySparse(dst io.WriteSeeker, src io.Reader) (int64, error) {
	copyBuf := make([]byte, copyChunkSize)
	sw := newSparseWriter(dst)

	// io.CopyBuffer ignores copyBuf and calls src.WriteTo directly when src
	// implements io.WriterTo (bytes.Reader, strings.Reader, and *os.File in
	// recent Go releases all do). The writer would then receive writes of
	// arbitrary size instead of copyChunkSize-sized ones, defeating the
	// per-chunk zero detection. Wrapping src in a struct that only exposes
	// Read hides WriteTo and forces the chunked copy loop.
	chunkedSrc := struct{ io.Reader }{src}

	bytesWritten, err := io.CopyBuffer(sw, chunkedSrc, copyBuf)
	if err != nil {
		return bytesWritten, err
	}
	// Close materializes a trailing hole, if any, so dst ends up with the
	// same length as the data that was read.
	return bytesWritten, sw.Close()
}

// sparseWriter is an io.Writer that forwards data to an io.WriteSeeker but
// replaces all-zero chunks with a forward seek, so that the destination can
// stay sparse. Close must be called once all data has been written.
type sparseWriter struct {
	// writer is the destination receiving the data and the seeks.
	writer io.WriteSeeker
	// lastChunkSparse records whether the most recent chunk was skipped with
	// a seek rather than written; Close uses it to finalize a trailing hole.
	lastChunkSparse bool
}

// newSparseWriter returns a sparseWriter that writes to writer.
func newSparseWriter(writer io.WriteSeeker) *sparseWriter {
	return &sparseWriter{writer: writer}
}

// copyChunkSize is the granularity, in bytes, at which zero detection is done.
const copyChunkSize = 4096

// emptyChunk is an all-zero reference chunk that data is compared against.
var emptyChunk = make([]byte, copyChunkSize)

// isEmptyChunk reports whether p consists only of zero bytes. p must not be
// longer than copyChunkSize: a longer slice is never reported as empty.
func isEmptyChunk(p []byte) bool {
	// HasPrefix instead of bytes.Equal in order to handle the last chunk
	// of the file, which may be shorter than len(emptyChunk), and would
	// fail bytes.Equal()
	return bytes.HasPrefix(emptyChunk, p)
}

// Write consumes p in pieces of at most copyChunkSize bytes. Each all-zero
// piece is skipped by seeking the destination forward; every other piece is
// written as is. It returns the number of bytes of p that were consumed,
// counting skipped bytes as written. A zero-length p is a no-op.
func (w *sparseWriter) Write(p []byte) (int, error) {
	consumed := 0
	// Splitting here (rather than relying on the caller's buffer size) keeps
	// zero detection working for writes larger than copyChunkSize. The loop
	// does not run for an empty p, so an empty write can never be mistaken
	// for a skipped chunk by Close.
	for len(p) > 0 {
		chunk := p
		if len(chunk) > copyChunkSize {
			chunk = chunk[:copyChunkSize]
		}
		n, err := w.writeChunk(chunk)
		consumed += n
		if err != nil {
			return consumed, err
		}
		if n < len(chunk) {
			// Do not silently drop the unwritten tail of this chunk.
			return consumed, io.ErrShortWrite
		}
		p = p[len(chunk):]
	}
	return consumed, nil
}

// writeChunk handles a single non-empty chunk of at most copyChunkSize bytes,
// either seeking over it (all zeros) or writing it to the destination.
func (w *sparseWriter) writeChunk(chunk []byte) (int, error) {
	if !isEmptyChunk(chunk) {
		w.lastChunkSparse = false
		return w.writer.Write(chunk)
	}
	if _, err := w.writer.Seek(int64(len(chunk)), io.SeekCurrent); err != nil {
		w.lastChunkSparse = false
		return 0, err
	}
	w.lastChunkSparse = true
	return len(chunk), nil
}

// Close finalizes the destination. If the last chunk was skipped with a seek,
// it steps back one byte and writes a single zero byte: for a regular file a
// seek past the end does not extend the file, so an explicit write is needed
// for the destination to reach its full length. Close does not close the
// underlying writer.
func (w *sparseWriter) Close() error {
	if !w.lastChunkSparse {
		return nil
	}
	if _, err := w.writer.Seek(-1, io.SeekCurrent); err != nil {
		return err
	}
	_, err := w.writer.Write([]byte{0})
	return err
}

0 comments on commit 615d4f0

Please sign in to comment.