Skip to content

Commit

Permalink
Add zstd prefetching support (anatol#48)
Browse files Browse the repository at this point in the history
  • Loading branch information
Focshole committed Apr 16, 2023
1 parent d99cd60 commit 15b8703
Show file tree
Hide file tree
Showing 4 changed files with 82 additions and 11 deletions.
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,6 @@ require (
require (
github.com/jinzhu/inflection v1.0.0 // indirect
github.com/jinzhu/now v1.1.5 // indirect
github.com/klauspost/compress v1.16.4 // indirect
github.com/mattn/go-sqlite3 v1.14.16 // indirect
)
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ github.com/jinzhu/inflection v1.0.0/go.mod h1:h+uFLlag+Qp1Va5pdKtLDYj+kHp5pxUVkr
github.com/jinzhu/now v1.1.4/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/z8=
github.com/jinzhu/now v1.1.5 h1:/o9tlHleP7gOFmsnYNz3RGnqzefHA47wQpKrrdTIwXQ=
github.com/jinzhu/now v1.1.5/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/z8=
github.com/klauspost/compress v1.16.4 h1:91KN02FnsOYhuunwU4ssRe8lc2JosWmizWa91B5v1PU=
github.com/klauspost/compress v1.16.4/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE=
github.com/mattn/go-sqlite3 v1.14.15/go.mod h1:2eHXhiwb8IkHr+BDWZGa96P6+rkvnG63S2DGjv9HUNg=
github.com/mattn/go-sqlite3 v1.14.16 h1:yOQRA0RpS5PFz/oikGwBEqvAWhWg5ufRz4ETLjwpU1Y=
github.com/mattn/go-sqlite3 v1.14.16/go.mod h1:2eHXhiwb8IkHr+BDWZGa96P6+rkvnG63S2DGjv9HUNg=
Expand Down
38 changes: 37 additions & 1 deletion repo_db_mirror.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ import (
"path"
"path/filepath"
"strings"

"github.com/klauspost/compress/zstd"
)

// Uncompresses a gzip file
Expand Down Expand Up @@ -44,6 +46,37 @@ func uncompressGZ(filePath string, targetFile string) error {
return nil
}

// uncompressZSTD decompresses the zstd-compressed file at filePath and writes
// the result to targetFile, creating it or truncating it if it already exists.
//
// At most 100MB of decompressed data is written; anything beyond that limit
// is silently dropped and no error is reported for the truncation.
//
// It returns the first error encountered while opening the input, creating the
// decoder, creating the output, copying the data, or closing the output file.
func uncompressZSTD(filePath string, targetFile string) error {
	zstdfile, err := os.Open(filePath)
	if err != nil {
		log.Printf("error: %v\n", err)
		return err
	}
	// zstd.NewReader only receives an io.Reader, so closing the decoder cannot
	// close the file for us; release the descriptor explicitly.
	defer zstdfile.Close()

	reader, err := zstd.NewReader(zstdfile)
	if err != nil {
		log.Printf("error: %v\n", err)
		return err
	}
	defer reader.Close()

	// Limits the size of the extracted file up to 100MB, so far community db is around 20MB
	limitedReader := io.LimitReader(reader, 100*1024*1024)

	writer, err := os.Create(targetFile)
	if err != nil {
		log.Printf("error: %v\n", err)
		return err
	}
	if _, err = io.Copy(writer, limitedReader); err != nil {
		// The copy error is the one worth reporting; the close is best-effort here.
		writer.Close()
		log.Printf("error: %v\n", err)
		return err
	}
	// A failed Close can mean the data never reached the disk, so surface it.
	if err = writer.Close(); err != nil {
		log.Printf("error: %v\n", err)
		return err
	}
	return nil
}

func extractFilenamesFromTar(filePath string) ([]string, error) {
f, err := os.Open(filePath)
reader := bufio.NewReader(f)
Expand Down Expand Up @@ -125,7 +158,10 @@ func downloadAndParseDb(mirror MirrorDB) error {
log.Printf("Extracting %v...", filePath)
// the db file exists and has been downloaded. Now it is time to decompress it
if err := uncompressGZ(filePath, filePath+".tar"); err != nil {
return err
log.Printf("Gzip extraction failed with error '%v', attempting zstd extraction...", err)
if err := uncompressZSTD(filePath, filePath+".tar"); err != nil {
return err
}
}
// delete the original file
if err := os.Remove(filePath); err != nil {
Expand Down
52 changes: 42 additions & 10 deletions repo_db_mirror_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
"testing"

"github.com/google/go-cmp/cmp"
"github.com/klauspost/compress/zstd"
)

// https://gist.github.com/maximilien/328c9ac19ab0a158a8df slightly modified to create a fake package
Expand Down Expand Up @@ -193,7 +194,6 @@ func addFileToTarWriter(pkgName string, content string, tarWriter *tar.Writer) {
}

// Uncompresses a gzip file
// TODO set some limits to avoid OOM with gzip bombs in uncompressGZ.
func TestUncompressGZ(t *testing.T) {
err := uncompressGZ("nope", "nope")
tmpDir := testSetupHelper(t)
Expand Down Expand Up @@ -223,16 +223,48 @@ func TestUncompressGZ(t *testing.T) {
log.Fatal(err)
}
}
// TestUncompressZSTD checks that uncompressZSTD reports an error for a missing
// input file and that a zstd-compressed payload is restored byte-for-byte.
func TestUncompressZSTD(t *testing.T) {
	// A non-existent input must be rejected.
	if err := uncompressZSTD("nope", "nope"); err == nil {
		t.Errorf("Should raise an error")
	}

	tmpDir := testSetupHelper(t)
	filePath := path.Join(tmpDir, "test.zstd")
	// Non-empty payload so the round trip actually exercises the decoder.
	testString := "zstd round-trip test payload\n"

	zstdfile, err := os.Create(filePath)
	if err != nil {
		t.Fatal(err)
	}
	writer, err := zstd.NewWriter(zstdfile)
	if err != nil {
		zstdfile.Close()
		t.Fatal(err)
	}
	if _, err = io.Copy(writer, strings.NewReader(testString)); err != nil {
		writer.Close()
		zstdfile.Close()
		t.Fatal(err)
	}
	// Closing the encoder flushes the final frame; a failure here would leave
	// a corrupt fixture, so it must not be ignored.
	if err = writer.Close(); err != nil {
		zstdfile.Close()
		t.Fatal(err)
	}
	if err = zstdfile.Close(); err != nil {
		t.Fatal(err)
	}

	if err = uncompressZSTD(filePath, filePath+".uncompressed"); err != nil {
		t.Fatal(err)
	}

	// Check the read error before looking at the contents.
	byteStr, err := ioutil.ReadFile(filePath + ".uncompressed")
	if err != nil {
		t.Fatal(err)
	}
	if string(byteStr) != testString {
		t.Errorf("Expected %q, got %q", testString, string(byteStr))
	}
}

func TestUncompressGZBomb(t *testing.T) {
func TestUncompressZSTDBomb(t *testing.T) {
if testing.Short() {
t.Skip("skipping testing in short mode")
}
tmpDir := testSetupHelper(t)
filePath := path.Join(tmpDir, "test.gz")
var gzipBombSize int64
gzipBombSize = 120 * 1024 * 1024
gzipfile, err := os.Create(filePath)
filePath := path.Join(tmpDir, "test.zstd")
var zstdBombSize int64
zstdBombSize = 120 * 1024 * 1024
zstdfile, err := os.Create(filePath)
if err != nil {
log.Fatal(err)
}
Expand All @@ -241,8 +273,8 @@ func TestUncompressGZBomb(t *testing.T) {
t.Skip("Cannot open /dev/zero, skipping gzip bomb test")
}
defer zero.Close()
writer := gzip.NewWriter(gzipfile)
reader := io.LimitReader(bufio.NewReader(zero), gzipBombSize)
writer := gzip.NewWriter(zstdfile)
reader := io.LimitReader(bufio.NewReader(zero), zstdBombSize)
if _, err = io.Copy(writer, reader); err != nil {
log.Fatal(err)
}
Expand All @@ -258,8 +290,8 @@ func TestUncompressGZBomb(t *testing.T) {
return
}
size := fi.Size()
if size >= gzipBombSize {
log.Fatal("It fully extracted the gzip bomb, this shouldn't happen")
if size >= zstdBombSize {
log.Fatal("It fully extracted the zstd bomb, this shouldn't happen")
}
}

Expand Down

0 comments on commit 15b8703

Please sign in to comment.