Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add zstd prefetching support (#48) #77

Merged
merged 1 commit into from
Apr 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,6 @@ require (
require (
github.com/jinzhu/inflection v1.0.0 // indirect
github.com/jinzhu/now v1.1.5 // indirect
github.com/klauspost/compress v1.16.4 // indirect
github.com/mattn/go-sqlite3 v1.14.16 // indirect
)
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ github.com/jinzhu/inflection v1.0.0/go.mod h1:h+uFLlag+Qp1Va5pdKtLDYj+kHp5pxUVkr
github.com/jinzhu/now v1.1.4/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/z8=
github.com/jinzhu/now v1.1.5 h1:/o9tlHleP7gOFmsnYNz3RGnqzefHA47wQpKrrdTIwXQ=
github.com/jinzhu/now v1.1.5/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/z8=
github.com/klauspost/compress v1.16.4 h1:91KN02FnsOYhuunwU4ssRe8lc2JosWmizWa91B5v1PU=
github.com/klauspost/compress v1.16.4/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE=
github.com/mattn/go-sqlite3 v1.14.15/go.mod h1:2eHXhiwb8IkHr+BDWZGa96P6+rkvnG63S2DGjv9HUNg=
github.com/mattn/go-sqlite3 v1.14.16 h1:yOQRA0RpS5PFz/oikGwBEqvAWhWg5ufRz4ETLjwpU1Y=
github.com/mattn/go-sqlite3 v1.14.16/go.mod h1:2eHXhiwb8IkHr+BDWZGa96P6+rkvnG63S2DGjv9HUNg=
Expand Down
38 changes: 37 additions & 1 deletion repo_db_mirror.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ import (
"path"
"path/filepath"
"strings"

"github.com/klauspost/compress/zstd"
)

// Uncompresses a gzip file
Expand Down Expand Up @@ -44,6 +46,37 @@ func uncompressGZ(filePath string, targetFile string) error {
return nil
}

// uncompressZSTD decompresses the zstd-compressed file at filePath and writes
// the result to targetFile (created, or truncated if it already exists).
//
// At most 100MB of decompressed data is written, to guard against
// decompression bombs. Input that expands beyond that cap is truncated
// without an error being reported, because io.Copy treats the end of the
// limited reader as a normal end of stream.
//
// Every failure is logged and then returned to the caller.
func uncompressZSTD(filePath string, targetFile string) error {
	zstdfile, err := os.Open(filePath)
	if err != nil {
		log.Printf("error: %v\n", err)
		return err
	}
	// Release the input file descriptor on every return path.
	defer zstdfile.Close()

	reader, err := zstd.NewReader(zstdfile)
	if err != nil {
		log.Printf("error: %v\n", err)
		return err
	}
	// Deferred right after creation so the decoder is released even if a
	// later step fails; it runs before zstdfile.Close (defers are LIFO).
	defer reader.Close()

	// Limits the size of the extracted file up to 100MB, so far community db is around 20MB
	limitedReader := io.LimitReader(reader, 100*1024*1024)

	writer, err := os.Create(targetFile)
	if err != nil {
		log.Printf("error: %v\n", err)
		return err
	}
	defer writer.Close()

	if _, err = io.Copy(writer, limitedReader); err != nil {
		log.Printf("error: %v\n", err)
		return err
	}
	return nil
}

func extractFilenamesFromTar(filePath string) ([]string, error) {
f, err := os.Open(filePath)
reader := bufio.NewReader(f)
Expand Down Expand Up @@ -125,7 +158,10 @@ func downloadAndParseDb(mirror MirrorDB) error {
log.Printf("Extracting %v...", filePath)
// the db file exists and has been downloaded. Now it is time to decompress it
if err := uncompressGZ(filePath, filePath+".tar"); err != nil {
return err
log.Printf("Gzip extraction failed with error '%v', attempting zstd extraction...", err)
if err := uncompressZSTD(filePath, filePath+".tar"); err != nil {
return err
}
}
// delete the original file
if err := os.Remove(filePath); err != nil {
Expand Down
53 changes: 43 additions & 10 deletions repo_db_mirror_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
"testing"

"github.com/google/go-cmp/cmp"
"github.com/klauspost/compress/zstd"
)

// https://gist.github.com/maximilien/328c9ac19ab0a158a8df slightly modified to create a fake package
Expand Down Expand Up @@ -193,7 +194,6 @@ func addFileToTarWriter(pkgName string, content string, tarWriter *tar.Writer) {
}

// Uncompresses a gzip file
// TODO set some limits to avoid OOM with gzip bombs in uncompressGZ.
func TestUncompressGZ(t *testing.T) {
err := uncompressGZ("nope", "nope")
tmpDir := testSetupHelper(t)
Expand Down Expand Up @@ -224,15 +224,48 @@ func TestUncompressGZ(t *testing.T) {
}
}

func TestUncompressGZBomb(t *testing.T) {
// TestUncompressZSTD checks that uncompressZSTD returns an error for a missing
// input file and that a payload compressed with the zstd writer is restored
// unchanged.
func TestUncompressZSTD(t *testing.T) {
	err := uncompressZSTD("nope", "nope")
	tmpDir := testSetupHelper(t)
	if err == nil {
		t.Errorf("Should raise an error")
	}
	filePath := path.Join(tmpDir, "test.zstd")
	// NOTE(review): the payload is empty, so only the empty-stream round trip
	// is exercised here; confirm whether a non-empty sample was intended.
	testString := ``
	zstdfile, err := os.Create(filePath)
	if err != nil {
		t.Fatal(err)
	}
	// Safety net for the early-exit paths below; the explicit Close further
	// down is the one whose error is checked.
	defer zstdfile.Close()

	writer, err := zstd.NewWriter(zstdfile)
	if err != nil {
		t.Fatal(err)
	}
	reader := strings.NewReader(testString)
	if _, err = io.Copy(writer, reader); err != nil {
		t.Fatal(err)
	}
	// Closing the encoder flushes the final zstd frame; a failure here means
	// the fixture on disk is incomplete.
	if err = writer.Close(); err != nil {
		t.Fatal(err)
	}
	if err = zstdfile.Close(); err != nil {
		t.Fatal(err)
	}

	if err = uncompressZSTD(filePath, filePath+".uncompressed"); err != nil {
		t.Fatal(err)
	}
	// Check the read error before looking at the content, so a failed read is
	// reported as such rather than as a content mismatch.
	byteStr, err := ioutil.ReadFile(filePath + ".uncompressed")
	if err != nil {
		t.Fatal(err)
	}
	if string(byteStr) != testString {
		t.Errorf("Expected %v, got %v ", testString, string(byteStr))
	}
}

func TestUncompressZSTDBomb(t *testing.T) {
if testing.Short() {
t.Skip("skipping testing in short mode")
}
tmpDir := testSetupHelper(t)
filePath := path.Join(tmpDir, "test.gz")
var gzipBombSize int64
gzipBombSize = 120 * 1024 * 1024
gzipfile, err := os.Create(filePath)
filePath := path.Join(tmpDir, "test.zstd")
var zstdBombSize int64
zstdBombSize = 120 * 1024 * 1024
zstdfile, err := os.Create(filePath)
if err != nil {
log.Fatal(err)
}
Expand All @@ -241,8 +274,8 @@ func TestUncompressGZBomb(t *testing.T) {
t.Skip("Cannot open /dev/zero, skipping gzip bomb test")
}
defer zero.Close()
writer := gzip.NewWriter(gzipfile)
reader := io.LimitReader(bufio.NewReader(zero), gzipBombSize)
writer := gzip.NewWriter(zstdfile)
reader := io.LimitReader(bufio.NewReader(zero), zstdBombSize)
if _, err = io.Copy(writer, reader); err != nil {
log.Fatal(err)
}
Expand All @@ -258,8 +291,8 @@ func TestUncompressGZBomb(t *testing.T) {
return
}
size := fi.Size()
if size >= gzipBombSize {
log.Fatal("It fully extracted the gzip bomb, this shouldn't happen")
if size >= zstdBombSize {
log.Fatal("It fully extracted the zstd bomb, this shouldn't happen")
}
}

Expand Down