From ac44c58174ae25d80f4a8642ec0e39ead02841d1 Mon Sep 17 00:00:00 2001
From: Moreno Giussani
Date: Sun, 16 Apr 2023 18:35:09 +0200
Subject: [PATCH] Add zstd prefetching support

Closes #48
---
 go.mod                 |  1 +
 go.sum                 |  2 ++
 repo_db_mirror.go      | 36 ++++++++++++++++++++++-
 repo_db_mirror_test.go | 79 ++++++++++++++++++++++++++++++++++++++++++++++++-
 4 files changed, 116 insertions(+), 2 deletions(-)

diff --git a/go.mod b/go.mod
index c5174aa..c59d9db 100644
--- a/go.mod
+++ b/go.mod
@@ -14,5 +14,6 @@ require (
 require (
 	github.com/jinzhu/inflection v1.0.0 // indirect
 	github.com/jinzhu/now v1.1.5 // indirect
+	github.com/klauspost/compress v1.16.4
 	github.com/mattn/go-sqlite3 v1.14.16 // indirect
 )
diff --git a/go.sum b/go.sum
index e95df70..08e0557 100644
--- a/go.sum
+++ b/go.sum
@@ -7,6 +7,8 @@ github.com/jinzhu/inflection v1.0.0/go.mod h1:h+uFLlag+Qp1Va5pdKtLDYj+kHp5pxUVkr
 github.com/jinzhu/now v1.1.4/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/z8=
 github.com/jinzhu/now v1.1.5 h1:/o9tlHleP7gOFmsnYNz3RGnqzefHA47wQpKrrdTIwXQ=
 github.com/jinzhu/now v1.1.5/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/z8=
+github.com/klauspost/compress v1.16.4 h1:91KN02FnsOYhuunwU4ssRe8lc2JosWmizWa91B5v1PU=
+github.com/klauspost/compress v1.16.4/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE=
 github.com/mattn/go-sqlite3 v1.14.15/go.mod h1:2eHXhiwb8IkHr+BDWZGa96P6+rkvnG63S2DGjv9HUNg=
 github.com/mattn/go-sqlite3 v1.14.16 h1:yOQRA0RpS5PFz/oikGwBEqvAWhWg5ufRz4ETLjwpU1Y=
 github.com/mattn/go-sqlite3 v1.14.16/go.mod h1:2eHXhiwb8IkHr+BDWZGa96P6+rkvnG63S2DGjv9HUNg=
diff --git a/repo_db_mirror.go b/repo_db_mirror.go
index 542e3a7..a55192c 100644
--- a/repo_db_mirror.go
+++ b/repo_db_mirror.go
@@ -11,6 +11,8 @@ import (
 	"path"
 	"path/filepath"
 	"strings"
+
+	"github.com/klauspost/compress/zstd"
 )
 
 // Uncompresses a gzip file
@@ -44,6 +46,35 @@ func uncompressGZ(filePath string, targetFile string) error {
 	return nil
 }
 
+// Uncompresses a zstd file. The output is capped at 100MB to defuse
+// decompression bombs; anything beyond the cap is silently dropped.
+func uncompressZSTD(filePath string, targetFile string) error {
+	zstdfile, err := os.Open(filePath)
+	if err != nil {
+		log.Printf("error: %v\n", err)
+		return err
+	}
+	defer zstdfile.Close()
+	reader, err := zstd.NewReader(zstdfile)
+	if err != nil {
+		log.Printf("error: %v\n", err)
+		return err
+	}
+	defer reader.Close()
+	limitedReader := io.LimitReader(reader, 100*1024*1024) // Limits the size of the extracted file up to 100MB, so far community db is around 20MB
+	writer, err := os.Create(targetFile)
+	if err != nil {
+		log.Printf("error: %v\n", err)
+		return err
+	}
+	defer writer.Close()
+	if _, err = io.Copy(writer, limitedReader); err != nil {
+		log.Printf("error: %v\n", err)
+		return err
+	}
+	return nil
+}
+
 func extractFilenamesFromTar(filePath string) ([]string, error) {
 	f, err := os.Open(filePath)
 	reader := bufio.NewReader(f)
@@ -125,7 +156,10 @@ func downloadAndParseDb(mirror MirrorDB) error {
 	log.Printf("Extracting %v...", filePath)
 	// the db file exists and have been downloaded. Now it is time to decompress it
 	if err := uncompressGZ(filePath, filePath+".tar"); err != nil {
-		return err
+		log.Printf("Gzip extraction failed with error '%v', attempting zstd extraction...", err)
+		if err := uncompressZSTD(filePath, filePath+".tar"); err != nil {
+			return err
+		}
 	}
 	// delete the original file
 	if err := os.Remove(filePath); err != nil {
diff --git a/repo_db_mirror_test.go b/repo_db_mirror_test.go
index 7df6e4b..bef2b2f 100644
--- a/repo_db_mirror_test.go
+++ b/repo_db_mirror_test.go
@@ -13,6 +13,7 @@ import (
 	"testing"
 
 	"github.com/google/go-cmp/cmp"
+	"github.com/klauspost/compress/zstd"
 )
 
 // https://gist.github.com/maximilien/328c9ac19ab0a158a8df slightly modified to create a fake package
@@ -193,7 +194,6 @@ func addFileToTarWriter(pkgName string, content string, tarWriter *tar.Writer) {
 }
 
 // Uncompresses a gzip file
-// TODO set some limits to avoid OOM with gzip bombs in uncompressGZ.
 func TestUncompressGZ(t *testing.T) {
 	err := uncompressGZ("nope", "nope")
 	tmpDir := testSetupHelper(t)
@@ -224,6 +224,83 @@ func TestUncompressGZ(t *testing.T) {
 	}
 }
 
+// Round-trips a payload through a zstd writer and uncompressZSTD.
+func TestUncompressZSTD(t *testing.T) {
+	if err := uncompressZSTD("nope", "nope"); err == nil {
+		t.Errorf("Should raise an error")
+	}
+	tmpDir := testSetupHelper(t)
+	filePath := path.Join(tmpDir, "test.zstd")
+	testString := "zstd round-trip test payload"
+	zstdfile, err := os.Create(filePath)
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer zstdfile.Close()
+	writer, err := zstd.NewWriter(zstdfile)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if _, err = io.Copy(writer, strings.NewReader(testString)); err != nil {
+		t.Fatal(err)
+	}
+	if err = writer.Close(); err != nil {
+		t.Fatal(err)
+	}
+	if err = uncompressZSTD(filePath, filePath+".uncompressed"); err != nil {
+		t.Fatal(err)
+	}
+	byteStr, err := ioutil.ReadFile(filePath + ".uncompressed")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if string(byteStr) != testString {
+		t.Errorf("Expected %v, got %v ", testString, string(byteStr))
+	}
+}
+
+// Checks that uncompressZSTD stops at its output cap instead of fully
+// extracting a zstd bomb.
+func TestUncompressZSTDBomb(t *testing.T) {
+	if testing.Short() {
+		t.Skip("skipping testing in short mode")
+	}
+	tmpDir := testSetupHelper(t)
+	filePath := path.Join(tmpDir, "test.zstd")
+	var zstdBombSize int64 = 120 * 1024 * 1024
+	zstdfile, err := os.Create(filePath)
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer zstdfile.Close()
+	zero, err := os.Open("/dev/zero")
+	if err != nil {
+		t.Skip("Cannot open /dev/zero, skipping zstd bomb test")
+	}
+	defer zero.Close()
+	writer, err := zstd.NewWriter(zstdfile)
+	if err != nil {
+		t.Fatal(err)
+	}
+	reader := io.LimitReader(bufio.NewReader(zero), zstdBombSize)
+	if _, err = io.Copy(writer, reader); err != nil {
+		t.Fatal(err)
+	}
+	if err = writer.Close(); err != nil {
+		t.Fatal(err)
+	}
+	if err = uncompressZSTD(filePath, filePath+".uncompressed"); err != nil {
+		t.Fatal(err)
+	}
+	fi, err := os.Stat(filePath + ".uncompressed")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if fi.Size() >= zstdBombSize {
+		t.Fatal("It fully extracted the zstd bomb, this shouldn't happen")
+	}
+}
+
 func TestUncompressGZBomb(t *testing.T) {
 	if testing.Short() {
 		t.Skip("skipping testing in short mode")