From a0f302696659a542194e5e833e2d8b6dd5be84d5 Mon Sep 17 00:00:00 2001 From: Cristian Maglie Date: Thu, 1 Aug 2024 13:23:19 +0200 Subject: [PATCH 1/8] Added parameter names to Extractor.FS interface. Added docs string. --- extractor.go | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/extractor.go b/extractor.go index ccd6b3f..1c634f0 100644 --- a/extractor.go +++ b/extractor.go @@ -24,10 +24,19 @@ import ( // rather than directly on the filesystem type Extractor struct { FS interface { - Link(string, string) error - MkdirAll(string, os.FileMode) error + // Link creates newname as a hard link to the oldname file. If there is an error, it will be of type *LinkError. + // Unlike os.Link, if newname already exists it will be overwritten. + Link(oldname, newname string) error + + // MkdirAll creates the directory path and all its parents if needed. + MkdirAll(path string, perm os.FileMode) error + + // OpenFile opens the named file with specified flag (O_RDONLY etc.). OpenFile(name string, flag int, perm os.FileMode) (*os.File, error) - Symlink(string, string) error + + // Symlink creates newname as a symbolic link to oldname. + // Unlike os.Symlink, if newname already exists it will be overwritten. + Symlink(oldname, newname string) error } } From 5f2cdff4d2032de2994363a017b3705b7b0253aa Mon Sep 17 00:00:00 2001 From: Cristian Maglie Date: Fri, 2 Aug 2024 17:19:17 +0200 Subject: [PATCH 2/8] Made the Link and Symlink methods of MockDisk and LoggingFS overwrite existing files To comply with the Extractor.FS interface definition. 
--- extractor_test.go | 2 ++ loggingfs_test.go | 2 ++ 2 files changed, 4 insertions(+) diff --git a/extractor_test.go b/extractor_test.go index 8035adc..8b54638 100644 --- a/extractor_test.go +++ b/extractor_test.go @@ -106,6 +106,7 @@ type MockDisk struct { func (m MockDisk) Link(oldname, newname string) error { oldname = filepath.Join(m.Base, oldname) newname = filepath.Join(m.Base, newname) + _ = os.Remove(newname) return os.Link(oldname, newname) } @@ -117,6 +118,7 @@ func (m MockDisk) MkdirAll(path string, perm os.FileMode) error { func (m MockDisk) Symlink(oldname, newname string) error { oldname = filepath.Join(m.Base, oldname) newname = filepath.Join(m.Base, newname) + _ = os.Remove(newname) return os.Symlink(oldname, newname) } diff --git a/loggingfs_test.go b/loggingfs_test.go index b3d20a9..b647d26 100644 --- a/loggingfs_test.go +++ b/loggingfs_test.go @@ -39,6 +39,7 @@ func (m *LoggingFS) Link(oldname, newname string) error { OldPath: oldname, Path: newname, }) + _ = os.Remove(newname) return nil } @@ -57,6 +58,7 @@ func (m *LoggingFS) Symlink(oldname, newname string) error { OldPath: oldname, Path: newname, }) + _ = os.Remove(newname) return nil } From e226cf5ac70ba4403d08381fc11c84da784eda21 Mon Sep 17 00:00:00 2001 From: Cristian Maglie Date: Thu, 1 Aug 2024 19:04:13 +0200 Subject: [PATCH 3/8] Some make-up --- extract.go | 45 +++++++++++++-------------------------------- 1 file changed, 13 insertions(+), 32 deletions(-) diff --git a/extract.go b/extract.go index 1a74094..c1a48c2 100644 --- a/extract.go +++ b/extract.go @@ -4,25 +4,27 @@ // Most of the time you'll just need to call the proper function with a Reader and // a destination: // -// file, _ := os.Open("path/to/file.tar.bz2") -// extract.Bz2(context.TODO, file, "/path/where/to/extract", nil) +// file, _ := os.Open("path/to/file.tar.bz2") +// extract.Bz2(context.TODO, file, "/path/where/to/extract", nil) +// // ``` // // Sometimes you'll want a bit more control over the files, such as 
extracting a // subfolder of the archive. In this cases you can specify a renamer func that will // change the path for every file: // -// var shift = func(path string) string { +// var shift = func(path string) string { // parts := strings.Split(path, string(filepath.Separator)) // parts = parts[1:] // return strings.Join(parts, string(filepath.Separator)) // } // extract.Bz2(context.TODO, file, "/path/where/to/extract", shift) +// // ``` // // If you don't know which archive you're dealing with (life really is always a surprise) you can use Archive, which will infer the type of archive from the first bytes // -// extract.Archive(context.TODO, file, "/path/where/to/extract", nil) +// extract.Archive(context.TODO, file, "/path/where/to/extract", nil) package extract import ( @@ -41,50 +43,35 @@ type Renamer func(string) string // handle the names of the files. // If the file is not an archive, an error is returned. func Archive(ctx context.Context, body io.Reader, location string, rename Renamer) error { - extractor := Extractor{ - FS: fs{}, - } - + extractor := Extractor{FS: fs{}} return extractor.Archive(ctx, body, location, rename) } // Zstd extracts a .zst or .tar.zst archived stream of data in the specified location. // It accepts a rename function to handle the names of the files (see the example) func Zstd(ctx context.Context, body io.Reader, location string, rename Renamer) error { - extractor := Extractor{ - FS: fs{}, - } - + extractor := Extractor{FS: fs{}} return extractor.Zstd(ctx, body, location, rename) } // Xz extracts a .xz or .tar.xz archived stream of data in the specified location. 
// It accepts a rename function to handle the names of the files (see the example) func Xz(ctx context.Context, body io.Reader, location string, rename Renamer) error { - extractor := Extractor{ - FS: fs{}, - } - + extractor := Extractor{FS: fs{}} return extractor.Xz(ctx, body, location, rename) } // Bz2 extracts a .bz2 or .tar.bz2 archived stream of data in the specified location. // It accepts a rename function to handle the names of the files (see the example) func Bz2(ctx context.Context, body io.Reader, location string, rename Renamer) error { - extractor := Extractor{ - FS: fs{}, - } - + extractor := Extractor{FS: fs{}} return extractor.Bz2(ctx, body, location, rename) } // Gz extracts a .gz or .tar.gz archived stream of data in the specified location. // It accepts a rename function to handle the names of the files (see the example) func Gz(ctx context.Context, body io.Reader, location string, rename Renamer) error { - extractor := Extractor{ - FS: fs{}, - } - + extractor := Extractor{FS: fs{}} return extractor.Gz(ctx, body, location, rename) } @@ -96,20 +83,14 @@ type link struct { // Tar extracts a .tar archived stream of data in the specified location. // It accepts a rename function to handle the names of the files (see the example) func Tar(ctx context.Context, body io.Reader, location string, rename Renamer) error { - extractor := Extractor{ - FS: fs{}, - } - + extractor := Extractor{FS: fs{}} return extractor.Tar(ctx, body, location, rename) } // Zip extracts a .zip archived stream of data in the specified location. // It accepts a rename function to handle the names of the files (see the example). 
func Zip(ctx context.Context, body io.Reader, location string, rename Renamer) error { - extractor := Extractor{ - FS: fs{}, - } - + extractor := Extractor{FS: fs{}} return extractor.Zip(ctx, body, location, rename) } From ff9829c9e536e0b920e25735a980b58e4409f5d4 Mon Sep 17 00:00:00 2001 From: Cristian Maglie Date: Fri, 2 Aug 2024 13:05:59 +0200 Subject: [PATCH 4/8] Ignore downloaded test cases --- testdata/.gitignore | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 testdata/.gitignore diff --git a/testdata/.gitignore b/testdata/.gitignore new file mode 100644 index 0000000..0121aea --- /dev/null +++ b/testdata/.gitignore @@ -0,0 +1,3 @@ +big.tar.gz +big.zip + From fbe90e5b14286cc4d10231d4a1f5b382560e3455 Mon Sep 17 00:00:00 2001 From: Cristian Maglie Date: Fri, 2 Aug 2024 13:11:11 +0200 Subject: [PATCH 5/8] Simplified test file downloader --- extract_test.go | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/extract_test.go b/extract_test.go index 7b088c1..61aa8e9 100644 --- a/extract_test.go +++ b/extract_test.go @@ -397,8 +397,7 @@ func testWalk(t *testing.T, dir string, testFiles Files) { func TestTarGzMemoryConsumption(t *testing.T) { archive := paths.New("testdata/big.tar.gz") - err := download(t, "http://downloads.arduino.cc/gcc-arm-none-eabi-4.8.3-2014q1-windows.tar.gz", archive) - require.NoError(t, err) + download(t, "http://downloads.arduino.cc/gcc-arm-none-eabi-4.8.3-2014q1-windows.tar.gz", archive) tmpDir, err := paths.MkTempDir("", "") require.NoError(t, err) @@ -425,8 +424,7 @@ func TestTarGzMemoryConsumption(t *testing.T) { func TestZipMemoryConsumption(t *testing.T) { archive := paths.New("testdata/big.zip") - err := download(t, "http://downloads.arduino.cc/tools/gcc-arm-none-eabi-7-2017-q4-major-win32-arduino1.zip", archive) - require.NoError(t, err) + download(t, "http://downloads.arduino.cc/tools/gcc-arm-none-eabi-7-2017-q4-major-win32-arduino1.zip", archive) tmpDir, err := paths.MkTempDir("", "") 
require.NoError(t, err) @@ -451,27 +449,23 @@ func TestZipMemoryConsumption(t *testing.T) { require.True(t, heapUsed < 10000000, "heap consumption should be less than 10M but is %d", heapUsed) } -func download(t require.TestingT, url string, file *paths.Path) error { +func download(t require.TestingT, url string, file *paths.Path) { if file.Exist() { - return nil + return } fmt.Printf("Downloading %s in %s\n", url, file) resp, err := http.Get(url) - if err != nil { - return err - } + require.NoError(t, err) defer resp.Body.Close() out, err := file.Create() - if err != nil { - return err - } + require.NoError(t, err) _, err = io.Copy(out, resp.Body) out.Close() if err != nil { file.Remove() } - return err + require.NoError(t, err) } From 42dc2b5e911a54132b077af121d539b53f9d91e7 Mon Sep 17 00:00:00 2001 From: Cristian Maglie Date: Thu, 1 Aug 2024 13:24:40 +0200 Subject: [PATCH 6/8] Use pointers to keep lists of (sym)links to process. Simplified for..range. --- extract.go | 5 ----- extractor.go | 29 +++++++++++++++++------------ 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/extract.go b/extract.go index c1a48c2..0e6d463 100644 --- a/extract.go +++ b/extract.go @@ -75,11 +75,6 @@ func Gz(ctx context.Context, body io.Reader, location string, rename Renamer) er return extractor.Gz(ctx, body, location, rename) } -type link struct { - Name string - Path string -} - // Tar extracts a .tar archived stream of data in the specified location. // It accepts a rename function to handle the names of the files (see the example) func Tar(ctx context.Context, body io.Reader, location string, rename Renamer) error { diff --git a/extractor.go b/extractor.go index 1c634f0..db6facb 100644 --- a/extractor.go +++ b/extractor.go @@ -156,11 +156,16 @@ func (e *Extractor) Gz(ctx context.Context, body io.Reader, location string, ren return nil } +type link struct { + Name string + Path string +} + // Tar extracts a .tar archived stream of data in the specified location. 
// It accepts a rename function to handle the names of the files (see the example) func (e *Extractor) Tar(ctx context.Context, body io.Reader, location string, rename Renamer) error { - links := []link{} - symlinks := []link{} + links := []*link{} + symlinks := []*link{} // We make the first pass creating the directory structure, or we could end up // attempting to create a file where there's no folder @@ -212,9 +217,9 @@ func (e *Extractor) Tar(ctx context.Context, body io.Reader, location string, re } name = filepath.Join(location, name) - links = append(links, link{Path: path, Name: name}) + links = append(links, &link{Path: path, Name: name}) case tar.TypeSymlink: - symlinks = append(symlinks, link{Path: path, Name: header.Linkname}) + symlinks = append(symlinks, &link{Path: path, Name: header.Linkname}) } } @@ -230,14 +235,14 @@ func (e *Extractor) Tar(ctx context.Context, body io.Reader, location string, re } } - for i := range symlinks { + for _, symlink := range symlinks { select { case <-ctx.Done(): return errors.New("interrupted") default: } - if err := e.FS.Symlink(symlinks[i].Name, symlinks[i].Path); err != nil { - return errors.Annotatef(err, "Create link %s", symlinks[i].Path) + if err := e.FS.Symlink(symlink.Name, symlink.Path); err != nil { + return errors.Annotatef(err, "Create link %s", symlink.Path) } } return nil @@ -271,7 +276,7 @@ func (e *Extractor) Zip(ctx context.Context, body io.Reader, location string, re return errors.Annotatef(err, "Read the zip file") } - links := []link{} + links := []*link{} // We make the first pass creating the directory structure, or we could end up // attempting to create a file where there's no folder @@ -318,7 +323,7 @@ func (e *Extractor) Zip(ctx context.Context, body io.Reader, location string, re } else if name, err := io.ReadAll(f); err != nil { return errors.Annotatef(err, "Read address of link %s", path) } else { - links = append(links, link{Path: path, Name: string(name)}) + links = append(links, 
&link{Path: path, Name: string(name)}) f.Close() } default: @@ -333,14 +338,14 @@ func (e *Extractor) Zip(ctx context.Context, body io.Reader, location string, re } // Now we make another pass creating the links - for i := range links { + for _, link := range links { select { case <-ctx.Done(): return errors.New("interrupted") default: } - if err := e.FS.Symlink(links[i].Name, links[i].Path); err != nil { - return errors.Annotatef(err, "Create link %s", links[i].Path) + if err := e.FS.Symlink(link.Name, link.Path); err != nil { + return errors.Annotatef(err, "Create link %s", link.Path) } } From bc6cfa192588e951306a9580318036ba4a1b3809 Mon Sep 17 00:00:00 2001 From: Cristian Maglie Date: Fri, 2 Aug 2024 13:06:22 +0200 Subject: [PATCH 7/8] Updated deps --- .github/workflows/test.yaml | 2 +- go.mod | 11 +++++++---- go.sum | 17 +++++++++-------- 3 files changed, 17 insertions(+), 13 deletions(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 2d885ec..34dfef0 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -16,7 +16,7 @@ jobs: - uses: actions/checkout@v1 - uses: actions/setup-go@v1 with: - go-version: "1.17" + go-version: "1.22" - name: Build native run: go build -v ./... 
shell: bash diff --git a/go.mod b/go.mod index d797763..814f7d3 100644 --- a/go.mod +++ b/go.mod @@ -1,14 +1,16 @@ module github.com/codeclysm/extract/v3 -go 1.17 +go 1.22 + +toolchain go1.22.3 require ( - github.com/arduino/go-paths-helper v1.2.0 + github.com/arduino/go-paths-helper v1.12.1 github.com/h2non/filetype v1.1.3 github.com/juju/errors v0.0.0-20181118221551-089d3ea4e4d5 github.com/klauspost/compress v1.15.13 - github.com/stretchr/testify v1.3.0 - github.com/ulikunitz/xz v0.5.11 + github.com/stretchr/testify v1.9.0 + github.com/ulikunitz/xz v0.5.12 ) require ( @@ -17,4 +19,5 @@ require ( github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e // indirect github.com/pmezard/go-difflib v1.0.0 // indirect gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/go.sum b/go.sum index 543bd68..1c42629 100644 --- a/go.sum +++ b/go.sum @@ -1,6 +1,5 @@ -github.com/arduino/go-paths-helper v1.2.0 h1:qDW93PR5IZUN/jzO4rCtexiwF8P4OIcOmcSgAYLZfY4= -github.com/arduino/go-paths-helper v1.2.0/go.mod h1:HpxtKph+g238EJHq4geEPv9p+gl3v5YYu35Yb+w31Ck= -github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/arduino/go-paths-helper v1.12.1 h1:WkxiVUxBjKWlLMiMuYy8DcmVrkxdP7aKxQOAq7r2lVM= +github.com/arduino/go-paths-helper v1.12.1/go.mod h1:jcpW4wr0u69GlXhTYydsdsqAjLaYK5n7oWHfKqOG6LM= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/h2non/filetype v1.1.3 h1:FKkx9QbD7HR/zjK1Ia5XiBsq9zdLi5Kf3zGyFTAFkGg= @@ -25,13 +24,13 @@ github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e h1:fD57ERR4JtEqsWb github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib 
v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q= -github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= -github.com/ulikunitz/xz v0.5.11 h1:kpFauv27b6ynzBNT/Xy+1k+fK4WswhN/6PN5WhFAGw8= -github.com/ulikunitz/xz v0.5.11/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14= +github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= +github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/ulikunitz/xz v0.5.12 h1:37Nm15o69RwBkXM0J6A5OlE67RZTfzUxTj8fB3dfcsc= +github.com/ulikunitz/xz v0.5.12/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14= golang.org/x/crypto v0.0.0-20180214000028-650f4a345ab4/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/net v0.0.0-20180406214816-61147c48b25b/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20160105164936-4f90aeace3a2/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f h1:BLraFXnmrev5lT+xlilqcH8XK9/i0At2xKjWk4p6zsU= gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= @@ -39,3 +38,5 @@ gopkg.in/mgo.v2 v2.0.0-20160818015218-f2b6f6c918c4 h1:hILp2hNrRnYjZpmIbx70psAHbB gopkg.in/mgo.v2 v2.0.0-20160818015218-f2b6f6c918c4/go.mod h1:yeKp02qBN3iKW1OzL3MGk2IdtZzaj7SFntXj72NppTA= gopkg.in/yaml.v2 v2.0.0-20170712054546-1be3d31502d6 h1:CvAnnm1XvMjfib69SZzDwgWfOk+PxYz0hA0HBupilBA= gopkg.in/yaml.v2 v2.0.0-20170712054546-1be3d31502d6/go.mod h1:JAlM8MvJe8wmxCU4Bli9HhUf9+ttbYbLASfIpnQbh74= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod 
h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= From 1d471312a8f14df4395f23b5acfe1cdf83756720 Mon Sep 17 00:00:00 2001 From: Cristian Maglie Date: Fri, 2 Aug 2024 19:39:26 +0200 Subject: [PATCH 8/8] Updated README badge --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 266d5f4..184b2fd 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Extract -[![Build Status](https://travis-ci.org/codeclysm/extract.svg?branch=master)](https://travis-ci.org/codeclysm/extract) +[![Build Status](https://github.com/codeclysm/extract/actions/workflows/test.yaml/badge.svg?branch=master)](https://github.com/codeclysm/extract/actions/workflows/test.yaml) [![GitHub license](https://img.shields.io/badge/license-MIT-blue.svg)](https://raw.githubusercontent.com/codeclysm/extract/master/LICENSE) [![Godoc Reference](https://img.shields.io/badge/Godoc-Reference-blue.svg)](https://godoc.org/github.com/codeclysm/extract)