Skip to content
This repository has been archived by the owner on Nov 19, 2024. It is now read-only.

Commit

Permalink
Refactor and simplify interfaces
Browse files Browse the repository at this point in the history
Split Archival into Archival/Extraction since some archive formats can't do both. RAR is proprietary for creation, and there is no pure-Go 7z writing implementation that I know of.

- Extractor no longer requires a filename filter (kind of pointless at best, confusing at worst)
- CompressedArchive renamed to Archive
- Archival is now just creating archives
- New Extraction interface is for reading archives
- Archive format can compose compression, archival, and extraction
  • Loading branch information
mholt committed Nov 8, 2024
1 parent 76ea0d6 commit f9dfd58
Show file tree
Hide file tree
Showing 8 changed files with 98 additions and 98 deletions.
13 changes: 5 additions & 8 deletions 7z.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,18 +51,15 @@ func (z SevenZip) Match(_ context.Context, filename string, stream io.Reader) (M
return mr, nil
}

// Archive is not implemented for 7z, but the method exists so that SevenZip satisfies the ArchiveFormat interface.
func (z SevenZip) Archive(_ context.Context, _ io.Writer, _ []FileInfo) error {
return fmt.Errorf("not implemented for 7z because there is no pure Go implementation found")
}
// Archive is not implemented for 7z because I do not know of a pure-Go 7z writer.

// Extract extracts files from z, implementing the Extractor interface. Uniquely, however,
// sourceArchive must be an io.ReaderAt and io.Seeker, which are oddly disjoint interfaces
// from io.Reader which is what the method signature requires. We chose this signature for
// the interface because we figure you can Read() from anything you can ReadAt() or Seek()
with. Due to the nature of the 7z archive format, if sourceArchive is not an io.Seeker
// and io.ReaderAt, an error is returned.
func (z SevenZip) Extract(ctx context.Context, sourceArchive io.Reader, pathsInArchive []string, handleFile FileHandler) error {
func (z SevenZip) Extract(ctx context.Context, sourceArchive io.Reader, handleFile FileHandler) error {
sra, ok := sourceArchive.(seekReaderAt)
if !ok {
return fmt.Errorf("input type must be an io.ReaderAt and io.Seeker because of zip format constraints")
Expand All @@ -87,9 +84,6 @@ func (z SevenZip) Extract(ctx context.Context, sourceArchive io.Reader, pathsInA
return err // honor context cancellation
}

if !fileIsIncluded(pathsInArchive, f.Name) {
continue
}
if fileIsIncluded(skipDirs, f.Name) {
continue
}
Expand Down Expand Up @@ -130,3 +124,6 @@ func (z SevenZip) Extract(ctx context.Context, sourceArchive io.Reader, pathsInA

// https://py7zr.readthedocs.io/en/latest/archive_format.html#signature
var sevenZipHeader = []byte("7z\xBC\xAF\x27\x1C")

// Interface guard
var _ Extractor = SevenZip{}
116 changes: 65 additions & 51 deletions formats.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,13 +42,14 @@ func RegisterFormat(format Format) {
func Identify(ctx context.Context, filename string, stream io.Reader) (Format, io.Reader, error) {
var compression Compression
var archival Archival
var extraction Extraction

rewindableStream, err := newRewindReader(stream)
if err != nil {
return nil, nil, err
}

// try compression format first, since that's the outer "layer"
// try compression format first, since that's the outer "layer" if combined
for name, format := range formats {
cf, isCompression := format.(Compression)
if !isCompression {
Expand All @@ -68,10 +69,11 @@ func Identify(ctx context.Context, filename string, stream io.Reader) (Format, i
}
}

// try archive format next
// try archival and extraction format next
for name, format := range formats {
af, isArchive := format.(Archival)
if !isArchive {
ar, isArchive := format.(Archival)
ex, isExtract := format.(Extraction)
if !isArchive && !isExtract {
continue
}

Expand All @@ -81,20 +83,23 @@ func Identify(ctx context.Context, filename string, stream io.Reader) (Format, i
}

if matchResult.Matched() {
archival = af
archival = ar
extraction = ex
break
}
}

// the stream should be rewound by identifyOne
// the stream should be rewound by identifyOne; then return the most specific type of match
bufferedStream := rewindableStream.reader()
switch {
case compression != nil && archival == nil:
case compression != nil && archival == nil && extraction == nil:
return compression, bufferedStream, nil
case compression == nil && archival != nil:
case compression == nil && archival != nil && extraction == nil:
return archival, bufferedStream, nil
case compression != nil && archival != nil:
return CompressedArchive{compression, archival}, bufferedStream, nil
case compression == nil && archival == nil && extraction != nil:
return extraction, bufferedStream, nil
case archival != nil || extraction != nil:
return Archive{compression, archival, extraction}, bufferedStream, nil
default:
return nil, bufferedStream, NoMatch
}
Expand Down Expand Up @@ -161,44 +166,44 @@ func readAtMost(stream io.Reader, n int) ([]byte, error) {
return nil, err
}

// CompressedArchive combines a compression format on top of an archive
// format (e.g. "tar.gz") and provides both functionalities in a single
// type. It ensures that archive functions are wrapped by compressors and
// Archive represents an archive which may be compressed at the outer layer.
// It combines a compression format on top of an archive/extraction
// format (e.g. ".tar.gz") and provides both functionalities in a single
// type. It ensures that archival functions are wrapped by compressors and
// decompressors. However, compressed archives have some limitations; for
// example, files cannot be inserted/appended because of complexities with
// modifying existing compression state (perhaps this could be overcome,
// but I'm not about to try it).
//
// As this type is intended to compose compression and archive formats,
// both must be specified in order for this value to be valid, or its
// methods will return errors.
type CompressedArchive struct {
// The embedded Archival and Extraction values are used for writing and
// reading, respectively. Compression is optional and is only needed if the
// format is compressed externally (for example, tar archives).
type Archive struct {
Compression
Archival
Extraction
}

// Name returns a concatenation of the archive format name
// and the compression format name.
func (caf CompressedArchive) Extension() string {
if caf.Compression == nil && caf.Archival == nil {
panic("missing both compression and archive formats")
}
// Extension returns a concatenation of the archive and compression format extensions.
func (ar Archive) Extension() string {
var name string
if caf.Archival != nil {
name += caf.Archival.Extension()
if ar.Archival != nil {
name += ar.Archival.Extension()
} else if ar.Extraction != nil {
name += ar.Extraction.Extension()
}
if caf.Compression != nil {
name += caf.Compression.Extension()
if ar.Compression != nil {
name += ar.Compression.Extension()
}
return name
}

// Match matches if the input matches both the compression and archive format.
func (caf CompressedArchive) Match(ctx context.Context, filename string, stream io.Reader) (MatchResult, error) {
// Match matches if the input matches both the compression and archival/extraction format.
func (ar Archive) Match(ctx context.Context, filename string, stream io.Reader) (MatchResult, error) {
var conglomerate MatchResult

if caf.Compression != nil {
matchResult, err := caf.Compression.Match(ctx, filename, stream)
if ar.Compression != nil {
matchResult, err := ar.Compression.Match(ctx, filename, stream)
if err != nil {
return MatchResult{}, err
}
Expand All @@ -208,7 +213,7 @@ func (caf CompressedArchive) Match(ctx context.Context, filename string, stream

// wrap the reader with the decompressor so we can
// attempt to match the archive by reading the stream
rc, err := caf.Compression.OpenReader(stream)
rc, err := ar.Compression.OpenReader(stream)
if err != nil {
return matchResult, err
}
Expand All @@ -218,8 +223,8 @@ func (caf CompressedArchive) Match(ctx context.Context, filename string, stream
conglomerate = matchResult
}

if caf.Archival != nil {
matchResult, err := caf.Archival.Match(ctx, filename, stream)
if ar.Archival != nil {
matchResult, err := ar.Archival.Match(ctx, filename, stream)
if err != nil {
return MatchResult{}, err
}
Expand All @@ -234,26 +239,32 @@ func (caf CompressedArchive) Match(ctx context.Context, filename string, stream
}

// Archive adds files to the output archive while compressing the result.
func (caf CompressedArchive) Archive(ctx context.Context, output io.Writer, files []FileInfo) error {
if caf.Compression != nil {
wc, err := caf.Compression.OpenWriter(output)
func (ar Archive) Archive(ctx context.Context, output io.Writer, files []FileInfo) error {
if ar.Archival == nil {
return fmt.Errorf("no archival format")
}
if ar.Compression != nil {
wc, err := ar.Compression.OpenWriter(output)
if err != nil {
return err
}
defer wc.Close()
output = wc
}
return caf.Archival.Archive(ctx, output, files)
return ar.Archival.Archive(ctx, output, files)
}

// ArchiveAsync adds files to the output archive while compressing the result asynchronously.
func (caf CompressedArchive) ArchiveAsync(ctx context.Context, output io.Writer, jobs <-chan ArchiveAsyncJob) error {
do, ok := caf.Archival.(ArchiverAsync)
func (ar Archive) ArchiveAsync(ctx context.Context, output io.Writer, jobs <-chan ArchiveAsyncJob) error {
if ar.Archival == nil {
return fmt.Errorf("no archival format")
}
do, ok := ar.Archival.(ArchiverAsync)
if !ok {
return fmt.Errorf("%s archive does not support async writing", caf.Extension())
return fmt.Errorf("%T archive does not support async writing", ar.Archival)
}
if caf.Compression != nil {
wc, err := caf.Compression.OpenWriter(output)
if ar.Compression != nil {
wc, err := ar.Compression.OpenWriter(output)
if err != nil {
return err
}
Expand All @@ -264,16 +275,19 @@ func (caf CompressedArchive) ArchiveAsync(ctx context.Context, output io.Writer,
}

// Extract reads files out of an archive while decompressing the results.
func (caf CompressedArchive) Extract(ctx context.Context, sourceArchive io.Reader, pathsInArchive []string, handleFile FileHandler) error {
if caf.Compression != nil {
rc, err := caf.Compression.OpenReader(sourceArchive)
func (ar Archive) Extract(ctx context.Context, sourceArchive io.Reader, handleFile FileHandler) error {
if ar.Extraction == nil {
return fmt.Errorf("no extraction format")
}
if ar.Compression != nil {
rc, err := ar.Compression.OpenReader(sourceArchive)
if err != nil {
return err
}
defer rc.Close()
sourceArchive = rc
}
return caf.Archival.Extract(ctx, sourceArchive, pathsInArchive, handleFile)
return ar.Extraction.Extract(ctx, sourceArchive, handleFile)
}

// MatchResult returns true if the format was matched either
Expand Down Expand Up @@ -408,8 +422,8 @@ var formats = make(map[string]Format)

// Interface guards
var (
_ Format = (*CompressedArchive)(nil)
_ Archiver = (*CompressedArchive)(nil)
_ ArchiverAsync = (*CompressedArchive)(nil)
_ Extractor = (*CompressedArchive)(nil)
_ Format = (*Archive)(nil)
_ Archiver = (*Archive)(nil)
_ ArchiverAsync = (*Archive)(nil)
_ Extractor = (*Archive)(nil)
)
4 changes: 2 additions & 2 deletions formats_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ func checkErr(t *testing.T, err error, msgFmt string, args ...any) {
return
}
args = append(args, err)
t.Errorf(msgFmt+": %s", args...)
t.Fatalf(msgFmt+": %s", args...)
}

func TestIdentifyDoesNotMatchContentFromTrimmedKnownHeaderHaving0Suffix(t *testing.T) {
Expand Down Expand Up @@ -418,7 +418,7 @@ func TestIdentifyAndOpenZip(t *testing.T) {
t.Errorf("unexpected format found: expected=.zip actual=%s", format.Extension())
}

err = format.(Extractor).Extract(context.Background(), reader, nil, func(ctx context.Context, f FileInfo) error {
err = format.(Extractor).Extract(context.Background(), reader, func(ctx context.Context, f FileInfo) error {
rc, err := f.Open()
if err != nil {
return err
Expand Down
24 changes: 11 additions & 13 deletions fs.go
Original file line number Diff line number Diff line change
Expand Up @@ -350,14 +350,12 @@ func (f ArchiveFS) Open(name string) (fs.File, error) {
}

var decompressor io.ReadCloser
if caf, ok := f.Format.(CompressedArchive); ok {
if caf.Compression != nil {
decompressor, err = caf.Compression.OpenReader(inputStream)
if err != nil {
return nil, err
}
inputStream = decompressor
if decomp, ok := f.Format.(Decompressor); ok {
decompressor, err = decomp.OpenReader(inputStream)
if err != nil {
return nil, err
}
inputStream = decompressor
}

// prepare the handler that we'll need if we have to iterate the
Expand Down Expand Up @@ -413,13 +411,13 @@ func (f ArchiveFS) Open(name string) (fs.File, error) {
// files may have a "." component in them, and the underlying format doesn't
// know about our file system semantics, so we need to filter ourselves (it's
// not significantly less efficient).
if caf, ok := f.Format.(CompressedArchive); ok {
if ar, ok := f.Format.(Archive); ok {
// bypass the CompressedArchive format's opening of the decompressor, since
// we already did it, since we need to keep it open after returning
// we already did it because we need to keep it open after returning.
// "I BYPASSED THE COMPRESSOR!" -Rey
err = caf.Archival.Extract(f.context(), inputStream, nil, handler)
err = ar.Extraction.Extract(f.context(), inputStream, handler)
} else {
err = f.Format.Extract(f.context(), inputStream, nil, handler)
err = f.Format.Extract(f.context(), inputStream, handler)
}
if err != nil {
return nil, &fs.PathError{Op: "open", Path: name, Err: fmt.Errorf("extract: %w", err)}
Expand Down Expand Up @@ -486,7 +484,7 @@ func (f ArchiveFS) Stat(name string) (fs.FileInfo, error) {
if f.Stream != nil {
inputStream = io.NewSectionReader(f.Stream, 0, f.Stream.Size())
}
err = f.Format.Extract(f.context(), inputStream, nil, handler)
err = f.Format.Extract(f.context(), inputStream, handler)
if err != nil && result.FileInfo == nil {
return nil, err
}
Expand Down Expand Up @@ -601,7 +599,7 @@ func (f *ArchiveFS) ReadDir(name string) ([]fs.DirEntry, error) {
inputStream = io.NewSectionReader(f.Stream, 0, f.Stream.Size())
}

err = f.Format.Extract(f.context(), inputStream, nil, handler)
err = f.Format.Extract(f.context(), inputStream, handler)
if err != nil {
// these being non-nil implies that we have indexed the archive,
// but if an error occurred, we likely only got part of the way
Expand Down
16 changes: 8 additions & 8 deletions interfaces.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,15 @@ type Compression interface {
Decompressor
}

// Archival is an archival format with both archive and extract methods.
// Archival is an archival format that can create/write archives.
type Archival interface {
Format
Archiver
}

// Extraction is an archival format that can extract from (read) archives.
type Extraction interface {
Format
Extractor
}

Expand Down Expand Up @@ -86,19 +91,14 @@ type ArchiverAsync interface {
// Extractor can extract files from an archive.
type Extractor interface {
// Extract walks entries in the archive and calls handleFile for each
// entry that matches the pathsInArchive filter by path/name.
//
// If pathsInArchive is nil, all files are extracted without discretion.
// If pathsInArchive is empty, no files are extracted.
// If a path refers to a directory, all files within it are extracted.
// Extracted files are passed to the handleFile callback for handling.
// entry in the archive.
//
// Any files opened in the FileHandler should be closed when it returns,
// as there is no guarantee the files can be read outside the handler
// or after the walk has proceeded to the next file.
//
// Context cancellation must be honored.
Extract(ctx context.Context, archive io.Reader, pathsInArchive []string, handleFile FileHandler) error
Extract(ctx context.Context, archive io.Reader, handleFile FileHandler) error
}

// Inserter can insert files into an existing archive.
Expand Down
Loading

0 comments on commit f9dfd58

Please sign in to comment.