Skip to content

Commit

Permalink
rewrite file matching for fs Open, ReadDir and Stat (mholt#354)
Browse files Browse the repository at this point in the history
* rewrite file matching for fs Open, ReadDir and Stat

* add back Top* functions and tests
  • Loading branch information
WeidiDeng authored Dec 5, 2022
1 parent f6e004e commit 62ea369
Show file tree
Hide file tree
Showing 3 changed files with 227 additions and 127 deletions.
281 changes: 184 additions & 97 deletions fs.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"path"
"path/filepath"
"runtime"
"sort"
"strings"
"time"

Expand Down Expand Up @@ -309,67 +310,166 @@ func (f ArchiveFS) Open(name string) (fs.File, error) {
}, nil
}

var fsFile fs.File
var (
files []File
found bool
)
// collect them all or stop at exact file match, note we don't stop at folder match
handler := func(_ context.Context, file File) error {
// if this is the requested file, and it's a directory, set up the dirFile,
// which will include a listing of all its contents as we continue the walk
trimmedName := strings.Trim(file.NameInArchive, "/")
if trimmedName == name && file.IsDir() {
fsFile = &dirFile{extractedFile: extractedFile{File: file}}
return nil
file.NameInArchive = strings.Trim(file.NameInArchive, "/")
files = append(files, file)
if file.NameInArchive == name && !file.IsDir() {
found = true
return errStopWalk
}
return nil
}

// if the named file was a directory and we are filling its entries,
// add this entry to the list
if df, ok := fsFile.(*dirFile); ok {
df.entries = append(df.entries, fs.FileInfoToDirEntry(file))
var inputStream io.Reader = archiveFile
if f.Stream != nil {
inputStream = io.NewSectionReader(f.Stream, 0, f.Stream.Size())
}

// don't traverse into subfolders
if file.IsDir() {
return fs.SkipDir
}
err = f.Format.Extract(f.context(), inputStream, []string{name}, handler)
if found {
err = nil
}
if err != nil {
return nil, err
}

return nil
if len(files) == 0 {
return nil, fs.ErrNotExist
}

// exactly one or exact file found, test name match to detect implicit dir name https://github.com/mholt/archiver/issues/340
if (len(files) == 1 && files[0].NameInArchive == name) || found {
file := files[len(files)-1]
if file.IsDir() {
return &dirFile{extractedFile: extractedFile{File: file}}, nil
}

// if named file is not a regular file, it can't be opened
if !file.Mode().IsRegular() {
fsFile = extractedFile{File: file}
return errStopWalk
return extractedFile{File: file}, nil
}

// regular files can be read, so open it for reading
rc, err := file.Open()
if err != nil {
return err
return nil, err
}
fsFile = extractedFile{File: file, ReadCloser: rc, parentArchive: archiveFile}
return errStopWalk
return extractedFile{File: file, ReadCloser: rc, parentArchive: archiveFile}, nil
}

var inputStream io.Reader = archiveFile
if f.Stream != nil {
inputStream = io.NewSectionReader(f.Stream, 0, f.Stream.Size())
// implicit files
files = fillImplicit(files)
file := search(name, files)
if file == nil {
return nil, fs.ErrNotExist
}

err = f.Format.Extract(f.context(), inputStream, []string{name}, handler)
if err != nil && fsFile != nil {
if ef, ok := fsFile.(extractedFile); ok {
if ef.parentArchive != nil {
// don't close the archive file in above defer; it
// will be closed when the returned file is closed
err = nil
}
}
if file.IsDir() {
return &dirFile{extractedFile: extractedFile{File: *file}, entries: openReadDir(name, files)}, nil
}

// very unlikely
// maybe just panic, because extractor already walk through all the entries, file is impossible to read
// unless it's from a zip file.

// if named file is not a regular file, it can't be opened
if !file.Mode().IsRegular() {
return extractedFile{File: *file}, nil
}

// regular files can be read, so open it for reading
rc, err := file.Open()
if err != nil {
return nil, err
}
if fsFile == nil {
return nil, fs.ErrNotExist
return extractedFile{File: *file, ReadCloser: rc, parentArchive: archiveFile}, nil
}

// copy of the same function from zip
func split(name string) (dir, elem string, isDir bool) {
if name[len(name)-1] == '/' {
isDir = true
name = name[:len(name)-1]
}
i := len(name) - 1
for i >= 0 && name[i] != '/' {
i--
}
if i < 0 {
return ".", name, isDir
}
return name[:i], name[i+1:], isDir
}

// modified from zip.Reader initFileList, it's used to find all implicit dirs
func fillImplicit(files []File) []File {
dirs := make(map[string]bool)
knownDirs := make(map[string]bool)
entries := make([]File, 0, 0)
for _, file := range files {
for dir := path.Dir(file.NameInArchive); dir != "."; dir = path.Dir(dir) {
dirs[dir] = true
}
entries = append(entries, file)
if file.IsDir() {
knownDirs[file.NameInArchive] = true
}
}
for dir := range dirs {
if !knownDirs[dir] {
entries = append(entries, File{FileInfo: implicitDirInfo{implicitDirEntry{path.Base(dir)}}, NameInArchive: dir})
}
}

return fsFile, nil
sort.Slice(entries, func(i, j int) bool {
fi, fj := entries[i], entries[j]
di, ei, _ := split(fi.NameInArchive)
dj, ej, _ := split(fj.NameInArchive)

if di != dj {
return di < dj
}
return ei < ej
})
return entries
}

// modified from zip.Reader openLookup
func search(name string, entries []File) *File {
dir, elem, _ := split(name)
i := sort.Search(len(entries), func(i int) bool {
idir, ielem, _ := split(entries[i].NameInArchive)
return idir > dir || idir == dir && ielem >= elem
})
if i < len(entries) {
fname := entries[i].NameInArchive
if fname == name || len(fname) == len(name)+1 && fname[len(name)] == '/' && fname[:len(name)] == name {
return &entries[i]
}
}
return nil
}

// modified from zip.Reader openReadDir
func openReadDir(dir string, entries []File) []fs.DirEntry {
i := sort.Search(len(entries), func(i int) bool {
idir, _, _ := split(entries[i].NameInArchive)
return idir >= dir
})
j := sort.Search(len(entries), func(j int) bool {
jdir, _, _ := split(entries[j].NameInArchive)
return jdir > dir
})
dirs := make([]fs.DirEntry, j-i)
for idx := range dirs {
dirs[idx] = fs.FileInfoToDirEntry(entries[i+idx])
}
return dirs
}

// Stat stats the named file from within the archive. If name is "." then
Expand Down Expand Up @@ -404,15 +504,15 @@ func (f ArchiveFS) Stat(name string) (fs.FileInfo, error) {
defer archiveFile.Close()
}

var result File
var (
files []File
found bool
)
handler := func(_ context.Context, file File) error {
// in theory, the first file handled should be the one requested,
// unless... the file requested is a directory and the archive was
// created depth-first (i.e. directory contents added before the
// directory itself), in which case we have to iterate through the
// contents first; hence the check for exact filename match (issue #310)
if strings.TrimRight(file.NameInArchive, "/") == strings.TrimRight(name, "/") {
result = file
file.NameInArchive = strings.Trim(file.NameInArchive, "/")
files = append(files, file)
if file.NameInArchive == name {
found = true
return errStopWalk
}
return nil
Expand All @@ -422,13 +522,23 @@ func (f ArchiveFS) Stat(name string) (fs.FileInfo, error) {
inputStream = io.NewSectionReader(f.Stream, 0, f.Stream.Size())
}
err = f.Format.Extract(f.context(), inputStream, []string{name}, handler)
if err != nil && result.FileInfo == nil {
if found {
err = nil
}
if err != nil {
return nil, err
}
if result.FileInfo == nil {

if (len(files) == 0 && files[0].NameInArchive == name) || found {
return files[len(files)-1].FileInfo, nil
}

files = fillImplicit(files)
file := search(name, files)
if file == nil {
return nil, fs.ErrNotExist
}
return result.FileInfo, nil
return file.FileInfo, nil
}

// ReadDir reads the named directory from within the archive.
Expand All @@ -450,53 +560,18 @@ func (f ArchiveFS) ReadDir(name string) ([]fs.DirEntry, error) {
// apply prefix if fs is rooted in a subtree
name = path.Join(f.Prefix, name)

// store entries in a map to inexpensively avoid duplication
entries := make(map[string]fs.DirEntry)
// collect all files with prefix
var (
files []File
foundFile bool
)
handler := func(_ context.Context, file File) error {
// directories may end with trailing slash; standardize name
trimmedName := strings.Trim(file.NameInArchive, "/")

// don't include the named directory itself in the list of entries
if trimmedName == name {
return nil
}

// items added to an archive depth-first results in the subfolder entry being
// added to the archive after all the files within it, meaning we won't have
// the chance to return SkipDir before traversing into it; so we have to also
// check if we are within a subfolder deeper than the requested name (because
// this is a ReadDir function, we do not intend to traverse subfolders) (issue #310)
// in other words, archive entries can be created out-of-(breadth-first)-order,
// or even an arbitrary/random order, and we need to make sure we get all entries
// in just this directory
if path.Dir(trimmedName) != name {
// additionally, some archive files don't have actual entries for folders,
// leaving them to be inferred from the names of files instead (issue #330)
// so as we traverse deeper, we need to implicitly find subfolders within
// this current directory and add fake entries to the output
remainingPath := file.NameInArchive

if name != "." {
remainingPath = strings.TrimPrefix(file.NameInArchive, name)
}
nextDir := topDir(remainingPath) // if name in archive is "a/b/c" and root is "a", this becomes "b" (the implied folder to add)
implicitDir := path.Join(name, nextDir) // the full path of the implied directory

// create fake entry only if no entry currently exists (don't overwrite a real entry)
if _, ok := entries[implicitDir]; !ok {
entries[implicitDir] = implicitDirEntry{nextDir}
}

return fs.SkipDir
}

entries[file.NameInArchive] = fs.FileInfoToDirEntry(file)

// don't traverse deeper into subfolders
if file.IsDir() {
return fs.SkipDir
file.NameInArchive = strings.Trim(file.NameInArchive, "/")
files = append(files, file)
if file.NameInArchive == name && !file.IsDir() {
foundFile = true
return errStopWalk
}

return nil
}

Expand All @@ -512,17 +587,29 @@ func (f ArchiveFS) ReadDir(name string) ([]fs.DirEntry, error) {
}

err = f.Format.Extract(f.context(), inputStream, filter, handler)
if foundFile {
return nil, &fs.PathError{Op: "readdir", Path: name, Err: errors.New("not a dir")}
}
if err != nil {
return nil, err
}

// convert map to slice
entriesSlice := make([]fs.DirEntry, 0, len(entries))
for _, ent := range entries {
entriesSlice = append(entriesSlice, ent)
// always find all implicit directories
files = fillImplicit(files)
// and return early for dot file
if name == "." {
return openReadDir(name, files), nil
}

return entriesSlice, nil
file := search(name, files)
if file == nil {
return nil, fs.ErrNotExist
}

if !file.IsDir() {
return nil, &fs.PathError{Op: "readdir", Path: name, Err: errors.New("not a dir")}
}
return openReadDir(name, files), nil
}

// Sub returns an FS corresponding to the subtree rooted at dir.
Expand Down
Loading

0 comments on commit 62ea369

Please sign in to comment.