From 982ca39c8ca49ded28bafcea9338c99d68a022bc Mon Sep 17 00:00:00 2001 From: Dominic Della Valle Date: Mon, 26 Mar 2018 19:54:08 -0400 Subject: [PATCH] squashme: more rules on reserved names --- extractor.go | 2 +- sanitize_windows.go | 33 ++++++++++++++++++++------------- 2 files changed, 21 insertions(+), 14 deletions(-) diff --git a/extractor.go b/extractor.go index dac06a0..4a0d4ac 100644 --- a/extractor.go +++ b/extractor.go @@ -65,7 +65,7 @@ func (te *Extractor) Extract(reader io.Reader) error { func (te *Extractor) outputPath(tarPath string) string { elems := strings.Split(tarPath, "/") // break into elems elems = elems[1:] // remove IPFS root - safePath := platformSanitize(elems) // assure that our output path is platform legal + safePath := platformSanitize(elems) // sanitize IPFS path to be platform legal safePath = fp.Join(te.Path, safePath) // rebase on to extraction target root return safePath } diff --git a/sanitize_windows.go b/sanitize_windows.go index c96923a..ab2f3ac 100644 --- a/sanitize_windows.go +++ b/sanitize_windows.go @@ -1,6 +1,7 @@ package tar import ( + "fmt" "net/url" "path/filepath" "regexp" @@ -8,38 +9,44 @@ import ( ) //https://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx +const reservedNamesRegexFmt = `(?i)^(%s)(?: *\.)?[^\w ]` //Case insensitive match reserved name, with or without whitespace between possible dot, but reserved word + non whitespace and non-dot is allowed var reservedNames = [...]string{"CON", "PRN", "AUX", "NUL", "COM1", "COM2", "COM3", "COM4", "COM5", "COM6", "COM7", "COM8", "COM9", "LPT1", "LPT2", "LPT3", "LPT4", "LPT5", "LPT6", "LPT7", "LPT8", "LPT9"} +var reservedCharsRegex *regexp.Regexp -const reservedCharsRegex = `[<>:"\\|?*]` //NOTE: `/` is not included, files with this in the name will cause problems +func init() { + reservedCharsRegex = regexp.MustCompile(`[<>:"\\|?*]`) //NOTE: `/` is not included, files with this in the name are illegal in IPFS itself +} func platformSanitize(pathElements []string) string { - // first pass: scan and prefix reserved names CON -> _CON - for i, pe := range pathElements { + //first pass: strip illegal tail & prefix reserved names `CON .` -> `_CON` + for pi := range pathElements { + pathElements[pi] = strings.TrimRight(pathElements[pi], ". ") //MSDN: Do not end a file or directory name with a space or a period + for _, rn := range reservedNames { - if pe == rn { - pathElements[i] = "_" + rn + re, _ := regexp.Compile(fmt.Sprintf(reservedNamesRegexFmt, rn)) //no err, regex is a constant with guaranteed constant argument + if matched := re.MatchString(pathElements[pi]); matched { + pathElements[pi] = "_" + pathElements[pi] break } } - pathElements[i] = strings.TrimRight(pe, ". ") //MSDN: Do not end a file or directory name with a space or a period } + //second pass: scan and encode reserved characters ? -> %3F - res := strings.Join(pathElements, `/`) // intentionally avoiding [file]path.Clean(), we want `\`'s intact - re := regexp.MustCompile(reservedCharsRegex) - illegalIndices := re.FindAllStringIndex(res, -1) + res := strings.Join(pathElements, `/`) //intentionally avoiding [file]path.Clean() being called with Join(); we do our own filtering first + illegalIndices := reservedCharsRegex.FindAllStringIndex(res, -1) if illegalIndices != nil { var lastIndex int var builder strings.Builder - allocAssist := (len(res) - len(illegalIndices)) + (len(illegalIndices) * 3) // 3 = encoded length + allocAssist := (len(res) - len(illegalIndices)) + (len(illegalIndices) * 3) //3 = encoded length builder.Grow(allocAssist) for _, si := range illegalIndices { - builder.WriteString(res[lastIndex:si[0]]) // append up to problem char - builder.WriteString(url.QueryEscape(res[si[0]:si[1]])) // escape and append problem char + builder.WriteString(res[lastIndex:si[0]]) //append up to problem char + builder.WriteString(url.QueryEscape(res[si[0]:si[1]])) //escape and append problem char lastIndex = si[1] } - builder.WriteString(res[lastIndex:]) // append remainder + builder.WriteString(res[lastIndex:]) //append remainder res = builder.String() }