Skip to content

Commit

Permalink
helpers: Allow hyphen characters in UnicodeSanitize
Browse files Browse the repository at this point in the history
Improve handling of pre-existing hyphens in input to UnicodeSanitize.
This commit accomplishes three things:

1. Explicitly allow hyphens
2. Avoid appending a hyphen if a preceding hyphen is found
3. Avoid prepending a hyphen if a trailing hyphen is found

Fixes gohugoio#7288
  • Loading branch information
moorereason committed Nov 28, 2020
1 parent 64789fb commit 0298b11
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 37 deletions.
42 changes: 19 additions & 23 deletions helpers/path.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,21 +25,17 @@ import (
"strings"
"unicode"

"github.com/gohugoio/hugo/common/hugio"
"github.com/gohugoio/hugo/common/text"

"github.com/gohugoio/hugo/config"

"github.com/gohugoio/hugo/hugofs"

"github.com/gohugoio/hugo/common/hugio"
_errors "github.com/pkg/errors"
"github.com/spf13/afero"
)

var (
// ErrThemeUndefined is returned when a theme has not been defined by the user.
ErrThemeUndefined = errors.New("no theme set")
)
// ErrThemeUndefined is returned when a theme has not been defined by the user.
var ErrThemeUndefined = errors.New("no theme set")

// filepathPathBridge is a bridge for common functionality in filepath vs path
type filepathPathBridge interface {
Expand Down Expand Up @@ -130,30 +126,40 @@ func ishex(c rune) bool {

// UnicodeSanitize sanitizes string to be used in Hugo URL's, allowing only
// a predefined set of special Unicode characters.
// If RemovePathAccents configuration flag is enabled, Uniccode accents
// If RemovePathAccents configuration flag is enabled, Unicode accents
// are also removed.
// Spaces will be replaced with a single hyphen, and sequential hyphens will be reduced to one.
// Spaces will be replaced with a single hyphen, and sequential replaced hyphens will be reduced to one.
func (p *PathSpec) UnicodeSanitize(s string) string {
if p.RemovePathAccents {
s = text.RemoveAccentsString(s)
}

source := []rune(s)
target := make([]rune, 0, len(source))
var prependHyphen bool
var (
prependHyphen bool
wasHyphen bool
)

for i, r := range source {
isAllowed := r == '.' || r == '/' || r == '\\' || r == '_' || r == '#' || r == '+' || r == '~'
isAllowed := r == '.' || r == '/' || r == '\\' || r == '_' || r == '#' || r == '+' || r == '~' || r == '-'
isAllowed = isAllowed || unicode.IsLetter(r) || unicode.IsDigit(r) || unicode.IsMark(r)
isAllowed = isAllowed || (r == '%' && i+2 < len(source) && ishex(source[i+1]) && ishex(source[i+2]))

if isAllowed {
// track explicit hyphen in input; no need to add a new hyphen if
// we just saw one.
wasHyphen = r == '-'

if prependHyphen {
target = append(target, '-')
// if currently have a hyphen, don't prepend an extra one
if !wasHyphen {
target = append(target, '-')
}
prependHyphen = false
}
target = append(target, r)
} else if len(target) > 0 && (r == '-' || unicode.IsSpace(r)) {
} else if len(target) > 0 && unicode.IsSpace(r) && !wasHyphen {
prependHyphen = true
}
}
Expand All @@ -169,7 +175,6 @@ func ReplaceExtension(path string, newExt string) string {
}

func makePathRelative(inPath string, possibleDirectories ...string) (string, error) {

for _, currentPath := range possibleDirectories {
if strings.HasPrefix(inPath, currentPath) {
return strings.TrimPrefix(inPath, currentPath), nil
Expand Down Expand Up @@ -281,7 +286,6 @@ func fileAndExt(in string, b filepathPathBridge) (name string, ext string) {
}

func extractFilename(in, ext, base, pathSeparator string) (name string) {

// No file name cases. These are defined as:
// 1. any "in" path that ends in a pathSeparator
// 2. any "base" consisting of just an pathSeparator
Expand All @@ -299,7 +303,6 @@ func extractFilename(in, ext, base, pathSeparator string) (name string) {
name = base
}
return

}

// GetRelativePath returns the relative path of a given path.
Expand Down Expand Up @@ -474,21 +477,18 @@ func ExtractRootPaths(paths []string) []string {
r[i] = root
}
return r

}

// FindCWD returns the current working directory from where the Hugo
// executable is run.
func FindCWD() (string, error) {
serverFile, err := filepath.Abs(os.Args[0])

if err != nil {
return "", fmt.Errorf("can't get absolute path for executable: %v", err)
}

path := filepath.Dir(serverFile)
realFile, err := filepath.EvalSymlinks(serverFile)

if err != nil {
if _, err = os.Stat(serverFile + ".exe"); err == nil {
realFile = filepath.Clean(serverFile + ".exe")
Expand Down Expand Up @@ -516,7 +516,6 @@ func SymbolicWalk(fs afero.Fs, root string, walker hugofs.WalkFunc) error {
})

return w.Walk()

}

// LstatIfPossible can be used to call Lstat if possible, else Stat.
Expand Down Expand Up @@ -555,7 +554,6 @@ func OpenFilesForWriting(fs afero.Fs, filenames ...string) (io.WriteCloser, erro
}

return hugio.NewMultiWriteCloser(writeClosers...), nil

}

// OpenFileForWriting opens or creates the given file. If the target directory
Expand Down Expand Up @@ -598,7 +596,6 @@ func GetCacheDir(fs afero.Fs, cfg config.Provider) (string, error) {

// Fall back to a cache in /tmp.
return GetTempDir("hugo_cache", fs), nil

}

func getCacheDir(cfg config.Provider) string {
Expand All @@ -614,7 +611,6 @@ func getCacheDir(cfg config.Provider) string {
// is this project:
// https://github.com/philhawksworth/content-shards/blob/master/gulpfile.js
return "/opt/build/cache/hugo_cache/"

}

// This will fall back to an hugo_cache folder in the tmp dir, which should work fine for most CI
Expand Down
27 changes: 13 additions & 14 deletions helpers/path_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,10 @@ func TestMakePath(t *testing.T) {
expected string
removeAccents bool
}{
{"dot.slash/backslash\\underscore_pound#plus+hyphen-", "dot.slash/backslash\\underscore_pound#plus+hyphen-", true},
{"abcXYZ0123456789", "abcXYZ0123456789", true},
{"%20 %2", "%20-2", true},
{"dash- space-y zzz -tail", "dash-space-y-zzz-tail", true},
{" Foo bar ", "Foo-bar", true},
{"Foo.Bar/foo_Bar-Foo", "Foo.Bar/foo_Bar-Foo", true},
{"fOO,bar:foobAR", "fOObarfoobAR", true},
Expand All @@ -53,7 +57,7 @@ func TestMakePath(t *testing.T) {
{"a%C3%B1ame", "a%C3%B1ame", false}, // Issue #1292
{"this+is+a+test", "this+is+a+test", false}, // Issue #1290
{"~foo", "~foo", false}, // Issue #2177

{"foo--bar", "foo--bar", true}, // Issue #7288
}

for _, test := range tests {
Expand Down Expand Up @@ -184,7 +188,6 @@ func TestGetDottedRelativePath(t *testing.T) {
for _, f := range []func(string) string{filepath.FromSlash, func(s string) string { return s }} {
doTestGetDottedRelativePath(f, t)
}

}

func doTestGetDottedRelativePath(urlFixer func(string) string, t *testing.T) {
Expand Down Expand Up @@ -422,7 +425,6 @@ func createTempDirWithZeroLengthFiles() (string, error) {
}
// the dir now has one, zero length file in it
return d, nil

}

func createTempDirWithNonZeroLengthFiles() (string, error) {
Expand Down Expand Up @@ -451,7 +453,6 @@ func createTempDirWithNonZeroLengthFiles() (string, error) {

// the dir now has one, non-zero length file in it
return d, nil

}

func deleteTempDir(d string) {
Expand Down Expand Up @@ -490,7 +491,6 @@ func TestExists(t *testing.T) {
t.Errorf("Test %d failed. Expected %q got %q", i, d.expectedErr, err)
}
}

}

func TestAbsPathify(t *testing.T) {
Expand Down Expand Up @@ -544,7 +544,6 @@ func TestAbsPathify(t *testing.T) {
}
}
}

}

func TestExtNoDelimiter(t *testing.T) {
Expand Down Expand Up @@ -611,15 +610,12 @@ func TestFileAndExt(t *testing.T) {
t.Errorf("Test %d failed. Expected extension %q got %q.", i, d.expectedExt, ext)
}
}

}

func TestPathPrep(t *testing.T) {

}

func TestPrettifyPath(t *testing.T) {

}

func TestExtractAndGroupRootPaths(t *testing.T) {
Expand All @@ -642,16 +638,19 @@ func TestExtractAndGroupRootPaths(t *testing.T) {

// Make sure the original is preserved
c.Assert(in, qt.DeepEquals, inCopy)

}

func TestExtractRootPaths(t *testing.T) {
tests := []struct {
input []string
expected []string
}{{[]string{filepath.FromSlash("a/b"), filepath.FromSlash("a/b/c/"), "b",
filepath.FromSlash("/c/d"), filepath.FromSlash("d/"), filepath.FromSlash("//e//")},
[]string{"a", "a", "b", "c", "d", "e"}}}
}{{
[]string{
filepath.FromSlash("a/b"), filepath.FromSlash("a/b/c/"), "b",
filepath.FromSlash("/c/d"), filepath.FromSlash("d/"), filepath.FromSlash("//e//"),
},
[]string{"a", "a", "b", "c", "d", "e"},
}}

for _, test := range tests {
output := ExtractRootPaths(test.input)
Expand All @@ -667,7 +666,7 @@ func TestFindCWD(t *testing.T) {
expectedErr error
}

//cwd, _ := os.Getwd()
// cwd, _ := os.Getwd()
data := []test{
//{cwd, nil},
// Commenting this out. It doesn't work properly.
Expand Down

0 comments on commit 0298b11

Please sign in to comment.