Skip to content

Commit

Permalink
go/token: add IsIdentifier, IsKeyword, and IsExported
Browse files Browse the repository at this point in the history
Telling whether a string is a valid Go identifier can seem like an easy
task, but it's easy to forget about the edge cases. For example, some
implementations out there forget that an empty string or keywords like
"func" aren't valid identifiers.

Add a simple implementation with proper Unicode support, and start using
it in cmd/cover and cmd/doc. Other pieces of the standard library
reimplement part of this logic, but don't use a "func(string) bool"
signature, so we're leaving them untouched for now.

Add some tests too, to ensure that we actually handle these edge cases
correctly.

Since telling whether a string is a valid identifier requires knowing
that it's not a valid keyword, add IsKeyword too. The internal map was
already accessible via Lookup, but "Lookup(str) != IDENT" isn't as easy
to understand as IsKeyword(str). And, as per Josh's suggestion, we could
have IsKeyword (and probably Lookup too) use a perfect hash function
instead of a global map.

Finally, for consistency with these new functions, add IsExported. That
makes go/ast.IsExported a bit redundant, so perhaps it can be deprecated
in favor of go/token.IsExported in the future. Clarify that
token.IsExported doesn't imply token.IsIdentifier, to avoid ambiguity.

Fixes #30064.

Change-Id: I0e0e49215fd7e47b603ebc2b5a44086c51ba57f7
Reviewed-on: https://go-review.googlesource.com/c/go/+/169018
Run-TryBot: Daniel Martí <mvdan@mvdan.cc>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Robert Griesemer <gri@golang.org>
Reviewed-by: Alan Donovan <adonovan@google.com>
  • Loading branch information
mvdan authored and griesemer committed Apr 15, 2019
1 parent a01d108 commit 60a8dbf
Show file tree
Hide file tree
Showing 5 changed files with 78 additions and 46 deletions.
19 changes: 1 addition & 18 deletions src/cmd/cover/cover.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ import (
"log"
"os"
"sort"
"unicode"

"cmd/internal/edit"
"cmd/internal/objabi"
Expand Down Expand Up @@ -117,7 +116,7 @@ func parseFlags() error {
return fmt.Errorf("too many options")
}

if *varVar != "" && !isValidIdentifier(*varVar) {
if *varVar != "" && !token.IsIdentifier(*varVar) {
return fmt.Errorf("-var: %q is not a valid identifier", *varVar)
}

Expand Down Expand Up @@ -685,22 +684,6 @@ func (f *File) addVariables(w io.Writer) {
}
}

// isValidIdentifier reports whether ident is a non-empty string made up of
// letters, underscores, and digits, where the first character is not a digit.
// Keywords are not rejected by this check.
func isValidIdentifier(ident string) bool {
	if ident == "" {
		return false
	}
	for pos, r := range ident {
		switch {
		case r == '_', unicode.IsLetter(r):
			// Letters and underscores are accepted at any position.
		case pos > 0 && unicode.IsDigit(r):
			// Digits are accepted everywhere except the first position.
		default:
			return false
		}
	}
	return true
}

// It is possible for positions to repeat when there is a line
// directive that does not specify column information and the input
// has not been passed through gofmt.
Expand Down
23 changes: 7 additions & 16 deletions src/cmd/doc/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ import (
"flag"
"fmt"
"go/build"
"go/token"
"io"
"log"
"os"
Expand Down Expand Up @@ -333,28 +334,18 @@ func parseSymbol(str string) (symbol, method string) {
case 1:
case 2:
method = elem[1]
isIdentifier(method)
if !token.IsIdentifier(method) {
log.Fatalf("invalid identifier %q", method)
}
default:
log.Printf("too many periods in symbol specification")
usage()
}
symbol = elem[0]
isIdentifier(symbol)
return
}

// isIdentifier checks that the name is a valid Go identifier, and
// logs and exits if it is not.
func isIdentifier(name string) {
if len(name) == 0 {
log.Fatal("empty symbol")
}
for i, ch := range name {
if unicode.IsLetter(ch) || ch == '_' || i > 0 && unicode.IsDigit(ch) {
continue
}
log.Fatalf("invalid identifier %q", name)
if !token.IsIdentifier(symbol) {
log.Fatalf("invalid identifier %q", symbol)
}
return
}

// isExported reports whether the name is an exported identifier.
Expand Down
15 changes: 4 additions & 11 deletions src/go/ast/ast.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@ package ast
import (
"go/token"
"strings"
"unicode"
"unicode/utf8"
)

// ----------------------------------------------------------------------------
Expand Down Expand Up @@ -523,18 +521,13 @@ func (*ChanType) exprNode() {}
//
func NewIdent(name string) *Ident { return &Ident{token.NoPos, name, nil} }

// IsExported reports whether name is an exported Go symbol
// (that is, whether it begins with an upper-case letter).
// IsExported reports whether name starts with an upper-case letter.
//
func IsExported(name string) bool {
	if name == "" {
		// There is no first rune to inspect.
		return false
	}
	first, _ := utf8.DecodeRuneInString(name)
	return unicode.IsUpper(first)
}
func IsExported(name string) bool {
	// Thin wrapper: the check itself is implemented by go/token.
	return token.IsExported(name)
}

// IsExported reports whether id is an exported Go symbol
// (that is, whether it begins with an uppercase letter).
// IsExported reports whether id starts with an upper-case letter.
//
func (id *Ident) IsExported() bool {
	// Apply the package-level check to the identifier's name.
	return IsExported(id.Name)
}
func (id *Ident) IsExported() bool {
	// The name check is implemented by go/token.
	return token.IsExported(id.Name)
}

func (id *Ident) String() string {
if id != nil {
Expand Down
34 changes: 33 additions & 1 deletion src/go/token/token.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,11 @@
//
package token

import "strconv"
import (
"strconv"
"unicode"
"unicode/utf8"
)

// Token is the set of lexical tokens of the Go programming language.
type Token int
Expand Down Expand Up @@ -306,3 +310,31 @@ func (tok Token) IsOperator() bool { return operator_beg < tok && tok < operator
// it returns false otherwise.
//
func (tok Token) IsKeyword() bool { return keyword_beg < tok && tok < keyword_end }

// IsExported reports whether the first rune of name is an upper-case letter.
// It returns false for the empty string.
//
func IsExported(name string) bool {
	if name == "" {
		// There is no first rune to inspect.
		return false
	}
	first, _ := utf8.DecodeRuneInString(name)
	return unicode.IsUpper(first)
}

// IsKeyword reports whether name is present in the keyword table, that is,
// whether it is a Go keyword such as "func" or "return".
//
func IsKeyword(name string) bool {
	// TODO: opt: use a perfect hash function instead of a global map.
	_, found := keywords[name]
	return found
}

// IsIdentifier reports whether name is a Go identifier: a non-empty string
// consisting of letters, digits, and underscores whose first character is
// not a digit. Keywords are not identifiers.
//
func IsIdentifier(name string) bool {
	if name == "" || IsKeyword(name) {
		return false
	}
	for pos, r := range name {
		switch {
		case r == '_' || unicode.IsLetter(r):
			// Letters and underscores are accepted at any position.
		case pos > 0 && unicode.IsDigit(r):
			// Digits are accepted everywhere except the first position.
		default:
			return false
		}
	}
	return true
}
33 changes: 33 additions & 0 deletions src/go/token/token_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package token

import "testing"

// TestIsIdentifier runs IsIdentifier over a table of inputs, one subtest per
// entry, covering rejected inputs (empty, whitespace, leading digit, keyword)
// and accepted ones (ASCII, mixed, capitalized keyword, non-ASCII letters).
func TestIsIdentifier(t *testing.T) {
	type testCase struct {
		name string
		in   string
		want bool
	}
	cases := []testCase{
		// Inputs that must be rejected.
		{name: "Empty", in: "", want: false},
		{name: "Space", in: " ", want: false},
		{name: "SpaceSuffix", in: "foo ", want: false},
		{name: "Number", in: "123", want: false},
		{name: "Keyword", in: "func", want: false},

		// Inputs that must be accepted.
		{name: "LettersASCII", in: "foo", want: true},
		{name: "MixedASCII", in: "_bar123", want: true},
		{name: "UppercaseKeyword", in: "Func", want: true},
		{name: "LettersUnicode", in: "fóö", want: true},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := IsIdentifier(tc.in)
			if got != tc.want {
				t.Fatalf("IsIdentifier(%q) = %t, want %v", tc.in, got, tc.want)
			}
		})
	}
}

0 comments on commit 60a8dbf

Please sign in to comment.