Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Disallow problematic identifiers #499

Merged
merged 1 commit into from
Jul 16, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/source/1.0/spec/core/idl.rst
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,12 @@ The Smithy IDL is defined by the following ABNF:
trait_structure_kvp :`node_object_key` `ws` ":" `ws` `node_value`
apply_statement :"apply" `ws` `shape_id` `ws` `trait` `br`

.. rubric:: Shape ID

.. seealso::

Refer to :ref:`shape-id` for the ABNF grammar of shape IDs.


.. _comments:

Expand Down
4 changes: 3 additions & 1 deletion docs/source/1.0/spec/core/model.rst
Original file line number Diff line number Diff line change
Expand Up @@ -445,7 +445,9 @@ Shape IDs are formally defined by the following ABNF:
root_shape_id :`absolute_root_shape_id` / `identifier`
absolute_root_shape_id :`namespace` "#" `identifier`
namespace :`identifier` *("." `identifier`)
identifier :(ALPHA / "_") *(ALPHA / DIGIT / "_")
identifier :identifier_start *identifier_chars
mtdowling marked this conversation as resolved.
Show resolved Hide resolved
identifier_start :*"_" ALPHA
identifier_chars :ALPHA / DIGIT / "_"
shape_id_member :"$" `identifier`

.. rubric:: Best practices for defining shape names
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -147,22 +147,37 @@ public static void consumeNamespace(SimpleParser parser) {
* Expects and skips over a Smithy identifier production.
*
* <pre>
* identifier = (ALPHA / "_") *(ALPHA / DIGIT / "_")
* identifier = identifier_start *identifier_chars
* identifier_start = *"_" ALPHA
* identifier_chars = ALPHA / DIGIT / "_"
* </pre>
*
* @param parser Parser to consume tokens from.
*/
public static void consumeIdentifier(SimpleParser parser) {
// (ALPHA / "_")
if (!isIdentifierStart(parser.peek())) {
throw parser.syntax("Expected a valid identifier character, but found '"
+ parser.peekSingleCharForMessage() + '\'');
// Parse identifier_start
char c = parser.peek();
if (c == '_') {
parser.consumeUntilNoLongerMatches(next -> next == '_');
if (!ParserUtils.isValidIdentifierCharacter(parser.peek())) {
throw invalidIdentifier(parser);
}
} else if (!isAlphabetic(c)) {
throw invalidIdentifier(parser);
}

// *(ALPHA / DIGIT / "_")
// Skip the first character since it's known to be valid.
parser.skip();

// Parse identifier_chars
parser.consumeUntilNoLongerMatches(ParserUtils::isValidIdentifierCharacter);
}

private static RuntimeException invalidIdentifier(SimpleParser parser) {
throw parser.syntax("Expected a valid identifier character, but found '"
+ parser.peekSingleCharForMessage() + '\'');
}

/**
* Returns true if the given character is allowed in an identifier.
*
Expand All @@ -180,7 +195,7 @@ public static boolean isValidIdentifierCharacter(char c) {
* @return Returns true if the character can start an identifier.
*/
public static boolean isIdentifierStart(char c) {
return c == '_' || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
return c == '_' || isAlphabetic(c);
}

/**
Expand All @@ -192,4 +207,15 @@ public static boolean isIdentifierStart(char c) {
public static boolean isDigit(char c) {
return c >= '0' && c <= '9';
}

/**
* Returns true if the given character is an alphabetic character
* A-Z, a-z. This is a stricter version of {@link Character#isAlphabetic}.
*
* @param c Character to check.
* @return Returns true if the character is an alphabetic character.
*/
public static boolean isAlphabetic(char c) {
return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -85,29 +85,29 @@ public static ShapeId from(String id) {
public static boolean isValidNamespace(CharSequence namespace) {
if (namespace == null) {
return false;
} else if (namespace.equals(Prelude.NAMESPACE)) {
// Shortcut for prelude namespaces.
return true;
}

// Shortcut for prelude namespaces.
if (namespace.equals(Prelude.NAMESPACE)) {
return true;
int length = namespace.length();
if (length == 0) {
return false;
}

boolean start = true;
for (int position = 0; position < namespace.length(); position++) {
char c = namespace.charAt(position);
if (start) {
start = false;
if (!ParserUtils.isIdentifierStart(c)) {
return false;
}
} else if (c == '.') {
start = true;
} else if (!ParserUtils.isValidIdentifierCharacter(c)) {
int position = 0;
while (true) {
position = parseIdentifier(namespace, position);
if (position == -1) { // Bad: did not parse a valid identifier.
return false;
}
} else if (position == length) { // Good: parsed and reached the end.
return true;
} else if (namespace.charAt(position) != '.') { // Bad: invalid character.
return false;
} else if (++position >= length) { // Bad: trailing '.'
return false;
} // continue parsing after '.', expecting an identifier.
}

return !start;
}

/**
Expand All @@ -117,21 +117,40 @@ public static boolean isValidNamespace(CharSequence namespace) {
* @return Returns true if this is a valid identifier.
*/
public static boolean isValidIdentifier(CharSequence identifier) {
if (identifier == null || identifier.length() == 0) {
return false;
return parseIdentifier(identifier, 0) == identifier.length();
}

private static int parseIdentifier(CharSequence identifier, int offset) {
if (identifier == null || identifier.length() <= offset) {
return -1;
}

if (!ParserUtils.isIdentifierStart(identifier.charAt(0))) {
return false;
// Parse the required identifier_start production.
char startingChar = identifier.charAt(offset);
if (startingChar == '_') {
while (identifier.charAt(offset) == '_') {
offset++;
}
if (!ParserUtils.isValidIdentifierCharacter(identifier.charAt(offset))) {
return -1;
}
offset++;
} else if (!ParserUtils.isAlphabetic(startingChar)) {
return -1;
}

for (int i = 1; i < identifier.length(); i++) {
if (!ParserUtils.isValidIdentifierCharacter(identifier.charAt(i))) {
return false;
// Parse the optional identifier_chars production.
while (offset < identifier.length()) {
if (!ParserUtils.isValidIdentifierCharacter(identifier.charAt(offset))) {
// Return the position of the character that stops the identifier.
// This is either an invalid case (e.g., isValidIdentifier), or
// just the marker needed for isValidNamespace to find '.'.
return offset;
}
offset++;
}

return true;
return offset;
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -941,7 +941,8 @@ public void parsesValidVariableAccess() {
"${\nfoo\n}\n",
"${a}",
"${a_b_c}",
"${_}");
"${_a}",
"${__a}");

for (String expr : exprs) {
Selector.parse(expr);
Expand All @@ -956,7 +957,9 @@ public void detectsInvalidVariableAccess() {
"${}",
"$}",
"${a",
"${*}");
"${*}",
"${_}",
"${__}");

for (String expr : exprs) {
Assertions.assertThrows(SelectorSyntaxException.class, () -> Selector.parse(expr));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,6 @@ public static Collection<Object[]> shapeIdData() {
{ "Mixed.case#name", false},
{ "_foo.baz#name", false},
{ "__foo.baz#name", false},
{ "a._.b#name", false},

// invalid namespaces
{ "#name", true},
Expand All @@ -179,6 +178,8 @@ public static Collection<Object[]> shapeIdData() {
{ ".name.space#name", true},
{ "name-space#name", true},
{ "1namespace.foo#name", true},
{ "a._.b#name", true},
{ "a.____.b#name", true},

// valid shape names
{ "ns.foo#shapename", false},
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
// Parse error at line 3, column 19 near `#\n`: Expected a valid identifier character, but found '#'

metadata abc = __$#
metadata abc = aa$#

namespace foo.baz
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
// Parse error at line 4, column 11 near `%I`: Expected a valid identifier character, but found '%'
namespace com.foo

structure %Invalid {}
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
// Parse error at line 4, column 12 near ` `: Expected a valid identifier character, but found ' '
namespace com.foo

structure _ {}
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@ $version: "1.0"

namespace smithy.example

use _$#Bar
use a$#Bar