Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,30 @@

grammar SqlBase;

@members {
/**
* Verify whether current token is a valid decimal token (which contains dot).
* Returns true if the character that follows the token is not a digit or letter or underscore.
*
* For example:
* For char stream "2.3", "2." is not a valid decimal token, because it is followed by digit '3'.
* For char stream "2.3_", "2.3" is not a valid decimal token, because it is followed by '_'.
* For char stream "2.3W", "2.3" is not a valid decimal token, because it is followed by 'W'.
* For char stream "12.0D 34.E2+0.12 ", 12.0D is a valid decimal token because it is followed
* by a space. 34.E2 is a valid decimal token because it is followed by symbol '+'
* which is not a digit or letter or underscore.
*/
public boolean isValidDecimal() {
int nextChar = _input.LA(1);
if (nextChar >= 'A' && nextChar <= 'Z' || nextChar >= '0' && nextChar <= '9' ||
Copy link
Contributor

@hvanhovell hvanhovell Sep 14, 2016

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You are basically checking the IDENTIFIER rule here by hand.

You could also write:

return !(nextChar >= 'A' && nextChar <= 'Z' ||
         nextChar >= '0' && nextChar <= '9' ||
         nextChar == '_');

nextChar == '_') {
return false;
} else {
return true;
}
}
}

tokens {
DELIMITER
}
Expand Down Expand Up @@ -917,23 +941,22 @@ INTEGER_VALUE
;

DECIMAL_VALUE
: DIGIT+ '.' DIGIT*
| '.' DIGIT+
: DECIMAL_DIGITS {isValidDecimal()}?
;

SCIENTIFIC_DECIMAL_VALUE
: DIGIT+ ('.' DIGIT*)? EXPONENT
| '.' DIGIT+ EXPONENT
: DIGIT+ EXPONENT
| DECIMAL_DIGITS EXPONENT {isValidDecimal()}?
;

DOUBLE_LITERAL
:
(INTEGER_VALUE | DECIMAL_VALUE | SCIENTIFIC_DECIMAL_VALUE) 'D'
: DIGIT+ EXPONENT? 'D'
| DECIMAL_DIGITS EXPONENT? 'D' {isValidDecimal()}?
;

BIGDECIMAL_LITERAL
:
(INTEGER_VALUE | DECIMAL_VALUE | SCIENTIFIC_DECIMAL_VALUE) 'BD'
: DIGIT+ EXPONENT? 'BD'
| DECIMAL_DIGITS EXPONENT? 'BD' {isValidDecimal()}?
;

IDENTIFIER
Expand All @@ -944,6 +967,11 @@ BACKQUOTED_IDENTIFIER
: '`' ( ~'`' | '``' )* '`'
;

// Digits of a number that contains a decimal point: either one or more DIGITs, a '.', and
// zero or more DIGITs (e.g. "12." or "12.5"), or a leading '.' followed by one or more DIGITs
// (e.g. ".5"). As a fragment it never produces a token by itself; it is only referenced from
// other lexer rules.
fragment DECIMAL_DIGITS
: DIGIT+ '.' DIGIT*
| '.' DIGIT+
;

// Exponent part of a numeric literal: the letter 'E', an optional '+' or '-' sign, and one or
// more DIGITs (e.g. "E2", "E-10").
fragment EXPONENT
: 'E' [+-]? DIGIT+
;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ package org.apache.spark.sql.catalyst.parser

import java.sql.{Date, Timestamp}

import org.apache.spark.sql.catalyst.FunctionIdentifier
import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier}
import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, _}
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.plans.PlanTest
Expand Down Expand Up @@ -518,4 +518,17 @@ class ExpressionParserSuite extends PlanTest {
assertEqual("current_date", CurrentDate())
assertEqual("current_timestamp", CurrentTimestamp())
}

test("SPARK-17364, fully qualified column name which starts with number") {
  // Every entry is expected to parse to an UnresolvedAttribute built from the very same text,
  // even though parts of it look like the beginning of a numeric literal.
  val columnNames = Seq(
    "123_",
    "1a.123_",
    // ".123" should not be treated as token of type DECIMAL_VALUE
    "a.123A",
    // ".123E3" should not be treated as token of type SCIENTIFIC_DECIMAL_VALUE
    "a.123E3_column",
    // ".123D" should not be treated as token of type DOUBLE_LITERAL
    "a.123D_column",
    // ".123BD" should not be treated as token of type BIGDECIMAL_LITERAL
    "a.123BD_column")

  columnNames.foreach { columnName =>
    assertEqual(columnName, UnresolvedAttribute(columnName))
  }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -91,4 +91,17 @@ class TableIdentifierParserSuite extends SparkFunSuite {
assert(TableIdentifier(nonReserved) === parseTableIdentifier(nonReserved))
}
}

test("SPARK-17364 table identifier - contains number") {
  // Pairs of (text to parse, identifier the parser is expected to produce), checked in order.
  val expectations = Seq(
    "123_" -> TableIdentifier("123_"),
    "1a.123_" -> TableIdentifier("123_", Some("1a")),
    // ".123" should not be treated as token of type DECIMAL_VALUE
    "a.123A" -> TableIdentifier("123A", Some("a")),
    // ".123E3" should not be treated as token of type SCIENTIFIC_DECIMAL_VALUE
    "a.123E3_LIST" -> TableIdentifier("123E3_LIST", Some("a")),
    // ".123D" should not be treated as token of type DOUBLE_LITERAL
    "a.123D_LIST" -> TableIdentifier("123D_LIST", Some("a")),
    // ".123BD" should not be treated as token of type BIGDECIMAL_LITERAL
    "a.123BD_LIST" -> TableIdentifier("123BD_LIST", Some("a")))

  expectations.foreach { case (text, expected) =>
    assert(parseTableIdentifier(text) == expected)
  }
}
}