Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
c15ae29
Enable Expression Parsing in CatalystQl
hvanhovell Jan 7, 2016
cd7f8ec
Enable Expression Parsing in CatalystQl
hvanhovell Jan 7, 2016
682df13
Merge remote-tracking branch 'spark/master' into SPARK-12576
hvanhovell Jan 7, 2016
7f37d81
Add tests
hvanhovell Jan 7, 2016
c2b35b7
Fix a few parser bugs. Address rxin's comments.
hvanhovell Jan 7, 2016
b070bf9
Fix HiveQlSuite
hvanhovell Jan 8, 2016
bc0e298
Make name more consistent. Remove dead clause.
hvanhovell Jan 8, 2016
17d6da0
Replace existing SQL parser with the new Parser
hvanhovell Jan 10, 2016
ebe7d90
Merge remote-tracking branch 'spark/master' into SPARK-12575-2
hvanhovell Jan 10, 2016
5b19b8a
Merge remote-tracking branch 'spark/master' into SPARK-12575-2
hvanhovell Jan 10, 2016
e1de29f
Change tests using Approximate
hvanhovell Jan 10, 2016
d5c2898
Align CatalystQl behavior with the old SparkSQLParser.
hvanhovell Jan 11, 2016
3111ffb
Merge remote-tracking branch 'spark/master' into SPARK-12576
hvanhovell Jan 11, 2016
beb5ca0
Comment string improvement.
hvanhovell Jan 11, 2016
0592b8d
Merge branch 'SPARK-12576' into SPARK-12575-2
hvanhovell Jan 11, 2016
9a3d716
Merge remote-tracking branch 'spark/master' into SPARK-12575-2
hvanhovell Jan 12, 2016
3f73287
Fix nested unary expressions.
hvanhovell Jan 12, 2016
514ba3b
Add Long type
hvanhovell Jan 12, 2016
ea01c5a
Do not use keywords in query/
hvanhovell Jan 12, 2016
155aa44
Identifier names cannot start with an _ in order to avoid confusion w…
hvanhovell Jan 12, 2016
67b1386
Remove charset literal. Improve interval handling.
hvanhovell Jan 13, 2016
02dc7dd
Make tests pass. Improve integration.
hvanhovell Jan 13, 2016
5eea11d
Merge remote-tracking branch 'spark/master' into SPARK-12575-2
hvanhovell Jan 13, 2016
179c5d9
Style
hvanhovell Jan 13, 2016
2b6a876
Revert visibility of parse method.
hvanhovell Jan 13, 2016
8ea9865
Fix bug, and remove some not-yet-used functionality from the parser.
hvanhovell Jan 14, 2016
e8c0813
Fix python test.
hvanhovell Jan 14, 2016
7e31ee8
Remove CharSet literal. Change Decimal default to Double. Improve exp…
hvanhovell Jan 15, 2016
5aa780f
Revert inputTypes change in HyperLogLogPlusPlus
hvanhovell Jan 15, 2016
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions python/pyspark/sql/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -1081,8 +1081,7 @@ def test_replace(self):
def test_capture_analysis_exception(self):
self.assertRaises(AnalysisException, lambda: self.sqlCtx.sql("select abc"))
self.assertRaises(AnalysisException, lambda: self.df.selectExpr("a + b"))
# RuntimeException should not be captured
self.assertRaises(py4j.protocol.Py4JJavaError, lambda: self.sqlCtx.sql("abc"))
self.assertRaises(AnalysisException, lambda: self.sqlCtx.sql("abc"))

def test_capture_illegalargument_exception(self):
self.assertRaisesRegexp(IllegalArgumentException, "Setting negative mapred.reduce.tasks",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,6 @@ constant
| SmallintLiteral
| TinyintLiteral
| DecimalLiteral
| charSetStringLiteral
| booleanValue
;

Expand All @@ -132,13 +131,6 @@ stringLiteralSequence
StringLiteral StringLiteral+ -> ^(TOK_STRINGLITERALSEQUENCE StringLiteral StringLiteral+)
;

charSetStringLiteral
@init { gParent.pushMsg("character string literal", state); }
@after { gParent.popMsg(state); }
:
csName=CharSetName csLiteral=CharSetLiteral -> ^(TOK_CHARSETLITERAL $csName $csLiteral)
;

dateLiteral
:
KW_DATE StringLiteral ->
Expand All @@ -163,22 +155,38 @@ timestampLiteral

intervalLiteral
:
KW_INTERVAL StringLiteral qualifiers=intervalQualifiers ->
{
adaptor.create($qualifiers.tree.token.getType(), $StringLiteral.text)
(KW_INTERVAL intervalConstant KW_YEAR KW_TO KW_MONTH) => KW_INTERVAL intervalConstant KW_YEAR KW_TO KW_MONTH
-> ^(TOK_INTERVAL_YEAR_MONTH_LITERAL intervalConstant)
| (KW_INTERVAL intervalConstant KW_DAY KW_TO KW_SECOND) => KW_INTERVAL intervalConstant KW_DAY KW_TO KW_SECOND
-> ^(TOK_INTERVAL_DAY_TIME_LITERAL intervalConstant)
| KW_INTERVAL
((intervalConstant KW_YEAR)=> year=intervalConstant KW_YEAR)?
((intervalConstant KW_MONTH)=> month=intervalConstant KW_MONTH)?
((intervalConstant KW_WEEK)=> week=intervalConstant KW_WEEK)?
((intervalConstant KW_DAY)=> day=intervalConstant KW_DAY)?
((intervalConstant KW_HOUR)=> hour=intervalConstant KW_HOUR)?
((intervalConstant KW_MINUTE)=> minute=intervalConstant KW_MINUTE)?
((intervalConstant KW_SECOND)=> second=intervalConstant KW_SECOND)?
(millisecond=intervalConstant KW_MILLISECOND)?
(microsecond=intervalConstant KW_MICROSECOND)?
-> ^(TOK_INTERVAL
^(TOK_INTERVAL_YEAR_LITERAL $year?)
^(TOK_INTERVAL_MONTH_LITERAL $month?)
^(TOK_INTERVAL_WEEK_LITERAL $week?)
^(TOK_INTERVAL_DAY_LITERAL $day?)
^(TOK_INTERVAL_HOUR_LITERAL $hour?)
^(TOK_INTERVAL_MINUTE_LITERAL $minute?)
^(TOK_INTERVAL_SECOND_LITERAL $second?)
^(TOK_INTERVAL_MILLISECOND_LITERAL $millisecond?)
^(TOK_INTERVAL_MICROSECOND_LITERAL $microsecond?))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

are these copied from hive?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No this is what was supported in the old SqlParser.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

are you trying to support both hive's and our interval literal grammar?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hive does not support intervals with multiple time units, such as: 1 year 3 months 10 milliseconds

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

are you trying to support both hive's and our interval literal grammar?

In this case I am trying to support both. Our interval grammar can be seen as an extension to Hive's interval grammar.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

+1 on supporting both. actually we have to here.

;

intervalConstant
:
sign=(MINUS|PLUS)? value=Number -> {
adaptor.create(Number, ($sign != null ? $sign.getText() : "") + $value.getText())
}
;

intervalQualifiers
:
KW_YEAR KW_TO KW_MONTH -> TOK_INTERVAL_YEAR_MONTH_LITERAL
| KW_DAY KW_TO KW_SECOND -> TOK_INTERVAL_DAY_TIME_LITERAL
| KW_YEAR -> TOK_INTERVAL_YEAR_LITERAL
| KW_MONTH -> TOK_INTERVAL_MONTH_LITERAL
| KW_DAY -> TOK_INTERVAL_DAY_LITERAL
| KW_HOUR -> TOK_INTERVAL_HOUR_LITERAL
| KW_MINUTE -> TOK_INTERVAL_MINUTE_LITERAL
| KW_SECOND -> TOK_INTERVAL_SECOND_LITERAL
| StringLiteral
;

expression
Expand Down Expand Up @@ -219,7 +227,8 @@ nullCondition

precedenceUnaryPrefixExpression
:
(precedenceUnaryOperator^)* precedenceFieldExpression
(precedenceUnaryOperator+)=> precedenceUnaryOperator^ precedenceUnaryPrefixExpression
| precedenceFieldExpression
;

precedenceUnarySuffixExpression
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -206,11 +206,8 @@ tableName
@init { gParent.pushMsg("table name", state); }
@after { gParent.popMsg(state); }
:
db=identifier DOT tab=identifier
-> ^(TOK_TABNAME $db $tab)
|
tab=identifier
-> ^(TOK_TABNAME $tab)
id1=identifier (DOT id2=identifier)?
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what is this change?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

They are semantically equal. The old one is a bit longer. That is all.

-> ^(TOK_TABNAME $id1 $id2?)
;

viewName
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -307,12 +307,12 @@ KW_AUTHORIZATION: 'AUTHORIZATION';
KW_CONF: 'CONF';
KW_VALUES: 'VALUES';
KW_RELOAD: 'RELOAD';
KW_YEAR: 'YEAR';
KW_MONTH: 'MONTH';
KW_DAY: 'DAY';
KW_HOUR: 'HOUR';
KW_MINUTE: 'MINUTE';
KW_SECOND: 'SECOND';
KW_YEAR: 'YEAR'|'YEARS';
KW_MONTH: 'MONTH'|'MONTHS';
KW_DAY: 'DAY'|'DAYS';
KW_HOUR: 'HOUR'|'HOURS';
KW_MINUTE: 'MINUTE'|'MINUTES';
KW_SECOND: 'SECOND'|'SECONDS';
KW_START: 'START';
KW_TRANSACTION: 'TRANSACTION';
KW_COMMIT: 'COMMIT';
Expand All @@ -324,6 +324,9 @@ KW_ISOLATION: 'ISOLATION';
KW_LEVEL: 'LEVEL';
KW_SNAPSHOT: 'SNAPSHOT';
KW_AUTOCOMMIT: 'AUTOCOMMIT';
KW_WEEK: 'WEEK'|'WEEKS';
KW_MILLISECOND: 'MILLISECOND'|'MILLISECONDS';
KW_MICROSECOND: 'MICROSECOND'|'MICROSECONDS';

// Operators
// NOTE: if you add a new function/operator, add it to sysFuncNames so that describe function _FUNC_ will work.
Expand Down Expand Up @@ -400,12 +403,6 @@ StringLiteral
)+
;

CharSetLiteral
:
StringLiteral
| '0' 'X' (HexDigit|Digit)+
;

BigintLiteral
:
(Digit)+ 'L'
Expand Down Expand Up @@ -433,7 +430,7 @@ ByteLengthLiteral

Number
:
(Digit)+ ( DOT (Digit)* (Exponent)? | Exponent)?
((Digit+ (DOT Digit*)?) | (DOT Digit+)) Exponent?
;

/*
Expand All @@ -456,10 +453,10 @@ An Identifier can be:
- macro name
- hint name
- window name
*/
*/
Identifier
:
(Letter | Digit) (Letter | Digit | '_')*
(Letter | Digit | '_')+
| {allowQuotedId()}? QuotedIdentifier /* though at the language level we allow all Identifiers to be QuotedIdentifiers;
at the API level only columns are allowed to be of this form */
| '`' RegexComponent+ '`'
Expand All @@ -471,11 +468,6 @@ QuotedIdentifier
'`' ( '``' | ~('`') )* '`' { setText(getText().substring(1, getText().length() -1 ).replaceAll("``", "`")); }
;

CharSetName
:
'_' (Letter | Digit | '_' | '-' | '.' | ':' )+
;

WS : (' '|'\r'|'\t'|'\n') {$channel=HIDDEN;}
;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -116,16 +116,20 @@ TOK_DATELITERAL;
TOK_DATETIME;
TOK_TIMESTAMP;
TOK_TIMESTAMPLITERAL;
TOK_INTERVAL;
TOK_INTERVAL_YEAR_MONTH;
TOK_INTERVAL_YEAR_MONTH_LITERAL;
TOK_INTERVAL_DAY_TIME;
TOK_INTERVAL_DAY_TIME_LITERAL;
TOK_INTERVAL_YEAR_LITERAL;
TOK_INTERVAL_MONTH_LITERAL;
TOK_INTERVAL_WEEK_LITERAL;
TOK_INTERVAL_DAY_LITERAL;
TOK_INTERVAL_HOUR_LITERAL;
TOK_INTERVAL_MINUTE_LITERAL;
TOK_INTERVAL_SECOND_LITERAL;
TOK_INTERVAL_MILLISECOND_LITERAL;
TOK_INTERVAL_MICROSECOND_LITERAL;
TOK_STRING;
TOK_CHAR;
TOK_VARCHAR;
Expand Down Expand Up @@ -228,7 +232,6 @@ TOK_TMP_FILE;
TOK_TABSORTCOLNAMEASC;
TOK_TABSORTCOLNAMEDESC;
TOK_STRINGLITERALSEQUENCE;
TOK_CHARSETLITERAL;
TOK_CREATEFUNCTION;
TOK_DROPFUNCTION;
TOK_RELOADFUNCTION;
Expand Down Expand Up @@ -509,7 +512,9 @@ import java.util.HashMap;
xlateMap.put("KW_UPDATE", "UPDATE");
xlateMap.put("KW_VALUES", "VALUES");
xlateMap.put("KW_PURGE", "PURGE");

xlateMap.put("KW_WEEK", "WEEK");
xlateMap.put("KW_MILLISECOND", "MILLISECOND");
xlateMap.put("KW_MICROSECOND", "MICROSECOND");

// Operators
xlateMap.put("DOT", ".");
Expand Down Expand Up @@ -2078,6 +2083,7 @@ primitiveType
| KW_SMALLINT -> TOK_SMALLINT
| KW_INT -> TOK_INT
| KW_BIGINT -> TOK_BIGINT
| KW_LONG -> TOK_BIGINT
| KW_BOOLEAN -> TOK_BOOLEAN
| KW_FLOAT -> TOK_FLOAT
| KW_DOUBLE -> TOK_DOUBLE
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,46 +18,17 @@

package org.apache.spark.sql.catalyst.parser;

import java.io.UnsupportedEncodingException;

/**
* A couple of utility methods that help with parsing ASTs.
*
* Both methods in this class were take from the SemanticAnalyzer in Hive:
* The 'unescapeSQLString' method in this class was taken from the SemanticAnalyzer in Hive:
* ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
*/
public final class ParseUtils {
private ParseUtils() {
super();
}

public static String charSetString(String charSetName, String charSetString)
throws UnsupportedEncodingException {
// The character set name starts with a _, so strip that
charSetName = charSetName.substring(1);
if (charSetString.charAt(0) == '\'') {
return new String(unescapeSQLString(charSetString).getBytes(), charSetName);
} else // hex input is also supported
{
assert charSetString.charAt(0) == '0';
assert charSetString.charAt(1) == 'x';
charSetString = charSetString.substring(2);

byte[] bArray = new byte[charSetString.length() / 2];
int j = 0;
for (int i = 0; i < charSetString.length(); i += 2) {
int val = Character.digit(charSetString.charAt(i), 16) * 16
+ Character.digit(charSetString.charAt(i + 1), 16);
if (val > 127) {
val = val - 256;
}
bArray[j++] = (byte)val;
}

return new String(bArray, charSetName);
}
}

private static final int[] multiplier = new int[] {1000, 100, 10, 1};

@SuppressWarnings("nls")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,9 @@ import scala.util.parsing.input.CharArrayReader.EofCh
import org.apache.spark.sql.catalyst.plans.logical._

private[sql] abstract class AbstractSparkSQLParser
extends StandardTokenParsers with PackratParsers {
extends StandardTokenParsers with PackratParsers with ParserDialect {

def parse(input: String): LogicalPlan = synchronized {
def parsePlan(input: String): LogicalPlan = synchronized {
// Initialize the Keywords.
initLexical
phrase(start)(new lexical.Scanner(input)) match {
Expand Down
Loading