From ed76c3e7513434e8ca2f6abf51b2c48ee54dfa0b Mon Sep 17 00:00:00 2001 From: Takeshi Yamamuro Date: Fri, 7 Feb 2020 17:59:47 +0900 Subject: [PATCH 1/4] Fix --- docs/_data/menu-sql.yaml | 11 ++- ...keywords.md => sql-ref-ansi-compliance.md} | 85 ++++++++++++++++++- docs/sql-ref-arithmetic-ops.md | 22 ----- 3 files changed, 92 insertions(+), 26 deletions(-) rename docs/{sql-keywords.md => sql-ref-ansi-compliance.md} (87%) delete mode 100644 docs/sql-ref-arithmetic-ops.md diff --git a/docs/_data/menu-sql.yaml b/docs/_data/menu-sql.yaml index 241ec399d7bd..1e343f630f88 100644 --- a/docs/_data/menu-sql.yaml +++ b/docs/_data/menu-sql.yaml @@ -80,6 +80,15 @@ url: sql-ref-null-semantics.html - text: NaN Semantics url: sql-ref-nan-semantics.html + - text: ANSI Compliance + url: sql-ref-ansi-compliance.html + subitems: + - text: Arithmetic Operations + url: sql-ref-ansi-compliance.html#arithmetic-operations + - text: Type Conversion + url: sql-ref-ansi-compliance.html#type-conversion + - text: SQL Keywords + url: sql-ref-ansi-compliance.html#sql-keywords - text: SQL Syntax url: sql-ref-syntax.html subitems: @@ -214,5 +223,3 @@ url: sql-ref-syntax-aux-resource-mgmt-list-file.html - text: LIST JAR url: sql-ref-syntax-aux-resource-mgmt-list-jar.html - - text: Arithmetic operations - url: sql-ref-arithmetic-ops.html diff --git a/docs/sql-keywords.md b/docs/sql-ref-ansi-compliance.md similarity index 87% rename from docs/sql-keywords.md rename to docs/sql-ref-ansi-compliance.md index 9e4a3c54100c..6fd701224d85 100644 --- a/docs/sql-keywords.md +++ b/docs/sql-ref-ansi-compliance.md @@ -1,7 +1,7 @@ --- layout: global -title: Spark SQL Keywords -displayTitle: Spark SQL Keywords +title: ANSI Compliance +displayTitle: ANSI Compliance license: | Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with @@ -19,6 +19,87 @@ license: | limitations under the License. --- +Spark SQL has two options to comply with the SQL standard: `spark.sql.ansi.enabled` and `spark.sql.storeAssignmentPolicy`. +When `spark.sql.ansi.enabled` is set to `true` (`false` by default), Spark SQL follows the standard in basic behaviours (e.g., arithmetic operations, type conversion, and SQL parsing). +Moreover, Spark SQL has an independent option to control implicit casting behaviours when inserting rows in a table. +The casting behaviours are defined as store assignment rules in the standard. +When `spark.sql.storeAssignmentPolicy` is set to `ANSI`, Spark SQL complies with the ANSI store assignment rules and this setting is enabled by default. + +The following subsections present behaviour changes in arithmetic operations, type conversions, and SQL parsing when the ANSI mode enabled. + +### Arithmetic Operations + +In Spark SQL, arithmetic operations performed on numeric types (with the exception of decimal) are not checked for overflow by default. +This means that in case an operation causes an overflow, the result is the same that the same operation returns in a Java/Scala program (eg. if the sum of 2 integers is higher than the maximum value representable, the result is a negative number). +On the other hand, Spark SQL returns null for decimal overflow. +When `spark.sql.ansi.enabled` is set to `true` and overflow occurs in numeric and interval arithmetic operations, it throws an arithmetic exception at runtime. 
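+
+The check can be switched on per session with `SET`, so the examples below are easy to
+reproduce (a minimal sketch, shown only as a convenience):
+
+{% highlight sql %}
+-- Run the queries below with the ANSI behaviours enabled
+SET spark.sql.ansi.enabled=true;
+
+-- ...and switch back afterwards (`false` is the default)
+SET spark.sql.ansi.enabled=false;
+{% endhighlight %}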
+
+{% highlight sql %}
+-- `spark.sql.ansi.enabled=true`
+SELECT 2147483647 + 1;
+
+  java.lang.ArithmeticException: integer overflow
+
+-- `spark.sql.ansi.enabled=false`
+SELECT 2147483647 + 1;
+
+  +----------------+
+  |(2147483647 + 1)|
+  +----------------+
+  |     -2147483648|
+  +----------------+
+
+{% endhighlight %}
+
+### Type Conversion
+
+Spark SQL has three kinds of type conversions: explicit casting, type coercion, and store assignment casting.
+When `spark.sql.ansi.enabled` is set to `true`, explicit castings by `CAST` syntax throws a number-format exception at runtime for illegal cast patterns defined in the standard, e.g. casts from a string to an integer.
+On the other hand, `INSERT INTO` syntax throws an analysis exception when the ANSI mode enabled via `spark.sql.storeAssignmentPolicy=ANSI`.
+
+Currently, the ANSI mode affects explicit casting and assignment casting only.
+In future releases, the behaviour of type coercion might change along with the other two type conversion rules.
+
+{% highlight sql %}
+-- Examples of explicit casting
+
+-- `spark.sql.ansi.enabled=true`
+SELECT CAST('a' AS INT);
+
+  java.lang.NumberFormatException: invalid input syntax for type numeric: a
+
+-- `spark.sql.ansi.enabled=false` (This is a legacy behaviour until Spark 2.x)
+SELECT CAST('a' AS INT);
+
+  +--------------+
+  |CAST(a AS INT)|
+  +--------------+
+  |          null|
+  +--------------+
+
+-- Examples of store assignment rules
+CREATE TABLE t (v INT);
+
+-- `spark.sql.storeAssignmentPolicy=ANSI`
+INSERT INTO t VALUES ('1');
+
+  org.apache.spark.sql.AnalysisException: Cannot write incompatible data to table '`default`.`t`':
+  - Cannot safely cast 'v': StringType to IntegerType;
+
+-- `spark.sql.storeAssignmentPolicy=LEGACY` (This is a legacy behaviour until Spark 2.x)
+INSERT INTO t VALUES ('1');
+SELECT * FROM t;
+
+  +---+
+  |  v|
+  +---+
+  |  1|
+  +---+
+
+{% endhighlight %}
+
+### SQL Keywords
+
 When `spark.sql.ansi.enabled` is true, Spark SQL will use the ANSI mode parser.
 In this mode, Spark SQL has two kinds of keywords:
 * Reserved keywords: Keywords that are reserved and can't be used as identifiers for table, view, column, function, alias, etc.
diff --git a/docs/sql-ref-arithmetic-ops.md b/docs/sql-ref-arithmetic-ops.md
deleted file mode 100644
index 7bc8ffe31c99..000000000000
--- a/docs/sql-ref-arithmetic-ops.md
+++ /dev/null
@@ -1,22 +0,0 @@
----
-layout: global
-title: Arithmetic Operations
-displayTitle: Arithmetic Operations
-license: |
-  Licensed to the Apache Software Foundation (ASF) under one or more
-  contributor license agreements. See the NOTICE file distributed with
-  this work for additional information regarding copyright ownership.
-  The ASF licenses this file to You under the Apache License, Version 2.0
-  (the "License"); you may not use this file except in compliance with
-  the License. You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing, software
-  distributed under the License is distributed on an "AS IS" BASIS,
-  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-  See the License for the specific language governing permissions and
-  limitations under the License.
----
-
-Operations performed on numeric types (with the exception of decimal) are not checked for overflow. This means that in case an operation causes an overflow, the result is the same that the same operation returns in a Java/Scala program (eg. if the sum of 2 integers is higher than the maximum value representable, the result is a negative number).

From a8681de17a77664245a9ec398e1421adb93b5644 Mon Sep 17 00:00:00 2001
From: Takeshi Yamamuro
Date: Fri, 7 Feb 2020 21:55:12 +0900
Subject: [PATCH 2/4] Update

---
 docs/sql-ref-ansi-compliance.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md
index 6fd701224d85..e3223b648ca7 100644
--- a/docs/sql-ref-ansi-compliance.md
+++ b/docs/sql-ref-ansi-compliance.md
@@ -23,14 +23,14 @@ Spark SQL has two options to comply with the SQL standard: `spark.sql.ansi.enabl
 When `spark.sql.ansi.enabled` is set to `true` (`false` by default), Spark SQL follows the standard in basic behaviours (e.g., arithmetic operations, type conversion, and SQL parsing).
 Moreover, Spark SQL has an independent option to control implicit casting behaviours when inserting rows in a table.
 The casting behaviours are defined as store assignment rules in the standard.
-When `spark.sql.storeAssignmentPolicy` is set to `ANSI`, Spark SQL complies with the ANSI store assignment rules and this setting is enabled by default.
+When `spark.sql.storeAssignmentPolicy` is set to `ANSI`, Spark SQL complies with the ANSI store assignment rules and this setting is a default value.
 
 The following subsections present behaviour changes in arithmetic operations, type conversions, and SQL parsing when the ANSI mode enabled.
 
 ### Arithmetic Operations
 
 In Spark SQL, arithmetic operations performed on numeric types (with the exception of decimal) are not checked for overflow by default.
-This means that in case an operation causes an overflow, the result is the same that the same operation returns in a Java/Scala program (eg. if the sum of 2 integers is higher than the maximum value representable, the result is a negative number).
+This means that in case an operation causes an overflow, the result is the same that the same operation returns in a Java/Scala program (e.g., if the sum of 2 integers is higher than the maximum value representable, the result is a negative number).
 On the other hand, Spark SQL returns null for decimal overflow.
 When `spark.sql.ansi.enabled` is set to `true` and overflow occurs in numeric and interval arithmetic operations, it throws an arithmetic exception at runtime.

From 6b0d5d62dc407f5ba64dbc4594cec723bfa90daa Mon Sep 17 00:00:00 2001
From: Takeshi Yamamuro
Date: Sun, 9 Feb 2020 09:31:22 +0900
Subject: [PATCH 3/4] Fix

---
 docs/sql-ref-ansi-compliance.md | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md
index e3223b648ca7..ad2940645d79 100644
--- a/docs/sql-ref-ansi-compliance.md
+++ b/docs/sql-ref-ansi-compliance.md
@@ -29,10 +29,10 @@ The following subsections present behaviour changes in arithmetic operations, ty
 
 ### Arithmetic Operations
 
-In Spark SQL, arithmetic operations performed on numeric types (with the exception of decimal) are not checked for overflow by default.
-This means that in case an operation causes an overflow, the result is the same that the same operation returns in a Java/Scala program (e.g., if the sum of 2 integers is higher than the maximum value representable, the result is a negative number).
-On the other hand, Spark SQL returns null for decimal overflow.
-When `spark.sql.ansi.enabled` is set to `true` and overflow occurs in numeric and interval arithmetic operations, it throws an arithmetic exception at runtime. +In Spark SQL, arithmetic operations performed on numeric types (with the exception of decimal) are not checked for overflows by default. +This means that in case an operation causes overflows, the result is the same that the same operation returns in a Java/Scala program (e.g., if the sum of 2 integers is higher than the maximum value representable, the result is a negative number). +On the other hand, Spark SQL returns null for decimal overflows. +When `spark.sql.ansi.enabled` is set to `true` and an overflow occurs in numeric and interval arithmetic operations, it throws an arithmetic exception at runtime. {% highlight sql %} -- `spark.sql.ansi.enabled=true` @@ -54,7 +54,7 @@ SELECT 2147483647 + 1; ### Type Conversion Spark SQL has three kinds of type conversions: explicit casting, type coercion, and store assignment casting. -When `spark.sql.ansi.enabled` is set to `true`, explicit castings by `CAST` syntax throws a number-format exception at runtime for illegal cast patterns defined in the standard, e.g. casts from a string to an integer. +When `spark.sql.ansi.enabled` is set to `true`, explicit casting by `CAST` syntax throws a number-format exception at runtime for illegal cast patterns defined in the standard, e.g. casts from a string to an integer. On the other hand, `INSERT INTO` syntax throws an analysis exception when the ANSI mode enabled via `spark.sql.storeAssignmentPolicy=ANSI`. Currently, the ANSI mode affects explicit casting and assignment casting only. @@ -68,7 +68,7 @@ SELECT CAST('a' AS INT); java.lang.NumberFormatException: invalid input syntax for type numeric: a --- `spark.sql.ansi.enabled=false` (This is a legacy behaviour until Spark 2.x) +-- `spark.sql.ansi.enabled=false` (This is a default behaviour) SELECT CAST('a' AS INT); +--------------+ From 64b42b4cd919a15d7c3f85b3527a1a916e472558 Mon Sep 17 00:00:00 2001 From: Takeshi Yamamuro Date: Thu, 13 Feb 2020 07:54:40 +0900 Subject: [PATCH 4/4] Fix --- docs/sql-ref-ansi-compliance.md | 48 ++++++++++++++++++++++++++++++--- 1 file changed, 44 insertions(+), 4 deletions(-) diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md index ad2940645d79..d02383518b96 100644 --- a/docs/sql-ref-ansi-compliance.md +++ b/docs/sql-ref-ansi-compliance.md @@ -19,11 +19,39 @@ license: | limitations under the License. --- -Spark SQL has two options to comply with the SQL standard: `spark.sql.ansi.enabled` and `spark.sql.storeAssignmentPolicy`. -When `spark.sql.ansi.enabled` is set to `true` (`false` by default), Spark SQL follows the standard in basic behaviours (e.g., arithmetic operations, type conversion, and SQL parsing). +Spark SQL has two options to comply with the SQL standard: `spark.sql.ansi.enabled` and `spark.sql.storeAssignmentPolicy` (See a table below for details). +When `spark.sql.ansi.enabled` is set to `true`, Spark SQL follows the standard in basic behaviours (e.g., arithmetic operations, type conversion, and SQL parsing). Moreover, Spark SQL has an independent option to control implicit casting behaviours when inserting rows in a table. The casting behaviours are defined as store assignment rules in the standard. -When `spark.sql.storeAssignmentPolicy` is set to `ANSI`, Spark SQL complies with the ANSI store assignment rules and this setting is a default value. 
+When `spark.sql.storeAssignmentPolicy` is set to `ANSI`, Spark SQL complies with the ANSI store assignment rules.
+
+<table class="table">
+<tr><th>Property Name</th><th>Default</th><th>Meaning</th></tr>
+<tr>
+  <td><code>spark.sql.ansi.enabled</code></td>
+  <td>false</td>
+  <td>
+    When true, Spark tries to conform to the ANSI SQL specification:
+    1. Spark will throw a runtime exception if an overflow occurs in any operation on integral/decimal field.
+    2. Spark will forbid using the reserved keywords of ANSI SQL as identifiers in the SQL parser.
+  </td>
+</tr>
+<tr>
+  <td><code>spark.sql.storeAssignmentPolicy</code></td>
+  <td>ANSI</td>
+  <td>
+    When inserting a value into a column with different data type, Spark will perform type coercion.
+    Currently, we support 3 policies for the type coercion rules: ANSI, legacy and strict. With ANSI policy,
+    Spark performs the type coercion as per ANSI SQL. In practice, the behavior is mostly the same as PostgreSQL.
+    It disallows certain unreasonable type conversions such as converting string to int or double to boolean.
+    With legacy policy, Spark allows the type coercion as long as it is a valid Cast, which is very loose.
+    e.g. converting string to int or double to boolean is allowed.
+    It is also the only behavior in Spark 2.x and it is compatible with Hive.
+    With strict policy, Spark doesn't allow any possible precision loss or data truncation in type coercion,
+    e.g. converting double to int or decimal to double is not allowed.
+  </td>
+</tr>
+</table>
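+
+As a quick sketch of how these policies differ in practice (the exception text below is
+abbreviated and illustrative; see the examples later in this document for the exact
+ANSI and LEGACY behaviours):
+
+{% highlight sql %}
+CREATE TABLE t (v INT);
+
+-- `spark.sql.storeAssignmentPolicy=STRICT`: any possible precision loss is rejected
+INSERT INTO t VALUES (CAST(1.5 AS DOUBLE));
+
+  org.apache.spark.sql.AnalysisException: Cannot write incompatible data to table '`default`.`t`':
+  - Cannot safely cast 'v': DoubleType to IntegerType;
+
+-- `spark.sql.storeAssignmentPolicy=ANSI`: numeric narrowing is allowed, so the
+-- double is cast to INT and the fraction is dropped
+INSERT INTO t VALUES (CAST(1.5 AS DOUBLE));
+SELECT * FROM t;
+
+  +---+
+  |  v|
+  +---+
+  |  1|
+  +---+
+{% endhighlight %}
+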
The following subsections present behaviour changes in arithmetic operations, type conversions, and SQL parsing when the ANSI mode enabled. @@ -54,7 +82,7 @@ SELECT 2147483647 + 1; ### Type Conversion Spark SQL has three kinds of type conversions: explicit casting, type coercion, and store assignment casting. -When `spark.sql.ansi.enabled` is set to `true`, explicit casting by `CAST` syntax throws a number-format exception at runtime for illegal cast patterns defined in the standard, e.g. casts from a string to an integer. +When `spark.sql.ansi.enabled` is set to `true`, explicit casting by `CAST` syntax throws a runtime exception for illegal cast patterns defined in the standard, e.g. casts from a string to an integer. On the other hand, `INSERT INTO` syntax throws an analysis exception when the ANSI mode enabled via `spark.sql.storeAssignmentPolicy=ANSI`. Currently, the ANSI mode affects explicit casting and assignment casting only. @@ -68,6 +96,10 @@ SELECT CAST('a' AS INT); java.lang.NumberFormatException: invalid input syntax for type numeric: a +SELECT CAST(2147483648L AS INT); + + java.lang.ArithmeticException: Casting 2147483648 to int causes overflow + -- `spark.sql.ansi.enabled=false` (This is a default behaviour) SELECT CAST('a' AS INT); @@ -77,6 +109,14 @@ SELECT CAST('a' AS INT); | null| +--------------+ +SELECT CAST(2147483648L AS INT); + + +-----------------------+ + |CAST(2147483648 AS INT)| + +-----------------------+ + | -2147483648| + +-----------------------+ + -- Examples of store assignment rules CREATE TABLE t (v INT);