update

cloud-fan · cloud-fan · commit b21157db8596 · 2020-11-24T01:42:53.000+08:00
diff --git a/docs/sql-ref-datatypes.md b/docs/sql-ref-datatypes.md
@@ -37,6 +37,8 @@ Spark SQL and DataFrames support the following data types:
   - `DecimalType`: Represents arbitrary-precision signed decimal numbers. Backed internally by `java.math.BigDecimal`. A `BigDecimal` consists of an arbitrary precision integer unscaled value and a 32-bit integer scale.
 * String type
   - `StringType`: Represents character string values.
+  - `VarcharType(length)`: A variant of `StringType` which has a length limit. Writing data will fail if the input string exceeds the length limit. Note: this type can only be used in table schemas, not in functions/operators.
+  - `CharType(length)`: A variant of `VarcharType(length)` which is fixed length. When writing data, the input string is padded if it is shorter than the char type length. When two char type values are compared, the shorter one is padded to the length of the longer one.
 * Binary type
   - `BinaryType`: Represents byte sequence values.
 * Boolean type
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
@@ -94,7 +94,7 @@ trait CheckAnalysis extends PredicateHelper {
 
       case p if p.analyzed => // Skip already analyzed sub-plans
 
-      case p if p.output.map(_.dataType).exists(CharVarcharUtils.hasCharVarchar) =>
+      case p if p.resolved && p.output.map(_.dataType).exists(CharVarcharUtils.hasCharVarchar) =>
         throw new IllegalStateException(
           "[BUG] logical plan should not have output of char/varchar type: " + p)
 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/CharVarcharUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/CharVarcharUtils.scala
@@ -92,16 +92,6 @@ object CharVarcharUtils {
     }
   }
 
-  /**
-   * Re-construct the original StructType from the type strings in the metadata of StructFields.
-   * This is needed when dealing with char/varchar columns/fields.
-   */
-  def getRawSchema(schema: StructType): StructType = {
-    StructType(schema.map { field =>
-      getRawType(field.metadata).map(rawType => field.copy(dataType = rawType)).getOrElse(field)
-    })
-  }
-
   /**
    * Returns expressions to apply read-side char type padding for the given attributes. String
    * values should be right-padded to N characters if it's from a CHAR(N) column/field.
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala
@@ -32,6 +32,7 @@ import org.apache.spark.sql.AnalysisException
 import org.apache.spark.sql.catalyst.analysis.Resolver
 import org.apache.spark.sql.catalyst.expressions.{Cast, Expression}
 import org.apache.spark.sql.catalyst.parser.CatalystSqlParser
+import org.apache.spark.sql.catalyst.util.CharVarcharUtils
 import org.apache.spark.sql.catalyst.util.DataTypeJsonUtils.{DataTypeJsonDeserializer, DataTypeJsonSerializer}
 import org.apache.spark.sql.catalyst.util.StringUtils.StringConcat
 import org.apache.spark.sql.internal.SQLConf
@@ -132,7 +133,8 @@ object DataType {
       ddl,
       CatalystSqlParser.parseDataType,
       "Cannot parse the data type: ",
-      fallbackParser = CatalystSqlParser.parseTableSchema)
+      fallbackParser = str => CharVarcharUtils.replaceCharVarcharWithString(
+        CatalystSqlParser.parseTableSchema(str)))
   }
 
   /**
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala
@@ -35,13 +35,10 @@ trait CharVarcharTestSuite extends QueryTest with SQLTestUtils {
   }
 
   test("char type values should be padded: partitioned columns") {
-    // DS V2 doesn't support partitioned table.
-    if (!conf.contains(SQLConf.DEFAULT_CATALOG.key)) {
-      withTable("t") {
-        sql(s"CREATE TABLE t(i STRING, c CHAR(5)) USING $format PARTITIONED BY (c)")
-        sql("INSERT INTO t VALUES ('1', 'a')")
-        checkAnswer(spark.table("t"), Row("1", "a" + " " * 4))
-      }
+    withTable("t") {
+      sql(s"CREATE TABLE t(i STRING, c CHAR(5)) USING $format PARTITIONED BY (c)")
+      sql("INSERT INTO t VALUES ('1', 'a')")
+      checkAnswer(spark.table("t"), Row("1", "a" + " " * 4))
     }
   }
 

Original file line number	Diff line number	Diff line change
`@@ -35,13 +35,10 @@ trait CharVarcharTestSuite extends QueryTest with SQLTestUtils {`
`35`	`35`	`}`
`36`	`36`
`37`	`37`	`test("char type values should be padded: partitioned columns") {`
`38`		`- // DS V2 doesn't support partitioned table.`
`39`		`- if (!conf.contains(SQLConf.DEFAULT_CATALOG.key)) {`
`40`		`- withTable("t") {`
`41`		`- sql(s"CREATE TABLE t(i STRING, c CHAR(5)) USING $format PARTITIONED BY (c)")`
`42`		`- sql("INSERT INTO t VALUES ('1', 'a')")`
`43`		`- checkAnswer(spark.table("t"), Row("1", "a" + " " * 4))`
`44`		`- }`
	`38`	`+ withTable("t") {`
	`39`	`+ sql(s"CREATE TABLE t(i STRING, c CHAR(5)) USING $format PARTITIONED BY (c)")`
	`40`	`+ sql("INSERT INTO t VALUES ('1', 'a')")`
	`41`	`+ checkAnswer(spark.table("t"), Row("1", "a" + " " * 4))`
`45`	`42`	`}`
`46`	`43`	`}`
`47`	`44`