Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -216,8 +216,8 @@ class InMemoryCatalog(
} else {
tableDefinition
}

catalog(db).tables.put(table, new TableDesc(tableWithLocation))
val tableProp = tableWithLocation.properties.filter(_._1 != "comment")
catalog(db).tables.put(table, new TableDesc(tableWithLocation.copy(properties = tableProp)))
}
}

Expand Down Expand Up @@ -295,7 +295,9 @@ class InMemoryCatalog(
assert(tableDefinition.identifier.database.isDefined)
val db = tableDefinition.identifier.database.get
requireTableExists(db, tableDefinition.identifier.table)
catalog(db).tables(tableDefinition.identifier.table).table = tableDefinition
val updatedProperties = tableDefinition.properties.filter(kv => kv._1 != "comment")
val newTableDefinition = tableDefinition.copy(properties = updatedProperties)
catalog(db).tables(tableDefinition.identifier.table).table = newTableDefinition
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This only fixes the issue in InMemoryCatalog. We still have a hole in HiveExternalCatalog.

Could you move the fixes to AlterTableSetPropertiesCommand ?

Copy link
Contributor Author

@sujith71955 sujith71955 Apr 26, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@gatorsmile @wzhfy
As i noticed for hive tables already the 'comment' property is been filtered while getting the hive table metadata(reading) in HiveClientImpl.scala -- > getTableOption() API. so i think it will be better if we can do the same for InMemoryCatalogs type table, while reading the table metadata we can exclude the 'comment', i think while alter command if we exclude 'comment' from table definition this can lead to an inconsistency state of metadata,

Eg: In create table we persist the 'comment' property inside properties map in metadata and while altering we are not.

Please correct me if i am going in wrong direction, Thanks.
I am very much curious to know why spark persists 'comment' property in both CatalogTable field level and in its properties map.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should not duplicate comment in both CatalogTable field and table properties. Please correct it if you found it in the code. Thanks!

Copy link
Contributor Author

@sujith71955 sujith71955 Apr 27, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sure, i will revisit and verify on this point and will update the PR.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you!

}

override def alterTableSchema(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -231,8 +231,12 @@ case class AlterTableSetPropertiesCommand(
val catalog = sparkSession.sessionState.catalog
val table = catalog.getTableMetadata(tableName)
DDLUtils.verifyAlterTableType(catalog, table, isView)
// This overrides old properties
val newTable = table.copy(properties = table.properties ++ properties)
// This overrides old properties and update the comment parameter of CatalogTable
// with the newly added/modified comment since CatalogTable also holds comment as its
// direct property.
val newTable = table.copy(
properties = table.properties ++ properties,
comment = properties.get("comment"))
catalog.alterTable(newTable)
Seq.empty[Row]
}
Expand Down Expand Up @@ -267,8 +271,10 @@ case class AlterTableUnsetPropertiesCommand(
}
}
}
// If comment is in the table property, we reset it to None
val tableComment = if (propKeys.contains("comment")) None else table.properties.get("comment")
val newProperties = table.properties.filter { case (k, _) => !propKeys.contains(k) }
val newTable = table.copy(properties = newProperties)
val newTable = table.copy(properties = newProperties, comment = tableComment)
catalog.alterTable(newTable)
Seq.empty[Row]
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
CREATE TABLE table_with_comment (a STRING, b INT, c STRING, d STRING) USING parquet COMMENT 'added';

DESC FORMATTED table_with_comment;

-- ALTER TABLE BY MODIFYING COMMENT
ALTER TABLE table_with_comment SET TBLPROPERTIES("comment"= "modified comment", "type"= "parquet");

DESC FORMATTED table_with_comment;

-- DROP TEST TABLE
DROP TABLE table_with_comment;

-- CREATE TABLE WITHOUT COMMENT
CREATE TABLE table_comment (a STRING, b INT) USING parquet;

DESC FORMATTED table_comment;

-- ALTER TABLE BY ADDING COMMENT
ALTER TABLE table_comment SET TBLPROPERTIES(comment = "added comment");

DESC formatted table_comment;

-- ALTER UNSET PROPERTIES COMMENT
ALTER TABLE table_comment UNSET TBLPROPERTIES IF EXISTS ('comment');

DESC FORMATTED table_comment;

-- DROP TEST TABLE
DROP TABLE table_comment;
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 12


-- !query 0
CREATE TABLE table_with_comment (a STRING, b INT, c STRING, d STRING) USING parquet COMMENT 'added'
-- !query 0 schema
struct<>
-- !query 0 output



-- !query 1
DESC FORMATTED table_with_comment
-- !query 1 schema
struct<col_name:string,data_type:string,comment:string>
-- !query 1 output
# col_name data_type comment
a string
b int
c string
d string

# Detailed Table Information
Database default
Table table_with_comment
Created [not included in comparison]
Last Access [not included in comparison]
Type MANAGED
Provider parquet
Comment added
Location [not included in comparison]sql/core/spark-warehouse/table_with_comment


-- !query 2
ALTER TABLE table_with_comment SET TBLPROPERTIES("comment"= "modified comment", "type"= "parquet")
-- !query 2 schema
struct<>
-- !query 2 output



-- !query 3
DESC FORMATTED table_with_comment
-- !query 3 schema
struct<col_name:string,data_type:string,comment:string>
-- !query 3 output
# col_name data_type comment
a string
b int
c string
d string

# Detailed Table Information
Database default
Table table_with_comment
Created [not included in comparison]
Last Access [not included in comparison]
Type MANAGED
Provider parquet
Comment modified comment
Properties [type=parquet]
Location [not included in comparison]sql/core/spark-warehouse/table_with_comment


-- !query 4
DROP TABLE table_with_comment
-- !query 4 schema
struct<>
-- !query 4 output



-- !query 5
CREATE TABLE table_comment (a STRING, b INT) USING parquet
-- !query 5 schema
struct<>
-- !query 5 output



-- !query 6
DESC FORMATTED table_comment
-- !query 6 schema
struct<col_name:string,data_type:string,comment:string>
-- !query 6 output
# col_name data_type comment
a string
b int

# Detailed Table Information
Database default
Table table_comment
Created [not included in comparison]
Last Access [not included in comparison]
Type MANAGED
Provider parquet
Location [not included in comparison]sql/core/spark-warehouse/table_comment


-- !query 7
ALTER TABLE table_comment SET TBLPROPERTIES(comment = "added comment")
-- !query 7 schema
struct<>
-- !query 7 output



-- !query 8
DESC formatted table_comment
-- !query 8 schema
struct<col_name:string,data_type:string,comment:string>
-- !query 8 output
# col_name data_type comment
a string
b int

# Detailed Table Information
Database default
Table table_comment
Created [not included in comparison]
Last Access [not included in comparison]
Type MANAGED
Provider parquet
Comment added comment
Location [not included in comparison]sql/core/spark-warehouse/table_comment


-- !query 9
ALTER TABLE table_comment UNSET TBLPROPERTIES IF EXISTS ('comment')
-- !query 9 schema
struct<>
-- !query 9 output



-- !query 10
DESC FORMATTED table_comment
-- !query 10 schema
struct<col_name:string,data_type:string,comment:string>
-- !query 10 output
# col_name data_type comment
a string
b int

# Detailed Table Information
Database default
Table table_comment
Created [not included in comparison]
Last Access [not included in comparison]
Type MANAGED
Provider parquet
Location [not included in comparison]sql/core/spark-warehouse/table_comment


-- !query 11
DROP TABLE table_comment
-- !query 11 schema
struct<>
-- !query 11 output