From 2132023a3bc8c7834fa3c4a27428eedd714cf537 Mon Sep 17 00:00:00 2001 From: JingsongLi Date: Wed, 26 Aug 2020 14:55:20 +0800 Subject: [PATCH 1/5] Extract FixupTypes to Iceberg from Spark --- .../org/apache/iceberg/types}/FixupTypes.java | 59 ++++++------------- .../apache/iceberg/spark/SparkFixupTypes.java | 54 +++++++++++++++++ .../apache/iceberg/spark/SparkSchemaUtil.java | 2 +- 3 files changed, 74 insertions(+), 41 deletions(-) rename {spark/src/main/java/org/apache/iceberg/spark => api/src/main/java/org/apache/iceberg/types}/FixupTypes.java (68%) create mode 100644 spark/src/main/java/org/apache/iceberg/spark/SparkFixupTypes.java diff --git a/spark/src/main/java/org/apache/iceberg/spark/FixupTypes.java b/api/src/main/java/org/apache/iceberg/types/FixupTypes.java similarity index 68% rename from spark/src/main/java/org/apache/iceberg/spark/FixupTypes.java rename to api/src/main/java/org/apache/iceberg/types/FixupTypes.java index 3c3cc1ea67c2..d34632efaa64 100644 --- a/spark/src/main/java/org/apache/iceberg/spark/FixupTypes.java +++ b/api/src/main/java/org/apache/iceberg/types/FixupTypes.java @@ -1,48 +1,34 @@ /* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. */ -package org.apache.iceberg.spark; +package org.apache.iceberg.types; import java.util.List; import java.util.function.Supplier; import org.apache.iceberg.Schema; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.relocated.com.google.common.collect.Lists; -import org.apache.iceberg.types.Type; -import org.apache.iceberg.types.TypeUtil; -import org.apache.iceberg.types.Types; /** - * This is used to fix primitive types to match a table schema. Some types, like binary and fixed, - * are converted to the same Spark type. Conversion back can produce only one, which may not be - * correct. This uses a reference schema to override types that were lost in round-trip conversion. + * This is used to fix primitive types to match a table schema. This uses a reference schema to + * override types that were lost in round-trip conversion. */ -class FixupTypes extends TypeUtil.CustomOrderSchemaVisitor { +public abstract class FixupTypes extends TypeUtil.CustomOrderSchemaVisitor { private final Schema referenceSchema; private Type sourceType; - static Schema fixup(Schema schema, Schema referenceSchema) { - return new Schema(TypeUtil.visit(schema, - new FixupTypes(referenceSchema)).asStructType().fields()); - } - - private FixupTypes(Schema referenceSchema) { + public FixupTypes(Schema referenceSchema) { this.referenceSchema = referenceSchema; this.sourceType = referenceSchema.asStruct(); } @@ -156,20 +142,13 @@ public Type primitive(Type.PrimitiveType primitive) { return primitive; // already correct } - switch (primitive.typeId()) { - case STRING: - if (sourceType.typeId() == Type.TypeID.UUID) { - return sourceType; - } - break; - case BINARY: - if (sourceType.typeId() == Type.TypeID.FIXED) { - return sourceType; - } - break; - default: + if (fixupPrimitive(primitive, sourceType)) { + return sourceType; } + // nothing to fix up, let validation catch promotion errors return primitive; } + + protected abstract boolean fixupPrimitive(Type.PrimitiveType type, Type source); } diff --git a/spark/src/main/java/org/apache/iceberg/spark/SparkFixupTypes.java b/spark/src/main/java/org/apache/iceberg/spark/SparkFixupTypes.java new file mode 100644 index 000000000000..073cd66db7c2 --- /dev/null +++ b/spark/src/main/java/org/apache/iceberg/spark/SparkFixupTypes.java @@ -0,0 +1,54 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.iceberg.spark; + +import org.apache.iceberg.Schema; +import org.apache.iceberg.types.FixupTypes; +import org.apache.iceberg.types.Type; +import org.apache.iceberg.types.TypeUtil; + +/** + * Some types, like binary and fixed, are converted to the same Spark type. Conversion back + * can produce only one, which may not be correct. + */ +class SparkFixupTypes extends FixupTypes { + + private SparkFixupTypes(Schema referenceSchema) { + super(referenceSchema); + } + + static Schema fixup(Schema schema, Schema referenceSchema) { + return new Schema(TypeUtil.visit(schema, + new SparkFixupTypes(referenceSchema)).asStructType().fields()); + } + + @Override + protected boolean fixupPrimitive(Type.PrimitiveType type, Type source) { + switch (type.typeId()) { + case STRING: + if (source.typeId() == Type.TypeID.UUID) { + return true; + } + break; + case BINARY: + if (source.typeId() == Type.TypeID.FIXED) { + return true; + } + break; + default: + } + return false; + } +} diff --git a/spark/src/main/java/org/apache/iceberg/spark/SparkSchemaUtil.java b/spark/src/main/java/org/apache/iceberg/spark/SparkSchemaUtil.java index 3f6e41e3072b..484c407e0247 100644 --- a/spark/src/main/java/org/apache/iceberg/spark/SparkSchemaUtil.java +++ b/spark/src/main/java/org/apache/iceberg/spark/SparkSchemaUtil.java @@ -163,7 +163,7 @@ public static Schema convert(Schema baseSchema, StructType sparkType) { // reassign ids to match the base schema Schema schema = TypeUtil.reassignIds(new Schema(struct.fields()), baseSchema); // fix types that can't be represented in Spark (UUID and Fixed) - return FixupTypes.fixup(schema, baseSchema); + return SparkFixupTypes.fixup(schema, baseSchema); } /** From 494c2247be4ce1008d114f88aaede148be17fdaf Mon Sep 17 00:00:00 2001 From: JingsongLi Date: Wed, 26 Aug 2020 14:56:56 +0800 Subject: [PATCH 2/5] Fix license --- .../org/apache/iceberg/types/FixupTypes.java | 23 +++++++++++-------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/api/src/main/java/org/apache/iceberg/types/FixupTypes.java b/api/src/main/java/org/apache/iceberg/types/FixupTypes.java index d34632efaa64..b203ca131807 100644 --- a/api/src/main/java/org/apache/iceberg/types/FixupTypes.java +++ b/api/src/main/java/org/apache/iceberg/types/FixupTypes.java @@ -1,15 +1,20 @@ /* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ package org.apache.iceberg.types; From 8aec0cc5d3446e7beeb798f56c14270bfe1dda55 Mon Sep 17 00:00:00 2001 From: JingsongLi Date: Wed, 26 Aug 2020 14:57:13 +0800 Subject: [PATCH 3/5] Fix license --- .../apache/iceberg/spark/SparkFixupTypes.java | 23 +++++++++++-------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/spark/src/main/java/org/apache/iceberg/spark/SparkFixupTypes.java b/spark/src/main/java/org/apache/iceberg/spark/SparkFixupTypes.java index 073cd66db7c2..2d3ea4c81f00 100644 --- a/spark/src/main/java/org/apache/iceberg/spark/SparkFixupTypes.java +++ b/spark/src/main/java/org/apache/iceberg/spark/SparkFixupTypes.java @@ -1,15 +1,20 @@ /* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ package org.apache.iceberg.spark; From d634e9fa78709b90d028cefeb18075d89944d45f Mon Sep 17 00:00:00 2001 From: JingsongLi Date: Thu, 27 Aug 2020 10:12:28 +0800 Subject: [PATCH 4/5] Address comments --- .../src/main/java/org/apache/iceberg/types/FixupTypes.java | 3 +++ 1 file changed, 3 insertions(+) rename {api => core}/src/main/java/org/apache/iceberg/types/FixupTypes.java (97%) diff --git a/api/src/main/java/org/apache/iceberg/types/FixupTypes.java b/core/src/main/java/org/apache/iceberg/types/FixupTypes.java similarity index 97% rename from api/src/main/java/org/apache/iceberg/types/FixupTypes.java rename to core/src/main/java/org/apache/iceberg/types/FixupTypes.java index b203ca131807..0e5db85370da 100644 --- a/api/src/main/java/org/apache/iceberg/types/FixupTypes.java +++ b/core/src/main/java/org/apache/iceberg/types/FixupTypes.java @@ -24,6 +24,9 @@ import org.apache.iceberg.Schema; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.relocated.com.google.common.collect.Lists; +import org.apache.iceberg.types.Type; +import org.apache.iceberg.types.TypeUtil; +import org.apache.iceberg.types.Types; /** * This is used to fix primitive types to match a table schema. This uses a reference schema to From 4900d518e1be7ca38777200eb09fa5c95e6f2bbf Mon Sep 17 00:00:00 2001 From: JingsongLi Date: Thu, 27 Aug 2020 10:28:20 +0800 Subject: [PATCH 5/5] checkstyle --- core/src/main/java/org/apache/iceberg/types/FixupTypes.java | 3 --- 1 file changed, 3 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/types/FixupTypes.java b/core/src/main/java/org/apache/iceberg/types/FixupTypes.java index 0e5db85370da..b203ca131807 100644 --- a/core/src/main/java/org/apache/iceberg/types/FixupTypes.java +++ b/core/src/main/java/org/apache/iceberg/types/FixupTypes.java @@ -24,9 +24,6 @@ import org.apache.iceberg.Schema; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.relocated.com.google.common.collect.Lists; -import org.apache.iceberg.types.Type; -import org.apache.iceberg.types.TypeUtil; -import org.apache.iceberg.types.Types; /** * This is used to fix primitive types to match a table schema. This uses a reference schema to