-
Notifications
You must be signed in to change notification settings - Fork 1.5k
Closed
Description
Does parquet-mr support Protobuf version 3? I've applied the following patch and the tests are failing mostly due to optional vs required.
diff --git a/parquet-protobuf/pom.xml b/parquet-protobuf/pom.xml
index b3e4e50..aa67423 100644
--- a/parquet-protobuf/pom.xml
+++ b/parquet-protobuf/pom.xml
@@ -31,7 +31,7 @@
<properties>
<elephant-bird.version>4.4</elephant-bird.version>
- <protobuf.version>2.5.0</protobuf.version>
+ <protobuf.version>3.0.0-beta-4</protobuf.version>
</properties>
diff --git a/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoInputOutputFormatTest.java b/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoInputOutputFormatTest.java
index 5c6ebca..7e2557f 100644
--- a/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoInputOutputFormatTest.java
+++ b/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoInputOutputFormatTest.java
@@ -88,7 +88,7 @@ public class ProtoInputOutputFormatTest {
//test that only requested fields were deserialized
- assertTrue(readDocument.hasDocId());
+ assertTrue(readDocument.getDocId() == 12345);
assertTrue("Found data outside projection.", readDocument.getNameCount() == 0);
}
diff --git a/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoRecordConverterTest.java b/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoRecordConverterTest.java
index 5318bd2..1cbb972 100644
--- a/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoRecordConverterTest.java
+++ b/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoRecordConverterTest.java
@@ -183,16 +183,16 @@ public class ProtoRecordConverterTest {
TestProtobuf.InnerMessage third = result.getInner(2);
assertEquals("First inner", first.getOne());
- assertFalse(first.hasTwo());
- assertFalse(first.hasThree());
+ assertEquals(first.getTwo(), "");
+ assertEquals(first.getThree(), "");
assertEquals("Second inner", second.getTwo());
- assertFalse(second.hasOne());
- assertFalse(second.hasThree());
+ assertEquals(second.getOne(), "");
+ assertEquals(second.getThree(), "");
assertEquals("Third inner", third.getThree());
- assertFalse(third.hasOne());
- assertFalse(third.hasTwo());
+ assertEquals(third.getOne(), "");
+ assertEquals(third.getTwo(), "");
}
diff --git a/parquet-protobuf/src/test/resources/TestProtobuf.proto b/parquet-protobuf/src/test/resources/TestProtobuf.proto
index afa0f63..caf7926 100644
--- a/parquet-protobuf/src/test/resources/TestProtobuf.proto
+++ b/parquet-protobuf/src/test/resources/TestProtobuf.proto
@@ -9,7 +9,7 @@
//
// http://www.apache.org/licenses/LICENSE-2.0
//
-// Unless required by applicable law or agreed to in writing,
+// Unless by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
@@ -17,6 +17,8 @@
// under the License.
//
+syntax = "proto3";
+
package TestProtobuf;
option java_package = "org.apache.parquet.proto.test";
@@ -25,17 +27,18 @@ option java_package = "org.apache.parquet.proto.test";
// messages but groups were deprecated.
message Document {
- required int64 DocId = 1;
- optional Links links = 32;
- repeated group Name = 24 {
+ int64 DocId = 1;
+ Links links = 32;
+ message Name {
repeated Language name = 4;
- optional string url = 5;
+ string url = 5;
}
+ repeated Name name = 24;
}
message Language {
- required string code = 12;
- optional string Country = 14;
+ string code = 12;
+ string Country = 14;
}
message Links {
@@ -47,42 +50,43 @@ message Links {
// begin - protocol buffers for ProtoSchemaConverterTest
message SchemaConverterSimpleMessage {
- optional int32 someId = 3;
+ int32 someId = 3;
}
message SchemaConverterAllDatatypes {
- optional double optionalDouble = 1;
- optional float optionalFloat = 2;
- optional int32 optionalInt32 = 3;
- optional int64 optionalInt64 = 4;
- optional uint32 optionalUInt32 = 5;
- optional uint64 optionalUInt64 = 6;
- optional sint32 optionalSInt32 = 7;
- optional sint64 optionalSInt64 = 8;
- optional fixed32 optionalFixed32 = 9;
- optional fixed64 optionalFixed64 = 10;
- optional sfixed32 optionalSFixed32 = 11;
- optional sfixed64 optionalSFixed64 = 12;
- optional bool optionalBool = 13;
- optional string optionalString = 14;
- optional bytes optionalBytes = 15;
- optional SchemaConverterSimpleMessage optionalMessage = 16;
- optional group PbGroup = 17 {
- optional int32 groupInt = 2;
+ double optionalDouble = 1;
+ float optionalFloat = 2;
+ int32 optionalInt32 = 3;
+ int64 optionalInt64 = 4;
+ uint32 optionalUInt32 = 5;
+ uint64 optionalUInt64 = 6;
+ sint32 optionalSInt32 = 7;
+ sint64 optionalSInt64 = 8;
+ fixed32 optionalFixed32 = 9;
+ fixed64 optionalFixed64 = 10;
+ sfixed32 optionalSFixed32 = 11;
+ sfixed64 optionalSFixed64 = 12;
+ bool optionalBool = 13;
+ string optionalString = 14;
+ bytes optionalBytes = 15;
+ SchemaConverterSimpleMessage optionalMessage = 16;
+ message PbGroup {
+ int32 groupInt = 2;
}
+ PbGroup pbGroup = 17;
enum TestEnum {
FIRST = 0;
SECOND = 1;
}
- optional TestEnum optionalEnum = 18;
+ TestEnum optionalEnum = 18;
}
message SchemaConverterRepetition {
- optional int32 optionalPrimitive = 1;
- required int32 requiredPrimitive = 2;
+ int32 optionalPrimitive = 1;
+ int32 requiredPrimitive = 2;
repeated int32 repeatedPrimitive = 3;
- optional SchemaConverterSimpleMessage optionalMessage = 7;
- required SchemaConverterSimpleMessage requiredMessage = 8;
+ SchemaConverterSimpleMessage optionalMessage = 7;
+ SchemaConverterSimpleMessage requiredMessage = 8;
repeated SchemaConverterSimpleMessage repeatedMessage = 9;
}
@@ -92,22 +96,22 @@ message Links {
//begin protocol buffers for ProtoInputOutputFormatTest
message InputOutputMsgFormat {
- optional int32 someId = 3;
+ int32 someId = 3;
}
message IOFormatMessage {
- optional double optionalDouble = 1;
+ double optionalDouble = 1;
repeated string repeatedString = 2;
- optional InputOutputMsgFormat msg = 3;
+ InputOutputMsgFormat msg = 3;
}
//end protocol buffers for ProtoInputOutputFormatTest
message InnerMessage {
- optional string one = 1;
- optional string two = 2;
- optional string three = 3;
+ string one = 1;
+ string two = 2;
+ string three = 3;
}
message TopMessage {
@@ -115,7 +119,7 @@ message TopMessage {
}
message MessageA {
- optional InnerMessage inner = 123;
+ InnerMessage inner = 123;
}
message RepeatedIntMessage {
@@ -129,11 +133,11 @@ message HighIndexMessage {
//custom proto class - ProtoInputOutputFormatTest
message FirstCustomClassMessage {
- optional string string = 11;
+ string string = 11;
}
message SecondCustomClassMessage {
- optional string string = 11;
+ string string = 11;
}
//please place your unit test Protocol Buffer definitions here.Reporter: Wael Nasreddine / @kalbasit
Assignee: Wael Nasreddine / @kalbasit
Note: This issue was originally created as PARQUET-665. Please see the migration documentation for further details.