2121
2222import java .io .File ;
2323import java .io .IOException ;
24+ import java .util .Arrays ;
2425import java .util .List ;
2526import org .apache .avro .SchemaBuilder ;
2627import org .apache .avro .file .DataFileWriter ;
@@ -59,7 +60,7 @@ public void writeAndValidateRequiredComplexUnion() throws IOException {
5960 .endRecord ();
6061
6162 GenericData .Record unionRecord1 = new GenericData .Record (avroSchema );
62- unionRecord1 .put ("unionCol" , "StringType1 " );
63+ unionRecord1 .put ("unionCol" , "foo " );
6364 GenericData .Record unionRecord2 = new GenericData .Record (avroSchema );
6465 unionRecord2 .put ("unionCol" , 1 );
6566
@@ -80,6 +81,14 @@ public void writeAndValidateRequiredComplexUnion() throws IOException {
8081 .project (expectedSchema )
8182 .build ()) {
8283 rows = Lists .newArrayList (reader );
84+
85+ Assert .assertEquals (2 , rows .get (0 ).getStruct (0 , 2 ).numFields ());
86+ Assert .assertTrue (rows .get (0 ).getStruct (0 , 2 ).isNullAt (0 ));
87+ Assert .assertEquals ("foo" , rows .get (0 ).getStruct (0 , 2 ).getString (1 ));
88+
89+ Assert .assertEquals (2 , rows .get (1 ).getStruct (0 , 2 ).numFields ());
90+ Assert .assertEquals (1 , rows .get (1 ).getStruct (0 , 2 ).getInt (0 ));
91+ Assert .assertTrue (rows .get (1 ).getStruct (0 , 2 ).isNullAt (1 ));
8392 }
8493 }
8594
@@ -96,13 +105,15 @@ public void writeAndValidateOptionalComplexUnion() throws IOException {
96105 .and ()
97106 .stringType ()
98107 .endUnion ()
99- .noDefault ()
108+ .nullDefault ()
100109 .endRecord ();
101110
102111 GenericData .Record unionRecord1 = new GenericData .Record (avroSchema );
103- unionRecord1 .put ("unionCol" , "StringType1 " );
112+ unionRecord1 .put ("unionCol" , "foo " );
104113 GenericData .Record unionRecord2 = new GenericData .Record (avroSchema );
105114 unionRecord2 .put ("unionCol" , 1 );
115+ GenericData .Record unionRecord3 = new GenericData .Record (avroSchema );
116+ unionRecord3 .put ("unionCol" , null );
106117
107118 File testFile = temp .newFile ();
108119 Assert .assertTrue ("Delete should succeed" , testFile .delete ());
@@ -111,6 +122,7 @@ public void writeAndValidateOptionalComplexUnion() throws IOException {
111122 writer .create (avroSchema , testFile );
112123 writer .append (unionRecord1 );
113124 writer .append (unionRecord2 );
125+ writer .append (unionRecord3 );
114126 }
115127
116128 Schema expectedSchema = AvroSchemaUtil .toIceberg (avroSchema );
@@ -121,25 +133,78 @@ public void writeAndValidateOptionalComplexUnion() throws IOException {
121133 .project (expectedSchema )
122134 .build ()) {
123135 rows = Lists .newArrayList (reader );
136+
137+ Assert .assertEquals ("foo" , rows .get (0 ).getStruct (0 , 2 ).getString (1 ));
138+ Assert .assertEquals (1 , rows .get (1 ).getStruct (0 , 2 ).getInt (0 ));
139+ Assert .assertTrue (rows .get (2 ).getStruct (0 , 2 ).isNullAt (0 ));
140+ Assert .assertTrue (rows .get (2 ).getStruct (0 , 2 ).isNullAt (1 ));
124141 }
125142 }
126143
127144 @ Test
128- public void writeAndValidateSingleComponentUnion () throws IOException {
145+ public void writeAndValidateSingleTypeUnion () throws IOException {
129146 org .apache .avro .Schema avroSchema = SchemaBuilder .record ("root" )
130147 .fields ()
131148 .name ("unionCol" )
132149 .type ()
133150 .unionOf ()
151+ .nullType ()
152+ .and ()
134153 .intType ()
135154 .endUnion ()
155+ .nullDefault ()
156+ .endRecord ();
157+
158+ GenericData .Record unionRecord1 = new GenericData .Record (avroSchema );
159+ unionRecord1 .put ("unionCol" , 0 );
160+ GenericData .Record unionRecord2 = new GenericData .Record (avroSchema );
161+ unionRecord2 .put ("unionCol" , 1 );
162+
163+ File testFile = temp .newFile ();
164+ Assert .assertTrue ("Delete should succeed" , testFile .delete ());
165+
166+ try (DataFileWriter <GenericData .Record > writer = new DataFileWriter <>(new GenericDatumWriter <>())) {
167+ writer .create (avroSchema , testFile );
168+ writer .append (unionRecord1 );
169+ writer .append (unionRecord2 );
170+ }
171+
172+ Schema expectedSchema = AvroSchemaUtil .toIceberg (avroSchema );
173+
174+ List <InternalRow > rows ;
175+ try (AvroIterable <InternalRow > reader = Avro .read (Files .localInput (testFile ))
176+ .createReaderFunc (SparkAvroReader ::new )
177+ .project (expectedSchema )
178+ .build ()) {
179+ rows = Lists .newArrayList (reader );
180+
181+ Assert .assertEquals (0 , rows .get (0 ).getInt (0 ));
182+ Assert .assertEquals (1 , rows .get (1 ).getInt (0 ));
183+ }
184+ }
185+
186+ @ Test
187+ public void testDeeplyNestedUnionSchema1 () throws IOException {
188+ org .apache .avro .Schema avroSchema = SchemaBuilder .record ("root" )
189+ .fields ()
190+ .name ("col1" )
191+ .type ()
192+ .array ()
193+ .items ()
194+ .unionOf ()
195+ .nullType ()
196+ .and ()
197+ .intType ()
198+ .and ()
199+ .stringType ()
200+ .endUnion ()
136201 .noDefault ()
137202 .endRecord ();
138203
139204 GenericData .Record unionRecord1 = new GenericData .Record (avroSchema );
140- unionRecord1 .put ("unionCol " , 1 );
205+ unionRecord1 .put ("col1 " , Arrays . asList ( "foo" , 1 ) );
141206 GenericData .Record unionRecord2 = new GenericData .Record (avroSchema );
142- unionRecord2 .put ("unionCol " , 2 );
207+ unionRecord2 .put ("col1 " , Arrays . asList ( 2 , "bar" ) );
143208
144209 File testFile = temp .newFile ();
145210 Assert .assertTrue ("Delete should succeed" , testFile .delete ());
@@ -158,6 +223,65 @@ public void writeAndValidateSingleComponentUnion() throws IOException {
158223 .project (expectedSchema )
159224 .build ()) {
160225 rows = Lists .newArrayList (reader );
226+
227+ // making sure it reads the correctly nested structured data, based on the transformation from union to struct
228+ Assert .assertEquals ("foo" , rows .get (0 ).getArray (0 ).getStruct (0 , 2 ).getString (1 ));
229+ }
230+ }
231+
232+ @ Test
233+ public void testDeeplyNestedUnionSchema2 () throws IOException {
234+ org .apache .avro .Schema avroSchema = SchemaBuilder .record ("root" )
235+ .fields ()
236+ .name ("col1" )
237+ .type ()
238+ .array ()
239+ .items ()
240+ .unionOf ()
241+ .record ("r1" )
242+ .fields ()
243+ .name ("id" )
244+ .type ()
245+ .intType ()
246+ .noDefault ()
247+ .endRecord ()
248+ .and ()
249+ .record ("r2" )
250+ .fields ()
251+ .name ("id" )
252+ .type ()
253+ .intType ()
254+ .noDefault ()
255+ .endRecord ()
256+ .endUnion ()
257+ .noDefault ()
258+ .endRecord ();
259+
260+ GenericData .Record outer = new GenericData .Record (avroSchema );
261+ GenericData .Record inner = new GenericData .Record (avroSchema .getFields ().get (0 ).schema ()
262+ .getElementType ().getTypes ().get (0 ));
263+
264+ inner .put ("id" , 1 );
265+ outer .put ("col1" , Arrays .asList (inner ));
266+
267+ File testFile = temp .newFile ();
268+ Assert .assertTrue ("Delete should succeed" , testFile .delete ());
269+
270+ try (DataFileWriter <GenericData .Record > writer = new DataFileWriter <>(new GenericDatumWriter <>())) {
271+ writer .create (avroSchema , testFile );
272+ writer .append (outer );
273+ }
274+
275+ Schema expectedSchema = AvroSchemaUtil .toIceberg (avroSchema );
276+ List <InternalRow > rows ;
277+ try (AvroIterable <InternalRow > reader = Avro .read (Files .localInput (testFile ))
278+ .createReaderFunc (SparkAvroReader ::new )
279+ .project (expectedSchema )
280+ .build ()) {
281+ rows = Lists .newArrayList (reader );
282+
283+ // making sure it reads the correctly nested structured data, based on the transformation from union to struct
284+ Assert .assertEquals (1 , rows .get (0 ).getArray (0 ).getStruct (0 , 2 ).getStruct (0 , 1 ).getInt (0 ));
161285 }
162286 }
163287}
0 commit comments