Skip to content

Commit dac4152

Browse files
committed
Add test for schema evolution
1 parent 5bf80d6 commit dac4152

File tree

1 file changed

+117
-0
lines changed

1 file changed

+117
-0
lines changed
Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
##########
19+
# Tests for schema evolution -- reading
20+
# data from different files with different schemas
21+
##########
22+
23+
24+
# Register an external Parquet table with the declared schema (a, b, c)
# over the scratch directory that the COPY statements below write into.
statement ok
CREATE EXTERNAL TABLE parquet_table (
    a VARCHAR,
    b INT,
    c FLOAT
)
STORED AS PARQUET
LOCATION 'test_files/scratch/schema_evolution/parquet_table/';
27+
28+
# 1.parquet: contains columns a and b only (no column c).
statement ok
COPY (
    SELECT
        column1 AS a,
        column2 AS b
    FROM (
        VALUES
            ('foo', 1),
            ('foo', 2),
            ('foo', 3)
    )
)
TO 'test_files/scratch/schema_evolution/parquet_table/1.parquet'
(FORMAT PARQUET, SINGLE_FILE_OUTPUT true);
35+
36+
37+
# 2.parquet: contains column b only (neither a nor c).
statement ok
COPY (
    SELECT
        column1 AS b
    FROM (
        VALUES
            (10)
    )
)
TO 'test_files/scratch/schema_evolution/parquet_table/2.parquet'
(FORMAT PARQUET, SINGLE_FILE_OUTPUT true);
44+
45+
# 3.parquet: contains a column z, which is not part of the table schema,
# alongside column a, which is. Columns b and c are absent.
statement ok
COPY (
    SELECT
        column1 AS z,
        column2 AS a
    FROM (
        VALUES
            ('bar', 'foo'),
            ('blarg', 'foo')
    )
)
TO 'test_files/scratch/schema_evolution/parquet_table/3.parquet'
(FORMAT PARQUET, SINGLE_FILE_OUTPUT true);
53+
54+
# 4.parquet: contains all three table columns, written in the order
# b, a, c (b and a are swapped relative to the table declaration).
statement ok
COPY (
    SELECT
        column1 AS b,
        column2 AS a,
        column3 AS c
    FROM (
        VALUES
            (100, 'foo', 10.5),
            (200, 'foo', 12.6),
            (300, 'bzz', 13.7)
    )
)
TO 'test_files/scratch/schema_evolution/parquet_table/4.parquet'
(FORMAT PARQUET, SINGLE_FILE_OUTPUT true);
61+
62+
63+
# Unfiltered scan: every row from all four files, with NULL filled in
# for any table column a given file does not contain.
query TIR rowsort
SELECT *
FROM parquet_table;
----
NULL 10 NULL
bzz 300 13.7
foo 1 NULL
foo 100 10.5
foo 2 NULL
foo 200 12.6
foo 3 NULL
foo NULL NULL
foo NULL NULL
76+
77+
# Should see all 7 rows that have a = 'foo': three from file 1, two from
# file 3 and two from file 4. These are exactly the 'foo' rows returned
# by the unfiltered query, so the filter must not drop rows coming from
# files whose physical schema differs from the table schema.
query TIR rowsort
SELECT *
FROM parquet_table
WHERE a = 'foo';
----
foo 1 NULL
foo 100 10.5
foo 2 NULL
foo 200 12.6
foo 3 NULL
foo NULL NULL
foo NULL NULL
84+
85+
# Only the single 'bzz' row has a non-NULL value of a other than 'foo'.
# The row whose a is NULL does not satisfy the comparison.
query TIR rowsort
SELECT *
FROM parquet_table
WHERE a != 'foo';
----
bzz 300 13.7
89+
90+
# File 2 has no column a, so its single row (b = 10) reads back with
# a = NULL and must be returned here. This matches the NULL 10 NULL row
# in the unfiltered query result.
query TIR rowsort
SELECT *
FROM parquet_table
WHERE a IS NULL;
----
NULL 10 NULL
94+
95+
# Filter on b: rows with b greater than 5. Rows where b is NULL or at
# most 5 are excluded.
query TIR rowsort
SELECT *
FROM parquet_table
WHERE b > 5;
----
NULL 10 NULL
bzz 300 13.7
foo 100 10.5
foo 200 12.6
102+
103+
104+
# Filter on b: rows with b below 150. Rows where b is NULL or at least
# 150 are excluded.
query TIR rowsort
SELECT *
FROM parquet_table
WHERE b < 150;
----
NULL 10 NULL
foo 1 NULL
foo 100 10.5
foo 2 NULL
foo 3 NULL
112+
113+
# Filter on c: only file 4 supplies c, and two of its rows exceed 11.0.
# Rows where c is NULL are excluded.
query TIR rowsort
SELECT *
FROM parquet_table
WHERE c > 11.0;
----
bzz 300 13.7
foo 200 12.6

0 commit comments

Comments
 (0)