Skip to content

Commit

Permalink
[FIX](jsonb) fix jsonb in predict column #27325 (#27424)
Browse files Browse the repository at this point in the history
  • Loading branch information
amorynan authored Nov 22, 2023
1 parent b2a8550 commit 1212b56
Show file tree
Hide file tree
Showing 5 changed files with 348 additions and 0 deletions.
1 change: 1 addition & 0 deletions be/src/olap/schema.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,7 @@ vectorized::IColumn::MutablePtr Schema::get_predicate_column_ptr(const Field& fi
break;
case FieldType::OLAP_FIELD_TYPE_VARCHAR:
case FieldType::OLAP_FIELD_TYPE_STRING:
case FieldType::OLAP_FIELD_TYPE_JSONB:
if (config::enable_low_cardinality_optimize && reader_type == ReaderType::READER_QUERY) {
ptr = doris::vectorized::ColumnDictionary<doris::vectorized::Int32>::create(
field.type());
Expand Down
85 changes: 85 additions & 0 deletions regression-test/data/json_p0/test_json_predict_is_null.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !select_extract --
1 \N \N
2 null \N
3 true \N
4 false \N
5 100 \N
6 10000 \N
7 1000000000 \N
8 1152921504606846976 \N
9 6.18 \N
10 "abcd" \N
11 {} \N
12 {"k1":"v31","k2":300} "v31"
13 [] \N
14 [123,456] \N
15 ["abc","def"] \N
16 [null,true,false,100,6.18,"abc"] \N
17 [{"k1":"v41","k2":400},1,"a",3.14] \N
18 {"k1":"v31","k2":300,"a1":[{"k1":"v41","k2":400},1,"a",3.14]} "v31"
26 \N \N
27 {"k1":"v1","k2":200} "v1"
28 {"a.b.c":{"k1.a1":"v31","k2":300},"a":"niu"} \N
29 12524337771678448270 \N
30 -9223372036854775808 \N
31 18446744073709551615 \N

-- !select_pred --
1 \N
26 \N

-- !select_delete --
0

-- !select_pred --
2 null
3 true
4 false
5 100
6 10000
7 1000000000
8 1152921504606846976
9 6.18
10 "abcd"
11 {}
12 {"k1":"v31","k2":300}
13 []
14 [123,456]
15 ["abc","def"]
16 [null,true,false,100,6.18,"abc"]
17 [{"k1":"v41","k2":400},1,"a",3.14]
18 {"k1":"v31","k2":300,"a1":[{"k1":"v41","k2":400},1,"a",3.14]}
27 {"k1":"v1","k2":200}
28 {"a.b.c":{"k1.a1":"v31","k2":300},"a":"niu"}
29 12524337771678448270
30 -9223372036854775808
31 18446744073709551615

-- !select_drop --
0

-- !select --
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
27
28
29
30
31

85 changes: 85 additions & 0 deletions regression-test/data/jsonb_p0/test_jsonb_predict_is_null.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !select_extract --
1 \N \N
2 null \N
3 true \N
4 false \N
5 100 \N
6 10000 \N
7 1000000000 \N
8 1152921504606846976 \N
9 6.18 \N
10 "abcd" \N
11 {} \N
12 {"k1":"v31","k2":300} "v31"
13 [] \N
14 [123,456] \N
15 ["abc","def"] \N
16 [null,true,false,100,6.18,"abc"] \N
17 [{"k1":"v41","k2":400},1,"a",3.14] \N
18 {"k1":"v31","k2":300,"a1":[{"k1":"v41","k2":400},1,"a",3.14]} "v31"
26 \N \N
27 {"k1":"v1","k2":200} "v1"
28 {"a.b.c":{"k1.a1":"v31","k2":300},"a":"niu"} \N
29 12524337771678448270 \N
30 -9223372036854775808 \N
31 18446744073709551615 \N

-- !select_pred --
1 \N
26 \N

-- !select_delete --
0

-- !select_pred --
2 null
3 true
4 false
5 100
6 10000
7 1000000000
8 1152921504606846976
9 6.18
10 "abcd"
11 {}
12 {"k1":"v31","k2":300}
13 []
14 [123,456]
15 ["abc","def"]
16 [null,true,false,100,6.18,"abc"]
17 [{"k1":"v41","k2":400},1,"a",3.14]
18 {"k1":"v31","k2":300,"a1":[{"k1":"v41","k2":400},1,"a",3.14]}
27 {"k1":"v1","k2":200}
28 {"a.b.c":{"k1.a1":"v31","k2":300},"a":"niu"}
29 12524337771678448270
30 -9223372036854775808
31 18446744073709551615

-- !select_drop --
0

-- !select --
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
27
28
29
30
31

89 changes: 89 additions & 0 deletions regression-test/suites/json_p0/test_json_predict_is_null.groovy
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

import org.codehaus.groovy.runtime.IOGroovyMethods

suite("test_json_predict_is_null", "p0") {
sql """ set experimental_enable_nereids_planner = false """

sql """ set experimental_enable_nereids_planner = true """
sql """ set enable_fallback_to_original_planner = true """

sql "DROP TABLE IF EXISTS j_pred"

sql """
CREATE TABLE IF NOT EXISTS j_pred (
id INT,
j JSON
)
DUPLICATE KEY(id)
DISTRIBUTED BY HASH(id) BUCKETS 10
PROPERTIES("replication_num" = "1", "disable_auto_compaction" = "true");
"""

// insert into valid json rows
sql """INSERT INTO j_pred VALUES(26, NULL)"""
sql """INSERT INTO j_pred VALUES(27, '{"k1":"v1", "k2": 200}')"""
sql """INSERT INTO j_pred VALUES(28, '{"a.b.c":{"k1.a1":"v31", "k2": 300},"a":"niu"}')"""
// int64 value
sql """INSERT INTO j_pred VALUES(29, '12524337771678448270')"""
// int64 min value
sql """INSERT INTO j_pred VALUES(30, '-9223372036854775808')"""
// int64 max value
sql """INSERT INTO j_pred VALUES(31, '18446744073709551615')"""

// load the jsonb data from csv file
streamLoad {
table "j_pred"

file "test_json.csv" // import csv file
set 'max_filter_ratio', '0.3'
time 10000 // limit inflight 10s
set 'strict_mode', 'true'

// if declared a check callback, the default check condition will ignore.
// So you must check all condition
check { result, exception, startTime, endTime ->
if (exception != null) {
throw exception
}
log.info("Stream load result: ${result}".toString())
def json = parseJson(result)

def (code, out, err) = curl("GET", json.ErrorURL)
log.info("error result: " + out)

assertEquals("success", json.Status.toLowerCase())
assertEquals(25, json.NumberTotalRows)
assertEquals(18, json.NumberLoadedRows)
assertEquals(7, json.NumberFilteredRows)
assertTrue(json.LoadBytes > 0)
}
}

qt_select_extract "SELECT id, j, jsonb_extract(j, '\$.k1') FROM j_pred ORDER BY id"

qt_select_pred "select * from j_pred where j is null order by id"

qt_select_delete "delete from j_pred where j is null"

qt_select_pred "select * from j_pred order by id"

qt_select_drop "alter table j_pred DROP COLUMN j"

qt_select "select * from j_pred order by id"
}
88 changes: 88 additions & 0 deletions regression-test/suites/jsonb_p0/test_jsonb_predict_is_null.groovy
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

import org.codehaus.groovy.runtime.IOGroovyMethods

suite("test_jsonb_predict_is_null", "p0") {
sql """ set experimental_enable_nereids_planner = false """

sql """ set experimental_enable_nereids_planner = true """
sql """ set enable_fallback_to_original_planner = true """

sql "DROP TABLE IF EXISTS jb_pred"

sql """
CREATE TABLE IF NOT EXISTS jb_pred (
id INT,
j JSONB
)
DUPLICATE KEY(id)
DISTRIBUTED BY HASH(id) BUCKETS 10
PROPERTIES("replication_num" = "1", "disable_auto_compaction" = "true");
"""

// insert into valid json rows
sql """INSERT INTO jb_pred VALUES(26, NULL)"""
sql """INSERT INTO jb_pred VALUES(27, '{"k1":"v1", "k2": 200}')"""
sql """INSERT INTO jb_pred VALUES(28, '{"a.b.c":{"k1.a1":"v31", "k2": 300},"a":"niu"}')"""
// int64 value
sql """INSERT INTO jb_pred VALUES(29, '12524337771678448270')"""
// int64 min value
sql """INSERT INTO jb_pred VALUES(30, '-9223372036854775808')"""
// int64 max value
sql """INSERT INTO jb_pred VALUES(31, '18446744073709551615')"""

// load the jsonb data from csv file
streamLoad {
table "jb_pred"

file "test_jsonb.csv" // import csv file
set 'max_filter_ratio', '0.3'
time 10000 // limit inflight 10s
set 'strict_mode', 'true'

// if declared a check callback, the default check condition will ignore.
// So you must check all condition
check { result, exception, startTime, endTime ->
if (exception != null) {
throw exception
}
log.info("Stream load result: ${result}".toString())
def json = parseJson(result)
def (code, out, err) = curl("GET", json.ErrorURL)
log.info("error result: " + out)

assertEquals("success", json.Status.toLowerCase())
assertEquals(25, json.NumberTotalRows)
assertEquals(18, json.NumberLoadedRows)
assertEquals(7, json.NumberFilteredRows)
assertTrue(json.LoadBytes > 0)
}
}

qt_select_extract "SELECT id, j, jsonb_extract(j, '\$.k1') FROM jb_pred ORDER BY id"

qt_select_pred "select * from jb_pred where j is null order by id"

qt_select_delete "delete from jb_pred where j is null"

qt_select_pred "select * from jb_pred order by id"

qt_select_drop "alter table jb_pred DROP COLUMN j"

qt_select "select * from jb_pred order by id"
}

0 comments on commit 1212b56

Please sign in to comment.