Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions be/src/exprs/bitmap_function.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,31 @@ void BitmapFunctions::bitmap_union(FunctionContext* ctx, const StringVal& src, S
}
}

// Init function paired with bitmap_intersect: marks dst as NULL (the dst value could be null).
// bitmap_intersect treats a NULL dst as "no value accumulated yet" and seeds it from the
// first non-null src; starting from an empty bitmap instead would make every
// intersection empty. Only is_null is written here; dst->ptr and dst->len are not touched.
void BitmapFunctions::nullable_bitmap_init(FunctionContext* ctx, StringVal* dst) {
dst->is_null = true;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why initial result bitmap as null? it seems that it will return empty bitmap when result bitmap is null

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The initial result bitmap must be null. Otherwise, the intersection between dst and src will be empty.

}

// Update step of bitmap_intersect: folds src into the running intersection held in dst.
//  - A NULL src is ignored; dst is left unchanged.
//  - A NULL dst means no value has been accumulated yet: a BitmapValue is allocated with
//    new from (char*) src.ptr and stored in dst, with dst->len set to sizeof(BitmapValue).
//  - Otherwise dst->ptr is reinterpreted as a BitmapValue* and intersected in place with src.
// NOTE(review): the first-value branch always constructs the BitmapValue from src.ptr as
// char data and never checks src.len == 0 (the "agg object" case handled further down) --
// confirm that an agg-object src cannot arrive while dst is still NULL.
void BitmapFunctions::bitmap_intersect(FunctionContext* ctx, const StringVal& src, StringVal* dst) {
// NULL inputs do not take part in the intersection
if (src.is_null) {
return;
}
// if dst is null, the src input is the first value
if (dst->is_null) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would better add a UNLIKELY macros.

dst->is_null = false;
dst->len = sizeof(BitmapValue);
// dst now points at a BitmapValue allocated with new; this function does not free it
dst->ptr = (uint8_t*)new BitmapValue((char*) src.ptr);
return;
}
auto dst_bitmap = reinterpret_cast<BitmapValue*>(dst->ptr);
// zero size means the src input is an agg object
if (src.len == 0) {
// src.ptr is used directly as a BitmapValue*
(*dst_bitmap) &= *reinterpret_cast<BitmapValue*>(src.ptr);
} else {
// src.ptr is passed to the BitmapValue(char*) constructor to build a temporary operand
(*dst_bitmap) &= BitmapValue((char*) src.ptr);
}
}

BigIntVal BitmapFunctions::bitmap_count(FunctionContext* ctx, const StringVal& src) {
if (src.is_null) {
return 0;
Expand Down Expand Up @@ -343,12 +368,17 @@ StringVal BitmapFunctions::bitmap_hash(doris_udf::FunctionContext* ctx, const do
}

// Converts the BitmapValue that src.ptr points at into its serialized StringVal form.
// A NULL src is returned unchanged. For a non-NULL src the BitmapValue is deleted once
// it has been serialized, so src.ptr must not be dereferenced after this call.
StringVal BitmapFunctions::bitmap_serialize(FunctionContext* ctx, const StringVal& src) {
    // Nothing to serialize (and nothing to free) for a NULL input.
    if (src.is_null) {
        return src;
    }

    BitmapValue* bitmap = reinterpret_cast<BitmapValue*>(src.ptr);
    StringVal serialized = serialize(ctx, bitmap);
    // The serialized copy is the result; release the in-memory bitmap.
    delete bitmap;
    return serialized;
}

// This is a init function for intersect_count not for bitmap_intersect.
template<typename T, typename ValType>
void BitmapFunctions::bitmap_intersect_init(FunctionContext* ctx, StringVal* dst) {
dst->is_null = false;
Expand Down Expand Up @@ -510,6 +540,7 @@ template void BitmapFunctions::bitmap_update_int<IntVal>(
template void BitmapFunctions::bitmap_update_int<BigIntVal>(
FunctionContext* ctx, const BigIntVal& src, StringVal* dst);

// this is init function for intersect_count not for bitmap_intersect
template void BitmapFunctions::bitmap_intersect_init<int8_t, TinyIntVal>(
FunctionContext* ctx, StringVal* dst);
template void BitmapFunctions::bitmap_intersect_init<int16_t, SmallIntVal>(
Expand Down
6 changes: 5 additions & 1 deletion be/src/exprs/bitmap_function.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,9 @@ class BitmapFunctions {
static BigIntVal bitmap_get_value(FunctionContext* ctx, const StringVal& src);

static void bitmap_union(FunctionContext* ctx, const StringVal& src, StringVal* dst);
// the dst value could be null
static void nullable_bitmap_init(FunctionContext* ctx, StringVal* dst);
static void bitmap_intersect(FunctionContext* ctx, const StringVal& src, StringVal* dst);
static BigIntVal bitmap_count(FunctionContext* ctx, const StringVal& src);

static StringVal bitmap_serialize(FunctionContext* ctx, const StringVal& src);
Expand All @@ -68,8 +71,9 @@ class BitmapFunctions {
static BooleanVal bitmap_contains(FunctionContext* ctx, const StringVal& src, const BigIntVal& input);
static BooleanVal bitmap_has_any(FunctionContext* ctx, const StringVal& lhs, const StringVal& rhs);

// bitmap_intersect
// intersect count
template<typename T, typename ValType>
// this is init function for intersect_count not for bitmap_intersect
static void bitmap_intersect_init(FunctionContext* ctx, StringVal* dst);
template<typename T, typename ValType>
static void bitmap_intersect_update(FunctionContext* ctx, const StringVal& src, const ValType& key,
Expand Down
34 changes: 34 additions & 0 deletions be/test/exprs/bitmap_function_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,39 @@ TEST_F(BitmapFunctionsTest, bitmap_union) {
ASSERT_EQ(expected, result);
}

// test bitmap_intersect: two serialized bitmaps are fed through the init/update/serialize
// path and the resulting intersection is expected to contain 2 elements.
TEST_F(BitmapFunctionsTest, bitmap_intersect) {
    StringVal dst;
    // Use the init function declared for bitmap_intersect; it leaves dst NULL so that
    // the first input seeds the result.
    BitmapFunctions::nullable_bitmap_init(ctx, &dst);

    // First input: seeds dst because dst is still NULL.
    BitmapValue bitmap1(1);
    bitmap1.add(2);
    bitmap1.add(3);
    StringVal src1 = convert_bitmap_to_string(ctx, bitmap1);
    BitmapFunctions::bitmap_intersect(ctx, src1, &dst);

    // Second input: intersected into the accumulated bitmap.
    BitmapValue bitmap2(1);
    bitmap2.add(2);
    StringVal src2 = convert_bitmap_to_string(ctx, bitmap2);
    BitmapFunctions::bitmap_intersect(ctx, src2, &dst);

    StringVal serialized = BitmapFunctions::bitmap_serialize(ctx, dst);
    BigIntVal result = BitmapFunctions::bitmap_count(ctx, serialized);
    BigIntVal expected(2);
    ASSERT_EQ(expected, result);
}

// test bitmap_intersect with null dst: when no input is ever intersected, dst stays NULL,
// bitmap_serialize passes the NULL through, and bitmap_count is expected to report 0.
TEST_F(BitmapFunctionsTest, bitmap_intersect_empty) {
    StringVal dst;
    // Use the init function declared for bitmap_intersect; it marks dst as NULL.
    BitmapFunctions::nullable_bitmap_init(ctx, &dst);

    StringVal serialized = BitmapFunctions::bitmap_serialize(ctx, dst);
    BigIntVal result = BitmapFunctions::bitmap_count(ctx, serialized);
    BigIntVal expected(0);
    ASSERT_EQ(expected, result);
}

TEST_F(BitmapFunctionsTest, bitmap_count) {
BitmapValue bitmap(1024);
bitmap.add(1);
Expand All @@ -186,6 +219,7 @@ TEST_F(BitmapFunctionsTest, bitmap_count) {
ASSERT_EQ(BigIntVal(0), null_bitmap);
}

// test intersect_count
template<typename ValType, typename ValueType>
void test_bitmap_intersect(FunctionContext* ctx, ValType key1, ValType key2) {
StringVal bitmap_column("placeholder");
Expand Down
2 changes: 2 additions & 0 deletions docs/.vuepress/sidebar/en.js
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,8 @@ module.exports = [
"bitmap_or",
"bitmap_to_string",
"to_bitmap",
"bitmap_intersect",
"bitmap_union",
],
},
{
Expand Down
2 changes: 2 additions & 0 deletions docs/.vuepress/sidebar/zh-CN.js
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,8 @@ module.exports = [
"bitmap_or",
"bitmap_to_string",
"to_bitmap",
"bitmap_intersect",
"bitmap_union",
],
},
{
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
---
{
"title": "bitmap_intersect",
"language": "en"
}
---

<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->

# bitmap_intersect
## description

Aggregate function that computes the intersection of the bitmaps within each group. A common use case is calculating user retention.

### Syntax

`BITMAP BITMAP_INTERSECT(BITMAP value)`

Takes a set of bitmap values, computes their intersection, and returns the result.

## example

Table schema

```
KeysType: AGG_KEY
Columns: tag varchar, date datetime, user_id bitmap bitmap_union
```

```
Find the retention of users between 2020-05-18 and 2020-05-19 under different tags.
mysql> select tag, bitmap_intersect(user_id) from (select tag, date, bitmap_union(user_id) user_id from table where date in ('2020-05-18', '2020-05-19') group by tag, date) a group by tag;
```

Use it together with the bitmap_to_string function to get the actual values in the intersection:

```
Who are the users retained under different tags between 2020-05-18 and 2020-05-19?
mysql> select tag, bitmap_to_string(bitmap_intersect(user_id)) from (select tag, date, bitmap_union(user_id) user_id from table where date in ('2020-05-18', '2020-05-19') group by tag, date) a group by tag;
```

## keyword

BITMAP_INTERSECT, BITMAP
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
---
{
"title": "bitmap_union",
"language": "en"
}
---

<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->

# bitmap_union
## description

Aggregate function that computes the union of the bitmaps within each group. Common use cases include calculating PV and UV.

### Syntax

`BITMAP BITMAP_UNION(BITMAP value)`

Takes a set of bitmap values, computes their union, and returns the result.

## example

```
mysql> select page_id, bitmap_union(user_id) from table group by page_id;
```

Combined with the bitmap_count function, it yields the UV (number of distinct users) of each web page:

```
mysql> select page_id, bitmap_count(bitmap_union(user_id)) from table group by page_id;
```

When the user_id column is of type int, the query above is semantically equivalent to:

```
mysql> select page_id, count(distinct user_id) from table group by page_id;
```

## keyword

BITMAP_UNION, BITMAP
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
---
{
"title": "bitmap_intersect",
"language": "zh-CN"
}
---

<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->

# bitmap_intersect
## description

聚合函数,用于计算分组后的 bitmap 交集。常见使用场景如:计算用户留存率。

### Syntax

`BITMAP BITMAP_INTERSECT(BITMAP value)`

输入一组 bitmap 值,求这一组 bitmap 值的交集,并返回。

## example

表结构

```
KeysType: AGG_KEY
Columns: tag varchar, date datetime, user_id bitmap bitmap_union

```

```
求今天和昨天不同 tag 下的用户留存
mysql> select tag, bitmap_intersect(user_id) from (select tag, date, bitmap_union(user_id) user_id from table where date in ('2020-05-18', '2020-05-19') group by tag, date) a group by tag;
```

和 bitmap_to_string 函数组合使用可以获取交集的具体数据

```
求今天和昨天不同 tag 下留存的用户都是哪些
mysql> select tag, bitmap_to_string(bitmap_intersect(user_id)) from (select tag, date, bitmap_union(user_id) user_id from table where date in ('2020-05-18', '2020-05-19') group by tag, date) a group by tag;
```

## keyword

BITMAP_INTERSECT, BITMAP
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
---
{
"title": "bitmap_union",
"language": "zh-CN"
}
---

<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->

# bitmap_union
## description

聚合函数,用于计算分组后的 bitmap 并集。常见使用场景如:计算PV,UV。

### Syntax

`BITMAP BITMAP_UNION(BITMAP value)`

输入一组 bitmap 值,求这一组 bitmap 值的并集,并返回。

## example

```
mysql> select page_id, bitmap_union(user_id) from table group by page_id;
```

和 bitmap_count 函数组合使用可以求得网页的 UV 数据

```
mysql> select page_id, bitmap_count(bitmap_union(user_id)) from table group by page_id;
```

当 user_id 字段为 int 时,上面查询语义等同于

```
mysql> select page_id, count(distinct user_id) from table group by page_id;
```

## keyword

BITMAP_UNION, BITMAP
Original file line number Diff line number Diff line change
Expand Up @@ -407,7 +407,8 @@ private void analyzeBuiltinAggFunction(Analyzer analyzer) throws AnalysisExcepti

if (fnName.getFunction().equalsIgnoreCase(FunctionSet.BITMAP_COUNT)
|| fnName.getFunction().equalsIgnoreCase(FunctionSet.BITMAP_UNION)
|| fnName.getFunction().equalsIgnoreCase(FunctionSet.BITMAP_UNION_COUNT)) {
|| fnName.getFunction().equalsIgnoreCase(FunctionSet.BITMAP_UNION_COUNT)
|| fnName.getFunction().equalsIgnoreCase(FunctionSet.BITMAP_INTERSECT)) {
if (children.size() != 1) {
throw new AnalysisException(fnName + " function could only have one child");
}
Expand Down
Loading