Skip to content
This repository has been archived by the owner on Sep 18, 2023. It is now read-only.

Commit

Permalink
[NSE-1104] fix hashagg w/ empty string (#1099)
Browse files Browse the repository at this point in the history
* fix hashagg w/ empty string

Signed-off-by: Yuan Zhou <yuan.zhou@intel.com>

* adding empty bits in unsafe row

Signed-off-by: Yuan Zhou <yuan.zhou@intel.com>

* fix ut

Signed-off-by: Yuan Zhou <yuan.zhou@intel.com>

Signed-off-by: Yuan Zhou <yuan.zhou@intel.com>
  • Loading branch information
zhouyuan authored Sep 8, 2022
1 parent 0113015 commit 3ee659c
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -368,6 +368,21 @@ class NativeSQLConvertedSuite extends QueryTest
Row("val1a", Timestamp.valueOf("2014-04-04 01:02:00.001"))))
}

test("groupby with empty string") {
Seq[(String, Integer, String, String)](
("9", 1, "", "20220608"),
("9", 2, "20220608", ""),
("9", 2, "20220608", ""),
("9", 1, "20220608", ""),
("9", 2, "20220608", ""),
("9", 1, "20220608", ""),
("9", null, "20220608", ""),
("9", 3, "20220608", "")).toDF("a", "b", "c", "d").createOrReplaceTempView("testData")

val df1 = sql( "SELECT a,c,d, COUNT(*) FROM testData group by a, c, d")
checkAnswer(df1, Seq(Row("9","", "20220608", 1), Row("9","20220608","", 7)))
}

test("groupby") {
Seq[(Integer, java.lang.Boolean)](
(1, true),
Expand Down
1 change: 1 addition & 0 deletions native-sql-engine/cpp/src/precompile/unsafe_array.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ class TypedUnsafeArray<DataType, enable_if_string_like<DataType>> : public Unsaf
appendToUnsafeRow((*unsafe_row).get(), idx_, *(int64_t*)(v.data()));
break;
default:
// empty string will go here
appendToUnsafeRow((*unsafe_row).get(), idx_, v);
}
}
Expand Down
20 changes: 17 additions & 3 deletions native-sql-engine/cpp/src/third_party/row_wise_memory/unsafe_row.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,12 +47,14 @@ struct UnsafeRow {
char* data = nullptr;
int cursor;
int validity_size;
int is_empty_size;
UnsafeRow() {}
UnsafeRow(int numFields) : numFields(numFields) {
validity_size = (numFields / 8) + 1;
cursor = validity_size;
is_empty_size = (numFields / 8) + 1;
cursor = validity_size + is_empty_size;
data = (char*)nativeMalloc(TEMP_UNSAFEROW_BUFFER_SIZE, MEMTYPE_ROW);
memset(data, 0, validity_size);
memset(data, 0, validity_size + is_empty_size);
}
~UnsafeRow() {
if (data) {
Expand All @@ -61,8 +63,10 @@ struct UnsafeRow {
}
int sizeInBytes() { return cursor; }
void reset() {
validity_size = (numFields / 8) + 1;
is_empty_size = (numFields / 8) + 1;
memset(data, 0, cursor);
cursor = validity_size;
cursor = validity_size + is_empty_size;
}
bool isNullExists() {
for (int i = 0; i < ((numFields / 8) + 1); i++) {
Expand Down Expand Up @@ -99,6 +103,12 @@ static inline void setNullAt(UnsafeRow* row, int index) {
*(row->data + bitSetIdx) |= kBitmask[index % 8];
}

static inline void setEmptyAt(UnsafeRow* row, int index) {
assert((index >= 0) && (index < row->numFields));
auto bitSetIdx = index >> 3; // mod 8
*(row->data + row->validity_size + bitSetIdx) |= kBitmask[index % 8];
}

template <typename T>
using is_number_alike =
std::integral_constant<bool, std::is_arithmetic<T>::value ||
Expand All @@ -115,6 +125,10 @@ static inline void appendToUnsafeRow(UnsafeRow* row, const int& index, const T&

static inline void appendToUnsafeRow(UnsafeRow* row, const int& index,
arrow::util::string_view str) {
if (unlikely(str.size() == 0)) {
setEmptyAt(row, index);
return;
}
if (unlikely(row->cursor + str.size() > TEMP_UNSAFEROW_BUFFER_SIZE))
row->data =
(char*)nativeRealloc(row->data, 2 * TEMP_UNSAFEROW_BUFFER_SIZE, MEMTYPE_ROW);
Expand Down

0 comments on commit 3ee659c

Please sign in to comment.