Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow to use a string as a delimiter in StringAppendOperator #8536

Closed
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@
### Performance Improvements
* Try to avoid updating DBOptions if `SetDBOptions()` does not change any option value.

### Public API change
east825 marked this conversation as resolved.
Show resolved Hide resolved
* The constructor of `StringAppendOperator` and the factory method `MergeOperators::CreateStringAppendOperator` additionally accept a string as the delimiter.

## 6.23.0 (2021-07-16)
### Behavior Changes
* Obsolete keys in the bottommost level that were preserved for a snapshot will now be cleaned upon snapshot release in all cases. This form of compaction (snapshot release triggered compaction) previously had an artificial limitation that multiple tombstones needed to be present.
Expand Down
16 changes: 15 additions & 1 deletion java/rocksjni/merge_operator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
* Method: newSharedStringAppendOperator
* Signature: (C)J
*/
jlong Java_org_rocksdb_StringAppendOperator_newSharedStringAppendOperator(
jlong Java_org_rocksdb_StringAppendOperator_newSharedStringAppendOperator__C(
JNIEnv* /*env*/, jclass /*jclazz*/, jchar jdelim) {
auto* sptr_string_append_op =
new std::shared_ptr<ROCKSDB_NAMESPACE::MergeOperator>(
Expand All @@ -39,6 +39,20 @@ jlong Java_org_rocksdb_StringAppendOperator_newSharedStringAppendOperator(
return reinterpret_cast<jlong>(sptr_string_append_op);
}

/*
 * Class:     org_rocksdb_StringAppendOperator
 * Method:    newSharedStringAppendOperator
 * Signature: (Ljava/lang/String;)J
 */
jlong Java_org_rocksdb_StringAppendOperator_newSharedStringAppendOperator__Ljava_lang_String_2(
    JNIEnv* env, jclass /*jclass*/, jstring jdelim) {
  // Copy the Java delimiter into a native string. The helper reports failure
  // through the out-flag, in which case a zero handle is returned.
  jboolean copy_failed = JNI_FALSE;
  auto native_delim =
      ROCKSDB_NAMESPACE::JniUtil::copyStdString(env, jdelim, &copy_failed);
  if (copy_failed == JNI_TRUE) {
    return 0;
  }

  // The heap-allocated shared_ptr is returned to Java as an opaque handle.
  using MergeOperatorPtr = std::shared_ptr<ROCKSDB_NAMESPACE::MergeOperator>;
  auto* op_handle = new MergeOperatorPtr(
      ROCKSDB_NAMESPACE::MergeOperators::CreateStringAppendOperator(
          native_delim));
  return reinterpret_cast<jlong>(op_handle);
}

/*
* Class: org_rocksdb_StringAppendOperator
* Method: disposeInternal
Expand Down
5 changes: 5 additions & 0 deletions java/src/main/java/org/rocksdb/StringAppendOperator.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,11 @@ public StringAppendOperator(char delim) {
super(newSharedStringAppendOperator(delim));
}

/**
 * Creates a string-append merge operator that inserts {@code delim}
 * between merged elements.
 *
 * @param delim the delimiter string passed to the native operator
 */
public StringAppendOperator(final String delim) {
  super(newSharedStringAppendOperator(delim));
}

// Native factories: each returns a handle to a newly created native
// string-append operator, using either a single character or a whole
// string as the delimiter.
private native static long newSharedStringAppendOperator(final char delim);
private native static long newSharedStringAppendOperator(final String delim);
@Override protected final native void disposeInternal(final long handle);
}
26 changes: 26 additions & 0 deletions java/src/test/java/org/rocksdb/MergeTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -412,6 +412,32 @@ public void uint64AddOperatorGcBehaviour()
}
}

@Test
public void emptyStringAsStringAppendDelimiter() throws RocksDBException {
  // With an empty delimiter, merged operands are expected to be
  // concatenated with nothing in between.
  final byte[] key = "key".getBytes();
  try (final StringAppendOperator appendOp = new StringAppendOperator("");
       final Options options =
           new Options().setCreateIfMissing(true).setMergeOperator(appendOp);
       final RocksDB db = RocksDB.open(options, dbFolder.getRoot().getAbsolutePath())) {
    db.put(key, "aa".getBytes());
    db.merge(key, "bb".getBytes());

    final String merged = new String(db.get(key));
    assertThat(merged).isEqualTo("aabb");
  }
}

@Test
public void multiCharStringAsStringAppendDelimiter() throws RocksDBException {
  // A multi-character delimiter is expected to appear verbatim between
  // the existing value and the merged operand.
  final byte[] key = "key".getBytes();
  try (final StringAppendOperator appendOp = new StringAppendOperator("<>");
       final Options options =
           new Options().setCreateIfMissing(true).setMergeOperator(appendOp);
       final RocksDB db = RocksDB.open(options, dbFolder.getRoot().getAbsolutePath())) {
    db.put(key, "aa".getBytes());
    db.merge(key, "bb".getBytes());

    final String merged = new String(db.get(key));
    assertThat(merged).isEqualTo("aa<>bb");
  }
}

@Test
public void emptyStringInSetMergeOperatorByName() {
try (final Options opt = new Options()
Expand Down
2 changes: 2 additions & 0 deletions utilities/merge_operators.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ class MergeOperators {
static std::shared_ptr<MergeOperator> CreateUInt64AddOperator();
static std::shared_ptr<MergeOperator> CreateStringAppendOperator();
static std::shared_ptr<MergeOperator> CreateStringAppendOperator(char delim_char);
static std::shared_ptr<MergeOperator> CreateStringAppendOperator(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since this is a behavior change of StringAppend merge operator, also mention this change in HISTORY.md.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@zhichao-cao This header is not in the public API. Should it still be mentioned in HISTORY or not?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suggest mentioning it in HISTORY.md, since this merge operator is used by many users and they should be aware of this behavior change. So mentioning it in HISTORY.md would be better.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I removed the mention of this specific header from HISTORY.md, leaving just StringAppendOperator itself.

const std::string& delim);
static std::shared_ptr<MergeOperator> CreateStringAppendTESTOperator();
static std::shared_ptr<MergeOperator> CreateMaxOperator();
static std::shared_ptr<MergeOperator> CreateBytesXOROperator();
Expand Down
17 changes: 12 additions & 5 deletions utilities/merge_operators/string_append/stringappend.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,10 @@ namespace ROCKSDB_NAMESPACE {

// Constructor: also specify the delimiter character.
StringAppendOperator::StringAppendOperator(char delim_char)
: delim_(delim_char) {
}
: delim_(1, delim_char) {}

StringAppendOperator::StringAppendOperator(const std::string& delim)
: delim_(delim) {}

// Implementation for the merge operation (concatenates two strings)
bool StringAppendOperator::Merge(const Slice& /*key*/,
Expand All @@ -35,9 +37,9 @@ bool StringAppendOperator::Merge(const Slice& /*key*/,
} else {
// Generic append (existing_value != null).
// Reserve *new_value to correct size, and apply concatenation.
new_value->reserve(existing_value->size() + 1 + value.size());
new_value->assign(existing_value->data(),existing_value->size());
new_value->append(1,delim_);
new_value->reserve(existing_value->size() + delim_.size() + value.size());
new_value->assign(existing_value->data(), existing_value->size());
new_value->append(delim_);
new_value->append(value.data(), value.size());
}

Expand All @@ -56,4 +58,9 @@ std::shared_ptr<MergeOperator> MergeOperators::CreateStringAppendOperator(char d
return std::make_shared<StringAppendOperator>(delim_char);
}

// Factory: builds a StringAppendOperator that uses `delim` as its delimiter
// and returns it through the MergeOperator base interface.
std::shared_ptr<MergeOperator> MergeOperators::CreateStringAppendOperator(
    const std::string& delim) {
  std::shared_ptr<MergeOperator> append_op =
      std::make_shared<StringAppendOperator>(delim);
  return append_op;
}

} // namespace ROCKSDB_NAMESPACE
4 changes: 2 additions & 2 deletions utilities/merge_operators/string_append/stringappend.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ class StringAppendOperator : public AssociativeMergeOperator {
public:
// Constructor: specify delimiter
explicit StringAppendOperator(char delim_char);
explicit StringAppendOperator(const std::string& delim);

virtual bool Merge(const Slice& key,
const Slice* existing_value,
Expand All @@ -24,8 +25,7 @@ class StringAppendOperator : public AssociativeMergeOperator {
virtual const char* Name() const override;

private:
char delim_; // The delimiter is inserted between elements

std::string delim_; // The delimiter is inserted between elements
};

} // namespace ROCKSDB_NAMESPACE
16 changes: 9 additions & 7 deletions utilities/merge_operators/string_append/stringappend2.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,10 @@ namespace ROCKSDB_NAMESPACE {

// Constructor: also specify the delimiter character.
StringAppendTESTOperator::StringAppendTESTOperator(char delim_char)
: delim_(delim_char) {
}
: delim_(1, delim_char) {}

StringAppendTESTOperator::StringAppendTESTOperator(const std::string& delim)
: delim_(delim) {}

// Implementation for the merge operation (concatenates two strings)
bool StringAppendTESTOperator::FullMergeV2(
Expand All @@ -37,7 +39,7 @@ bool StringAppendTESTOperator::FullMergeV2(
size_t numBytes = 0;
for (auto it = merge_in.operand_list.begin();
it != merge_in.operand_list.end(); ++it) {
numBytes += it->size() + 1; // Plus 1 for the delimiter
numBytes += it->size() + delim_.size();
}

// Only print the delimiter after the first entry has been printed
Expand All @@ -51,14 +53,14 @@ bool StringAppendTESTOperator::FullMergeV2(
printDelim = true;
} else if (numBytes) {
merge_out->new_value.reserve(
numBytes - 1); // Minus 1 since we have one less delimiter
numBytes - delim_.size()); // Adjust for one less delimiter
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Need a simple explanation of why we subtract delim_.size() here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We want to reserve the space for the original value and N following operands with preceding delimiters. The size of "N following operands with preceding delimiters" is stored as numBytes. However, if there is no original value, the very first delimiter becomes redundant, therefore we reduce the amount of space needed by the size of one delimiter.

For instance, if the original value was 'foo' and operands 'bar', 'baz' with delimiter ',', we would need 3 + numBytes = 3 + 2 * (3 + 1) = 11 bytes, however if there is no original value, we need only 0 + numBytes - 1 = 7 bytes.
This adjustment just takes into account that a delimiter can now be of an arbitrary length, not only one byte.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks! Can you add this explanation as a comment in the code?

}

// Concatenate the sequence of strings (and add a delimiter between each)
for (auto it = merge_in.operand_list.begin();
it != merge_in.operand_list.end(); ++it) {
if (printDelim) {
merge_out->new_value.append(1, delim_);
merge_out->new_value.append(delim_);
}
merge_out->new_value.append(it->data(), it->size());
printDelim = true;
Expand Down Expand Up @@ -89,15 +91,15 @@ bool StringAppendTESTOperator::_AssocPartialMergeMulti(
for (const auto& operand : operand_list) {
size += operand.size();
}
size += operand_list.size() - 1; // Delimiters
size += (operand_list.size() - 1) * delim_.length(); // Delimiters
new_value->reserve(size);

// Apply concatenation
new_value->assign(operand_list.front().data(), operand_list.front().size());

for (std::deque<Slice>::const_iterator it = operand_list.begin() + 1;
it != operand_list.end(); ++it) {
new_value->append(1, delim_);
new_value->append(delim_);
new_value->append(it->data(), it->size());
}

Expand Down
4 changes: 2 additions & 2 deletions utilities/merge_operators/string_append/stringappend2.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ class StringAppendTESTOperator : public MergeOperator {
public:
// Constructor with delimiter
explicit StringAppendTESTOperator(char delim_char);
explicit StringAppendTESTOperator(const std::string& delim);

virtual bool FullMergeV2(const MergeOperationInput& merge_in,
MergeOperationOutput* merge_out) const override;
Expand All @@ -42,8 +43,7 @@ class StringAppendTESTOperator : public MergeOperator {
const std::deque<Slice>& operand_list,
std::string* new_value, Logger* logger) const;

char delim_; // The delimiter is inserted between elements

std::string delim_; // The delimiter is inserted between elements
};

} // namespace ROCKSDB_NAMESPACE
Loading