Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ignore async_io ReadOption if FileSystem doesn't support it #11296

Closed
wants to merge 8 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
## Unreleased
### Behavior changes
* Compaction output file cutting logic now considers range tombstone start keys. For example, SST partitioner now may receive PartitionerRequest for range tombstone start keys.
* If the async_io ReadOption is specified for MultiGet or NewIterator on a platform that doesn't support IO uring, the option is ignored and synchronous IO is used.

### Bug Fixes
* Fixed an issue for backward iteration when user defined timestamp is enabled in combination with BlobDB.
Expand Down
3 changes: 3 additions & 0 deletions db/arena_wrapped_db_iter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@ void ArenaWrappedDBIter::Init(
read_options_ = read_options;
allow_refresh_ = allow_refresh;
memtable_range_tombstone_iter_ = nullptr;
if (!env->GetFileSystem()->use_async_io()) {
read_options_.async_io = false;
}
}

Status ArenaWrappedDBIter::Refresh() {
Expand Down
29 changes: 16 additions & 13 deletions db/db_basic_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2302,9 +2302,7 @@ TEST_P(DBMultiGetAsyncIOTest, GetFromL0) {
ASSERT_EQ(multiget_io_batch_size.count, 3);
}
#else // ROCKSDB_IOURING_PRESENT
if (GetParam()) {
ASSERT_EQ(statistics()->getTickerCount(MULTIGET_COROUTINE_COUNT), 3);
}
ASSERT_EQ(statistics()->getTickerCount(MULTIGET_COROUTINE_COUNT), 0);
#endif // ROCKSDB_IOURING_PRESENT
}

Expand Down Expand Up @@ -2338,16 +2336,18 @@ TEST_P(DBMultiGetAsyncIOTest, GetFromL1) {
ASSERT_EQ(values[1], "val_l1_" + std::to_string(54));
ASSERT_EQ(values[2], "val_l1_" + std::to_string(102));

#ifdef ROCKSDB_IOURING_PRESENT
HistogramData multiget_io_batch_size;

statistics()->histogramData(MULTIGET_IO_BATCH_SIZE, &multiget_io_batch_size);

#ifdef ROCKSDB_IOURING_PRESENT
// A batch of 3 async IOs is expected, one for each overlapping file in L1
ASSERT_EQ(multiget_io_batch_size.count, 1);
ASSERT_EQ(multiget_io_batch_size.max, 3);
#endif // ROCKSDB_IOURING_PRESENT
ASSERT_EQ(statistics()->getTickerCount(MULTIGET_COROUTINE_COUNT), 3);
#else // ROCKSDB_IOURING_PRESENT
ASSERT_EQ(statistics()->getTickerCount(MULTIGET_COROUTINE_COUNT), 0);
#endif // ROCKSDB_IOURING_PRESENT
}

#ifdef ROCKSDB_IOURING_PRESENT
Expand Down Expand Up @@ -2531,8 +2531,12 @@ TEST_P(DBMultiGetAsyncIOTest, GetFromL2WithRangeOverlapL0L1) {
ASSERT_EQ(values[0], "val_l2_" + std::to_string(19));
ASSERT_EQ(values[1], "val_l2_" + std::to_string(26));

#ifdef ROCKSDB_IOURING_PRESENT
// Bloom filters in L0/L1 will avoid the coroutine calls in those levels
ASSERT_EQ(statistics()->getTickerCount(MULTIGET_COROUTINE_COUNT), 2);
#else // ROCKSDB_IOURING_PRESENT
ASSERT_EQ(statistics()->getTickerCount(MULTIGET_COROUTINE_COUNT), 0);
#endif // ROCKSDB_IOURING_PRESENT
}

#ifdef ROCKSDB_IOURING_PRESENT
Expand Down Expand Up @@ -2623,18 +2627,17 @@ TEST_P(DBMultiGetAsyncIOTest, GetNoIOUring) {
dbfull()->MultiGet(ro, dbfull()->DefaultColumnFamily(), keys.size(),
keys.data(), values.data(), statuses.data());
ASSERT_EQ(values.size(), 3);
ASSERT_EQ(statuses[0], Status::NotSupported());
ASSERT_EQ(statuses[1], Status::NotSupported());
ASSERT_EQ(statuses[2], Status::NotSupported());
ASSERT_EQ(statuses[0], Status::OK());
ASSERT_EQ(statuses[1], Status::OK());
ASSERT_EQ(statuses[2], Status::OK());

HistogramData multiget_io_batch_size;
HistogramData async_read_bytes;

statistics()->histogramData(MULTIGET_IO_BATCH_SIZE, &multiget_io_batch_size);
statistics()->histogramData(ASYNC_READ_BYTES, &async_read_bytes);

// Since IO uring is not supported, no async IO is expected; all reads
// should fall back to synchronous IO (async_read_bytes.count == 0).
ASSERT_EQ(multiget_io_batch_size.count, 1);
ASSERT_EQ(multiget_io_batch_size.max, 3);
ASSERT_EQ(statistics()->getTickerCount(MULTIGET_COROUTINE_COUNT), 3);
ASSERT_EQ(async_read_bytes.count, 0);
ASSERT_EQ(statistics()->getTickerCount(MULTIGET_COROUTINE_COUNT), 0);
}

INSTANTIATE_TEST_CASE_P(DBMultiGetAsyncIOTest, DBMultiGetAsyncIOTest,
Expand Down
3 changes: 3 additions & 0 deletions db/forward_iterator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,9 @@ ForwardIterator::ForwardIterator(DBImpl* db, const ReadOptions& read_options,
if (sv_) {
RebuildIterators(false);
}
if (!cfd_->ioptions()->env->GetFileSystem()->use_async_io()) {
read_options_.async_io = false;
}

// immutable_status_ is a local aggregation of the
// status of the immutable Iterators.
Expand Down
2 changes: 1 addition & 1 deletion db/forward_iterator.h
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ class ForwardIterator : public InternalIterator {
void DeleteIterator(InternalIterator* iter, bool is_arena = false);

DBImpl* const db_;
const ReadOptions read_options_;
ReadOptions read_options_;
ColumnFamilyData* const cfd_;
const SliceTransform* const prefix_extractor_;
const Comparator* user_comparator_;
Expand Down
7 changes: 4 additions & 3 deletions db/version_set.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2121,7 +2121,8 @@ Version::Version(ColumnFamilyData* column_family_data, VersionSet* vset,
max_file_size_for_l0_meta_pin_(
MaxFileSizeForL0MetaPin(mutable_cf_options_)),
version_number_(version_number),
io_tracer_(io_tracer) {}
io_tracer_(io_tracer),
use_async_io_(env_->GetFileSystem()->use_async_io()) {}

Status Version::GetBlob(const ReadOptions& read_options, const Slice& user_key,
const Slice& blob_index_slice,
Expand Down Expand Up @@ -2505,7 +2506,7 @@ void Version::MultiGet(const ReadOptions& read_options, MultiGetRange* range,
MultiGetRange keys_with_blobs_range(*range, range->begin(), range->end());
#if USE_COROUTINES
if (read_options.async_io && read_options.optimize_multiget_for_io &&
using_coroutines()) {
using_coroutines() && use_async_io_) {
s = MultiGetAsync(read_options, range, &blob_ctxs);
} else
#endif // USE_COROUTINES
Expand All @@ -2531,7 +2532,7 @@ void Version::MultiGet(const ReadOptions& read_options, MultiGetRange* range,
// Avoid using the coroutine version if we're looking in a L0 file, since
// L0 files won't be parallelized anyway. The regular synchronous version
// is faster.
if (!read_options.async_io || !using_coroutines() ||
if (!read_options.async_io || !using_coroutines() || !use_async_io_ ||
fp.GetHitFileLevel() == 0 || !fp.RemainingOverlapInLevel()) {
if (f) {
bool skip_filters =
Expand Down
1 change: 1 addition & 0 deletions db/version_set.h
Original file line number Diff line number Diff line change
Expand Up @@ -1075,6 +1075,7 @@ class Version {
// used for debugging and logging purposes only.
uint64_t version_number_;
std::shared_ptr<IOTracer> io_tracer_;
bool use_async_io_;

Version(ColumnFamilyData* cfd, VersionSet* vset, const FileOptions& file_opt,
MutableCFOptions mutable_cf_options,
Expand Down
8 changes: 8 additions & 0 deletions env/fs_posix.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1183,6 +1183,14 @@ class PosixFileSystem : public FileSystem {
#endif
}

// Reports whether this file system can serve asynchronous reads.
// Callers (e.g. MultiGet / iterator code paths) use this to silently
// fall back to synchronous IO when ReadOptions::async_io is requested
// on a platform without IO uring support.
bool use_async_io() override {
#if defined(ROCKSDB_IOURING_PRESENT)
  // Compiled with io_uring headers available; still gated on the
  // runtime check (IsIOUringEnabled presumably consults an env/flag
  // toggle — confirm against its definition elsewhere in this file).
  return IsIOUringEnabled();
#else
  // No io_uring at build time: async IO is never available.
  return false;
#endif
}

#if defined(ROCKSDB_IOURING_PRESENT)
// io_uring instance
std::unique_ptr<ThreadLocalPtr> thread_local_io_urings_;
Expand Down
Loading