From 12eab540562c1d27e042d83d48db8d7fd2deffa7 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Wed, 16 Aug 2023 16:18:22 +0000 Subject: [PATCH 1/8] Add tests for disk failure * Add tests to verify handling a disk failure while initializing cache * Add tests to verify loading of existing cache volumes * Add a way to inject I/O errors in AIO for tests. This is compiled out by default. --- CMakeLists.txt | 4 + configure.ac | 19 +++ iocore/aio/AIO.cc | 14 ++ iocore/aio/AIO_fault_injection.cc | 122 ++++++++++++++++++ iocore/aio/AIO_fault_injection.h | 72 +++++++++++ iocore/aio/CMakeLists.txt | 3 +- iocore/cache/Cache.cc | 14 +- iocore/cache/Makefile.am | 28 ++++ iocore/cache/test/main.cc | 23 +++- iocore/cache/test/main.h | 3 + iocore/cache/test/storage.config | Bin 24 -> 45 bytes iocore/cache/test/test_Alternate_L_to_S.cc | 2 + .../test/test_Alternate_L_to_S_remove_L.cc | 2 + .../test/test_Alternate_L_to_S_remove_S.cc | 2 + iocore/cache/test/test_Alternate_S_to_L.cc | 2 + .../test/test_Alternate_S_to_L_remove_L.cc | 2 + .../test/test_Alternate_S_to_L_remove_S.cc | 2 + iocore/cache/test/test_Cache.cc | 2 + iocore/cache/test/test_Disk_Failure.cc | 67 ++++++++++ iocore/cache/test/test_Disk_Failure_2.cc | 67 ++++++++++ iocore/cache/test/test_Populated_Cache.cc | 57 ++++++++ .../test/test_Populated_Cache_Disk_Failure.cc | 62 +++++++++ iocore/cache/test/test_RWW.cc | 2 + iocore/cache/test/test_Update_L_to_S.cc | 2 + iocore/cache/test/test_Update_S_to_L.cc | 2 + iocore/cache/test/test_Update_header.cc | 2 + src/tests/CMakeLists.txt | 6 + 27 files changed, 580 insertions(+), 3 deletions(-) create mode 100644 iocore/aio/AIO_fault_injection.cc create mode 100644 iocore/aio/AIO_fault_injection.h create mode 100644 iocore/cache/test/test_Disk_Failure.cc create mode 100644 iocore/cache/test/test_Disk_Failure_2.cc create mode 100644 iocore/cache/test/test_Populated_Cache.cc create mode 100644 iocore/cache/test/test_Populated_Cache_Disk_Failure.cc diff --git a/CMakeLists.txt b/CMakeLists.txt index 3d1ec272f51..2f1064b5748 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -56,6 +56,10 @@ option(ENABLE_JEMALLOC "Use jemalloc (default OFF)") option(ENABLE_LUAJIT "Use LuaJIT (default OFF)") option(ENABLE_MIMALLOC "Use mimalloc (default OFF)") option(ENABLE_DOCS "Build docs (default OFF)") +option(ENABLE_DISK_FAILURE_TESTS "Build disk failure tests (enables AIO fault injection, default OFF)" OFF) +if(ENABLE_DISK_FAILURE_TESTS) + add_compile_definitions("AIO_FAULT_INJECTION") +endif() if(CMAKE_SYSTEM_NAME STREQUAL Linux) set(DEFAULT_POSIX_CAP ON) diff --git a/configure.ac b/configure.ac index e2b5ad7e72e..9b84930f59a 100644 --- a/configure.ac +++ b/configure.ac @@ -592,6 +592,19 @@ AC_ARG_ENABLE([32bit-build], ) AC_MSG_RESULT([$enable_32bit]) +# +# Check if disk failure handling tests should be built. +# This means compiling disk failure simulation code into ATS. +# +AC_MSG_CHECKING([whether to build disk failure handling tests]) +AC_ARG_ENABLE([disk-failure-tests], + [AS_HELP_STRING([--enable-disk-failure-tests],[Build disk failure tests])], + [], + [enable_disk_failure_tests=no] +) +AC_MSG_RESULT([$enable_disk_failure_tests]) +AM_CONDITIONAL([ENABLE_DISK_FAILURE_TESTS], [ test "x${enable_disk_failure_tests}" = "xyes" ]) + # # Installation directories @@ -1103,6 +1116,12 @@ elif test "x${enable_tsan}" = "xstatic"; then TS_ADDTO(AM_CXXFLAGS, [-fsanitize=thread -static-libtsan]) fi +# Build for disk failure simulation +if test "x${enable_disk_failure_tests}" = "xyes"; then + TS_ADDTO(AM_CFLAGS, [-DAIO_FAULT_INJECTION]) + TS_ADDTO(AM_CXXFLAGS, [-DAIO_FAULT_INJECTION]) +fi + # Checks for pointer size. # TODO: Later this is irrelevant, and we should just bail on 32-bit platforms always AC_CHECK_SIZEOF([void*]) diff --git a/iocore/aio/AIO.cc b/iocore/aio/AIO.cc index 52d45fe002d..d8204387a77 100644 --- a/iocore/aio/AIO.cc +++ b/iocore/aio/AIO.cc @@ -32,6 +32,10 @@ #include "P_AIO.h" +#ifdef AIO_FAULT_INJECTION +#include "AIO_fault_injection.h" +#endif + #define MAX_DISKS_POSSIBLE 100 // globals @@ -442,9 +446,19 @@ cache_op(AIOCallbackInternal *op) while (a->aio_nbytes - res > 0) { do { if (read) { +#ifdef AIO_FAULT_INJECTION + err = aioFaultInjection.pread(a->aio_fildes, (static_cast(a->aio_buf)) + res, a->aio_nbytes - res, + a->aio_offset + res); +#else err = pread(a->aio_fildes, (static_cast(a->aio_buf)) + res, a->aio_nbytes - res, a->aio_offset + res); +#endif } else { +#ifdef AIO_FAULT_INJECTION + err = aioFaultInjection.pwrite(a->aio_fildes, (static_cast(a->aio_buf)) + res, a->aio_nbytes - res, + a->aio_offset + res); +#else err = pwrite(a->aio_fildes, (static_cast(a->aio_buf)) + res, a->aio_nbytes - res, a->aio_offset + res); +#endif } } while ((err < 0) && (errno == EINTR || errno == ENOBUFS || errno == ENOMEM)); if (err <= 0) { diff --git a/iocore/aio/AIO_fault_injection.cc b/iocore/aio/AIO_fault_injection.cc new file mode 100644 index 00000000000..28650f5d25c --- /dev/null +++ b/iocore/aio/AIO_fault_injection.cc @@ -0,0 +1,122 @@ +/** @file + +A mechanism to simulate disk failure by injecting faults in userspace. + +@section license License + +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +#include "AIO_fault_injection.h" +#include "I_Lock.h" +#include + +AIOFaultInjection aioFaultInjection; + +void +AIOFaultInjection::_decrement_op_count(int fd) +{ + auto it = _state_by_fd.find(fd); + if (it != _state_by_fd.end()) { + it->second.op_count--; + } +} + +AIOFaultInjection::IOFault +AIOFaultInjection::_op_result(int fd) +{ + auto it = _faults_by_fd.find(fd); + if (it != _faults_by_fd.end()) { + std::size_t op_count = _state_by_fd[fd].op_count++; + auto results = it->second; + if (results.find(op_count) != results.end()) { + return results[op_count]; + } + } + + return IOFault{0, false}; +} + +void +AIOFaultInjection::inject_fault(const char *path_regex, int op_index, IOFault fault) +{ + std::lock_guard lock{_mutex}; + _faults_by_regex[path_regex][op_index] = fault; +} + +int +AIOFaultInjection::open(const char *pathname, int flags, mode_t mode) +{ + std::lock_guard lock{_mutex}; + std::filesystem::path abspath = std::filesystem::absolute(pathname); + int fd = ::open(pathname, flags, mode); + if (fd >= 0) { + for (auto &[re_str, faults] : _faults_by_regex) { + std::regex re{re_str}; + std::cmatch m; + if (std::regex_match(abspath.c_str(), m, re)) { + _faults_by_fd.insert_or_assign(fd, faults); + } + } + } + + return fd; +} + +ssize_t +AIOFaultInjection::pread(int fd, void *buf, size_t nbytes, __off_t offset) +{ + std::lock_guard lock{_mutex}; + IOFault result = _op_result(fd); + ssize_t ret = 0; + if (result.skip_io) { + ink_release_assert(result.err_no != 0); + } else { + ret = ::pread(fd, buf, nbytes, offset); + if (ret < 0 && (errno == EINTR || errno == ENOBUFS || errno == ENOMEM)) { + // caller will retry + _decrement_op_count(fd); + } + } + if (result.err_no != 0) { + errno = result.err_no; + ret = -1; + } + return ret; +} + +ssize_t +AIOFaultInjection::pwrite(int fd, const void *buf, size_t n, __off_t offset) +{ + std::lock_guard lock{_mutex}; + IOFault result = _op_result(fd); + ssize_t ret = 0; + if (result.skip_io) { + ink_release_assert(result.err_no != 0); + } else { + ret = ::pwrite(fd, buf, n, offset); + if (ret < 0 && (errno == EINTR || errno == ENOBUFS || errno == ENOMEM)) { + // caller will retry + _decrement_op_count(fd); + } + } + if (result.err_no != 0) { + errno = result.err_no; + ret = -1; + } + return ret; +} diff --git a/iocore/aio/AIO_fault_injection.h b/iocore/aio/AIO_fault_injection.h new file mode 100644 index 00000000000..735d560a4af --- /dev/null +++ b/iocore/aio/AIO_fault_injection.h @@ -0,0 +1,72 @@ +/** @file + + A mechanism to simulate disk failure by injecting faults in userspace. + + @section license License + + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + */ + +#pragma once + +#include "I_Continuation.h" +#include "I_Lock.h" +#include "tscore/ink_assert.h" +#include "tscore/ink_mutex.h" +#include +#include +#include +#include +#include +#include +#include + +// We need a way to simulate failures determininstically to test disk +// initialization + +constexpr auto TAG{"fault"}; + +class AIOFaultInjection +{ + struct IOFault { + int err_no; + bool skip_io; + }; + using IOFaults = std::unordered_map; + + struct IOFaultState { + std::size_t op_count = 0; + }; + + std::unordered_map _faults_by_regex; + std::unordered_map _faults_by_fd; + std::unordered_map _state_by_fd; + + void _decrement_op_count(int fd); + IOFault _op_result(int fd); + + std::mutex _mutex; + +public: + void inject_fault(const char *path_regex, int op_index, IOFault fault); + + int open(const char *pathname, int flags, mode_t mode); + ssize_t pread(int fd, void *buf, size_t nbytes, __off_t offset); + ssize_t pwrite(int fd, const void *buf, size_t n, __off_t offset); +}; + +extern AIOFaultInjection aioFaultInjection; diff --git a/iocore/aio/CMakeLists.txt b/iocore/aio/CMakeLists.txt index ceb77b26d55..f39e4be2ba4 100644 --- a/iocore/aio/CMakeLists.txt +++ b/iocore/aio/CMakeLists.txt @@ -19,7 +19,8 @@ add_library(aio STATIC) add_library(ts::aio ALIAS aio) -target_sources(aio PRIVATE AIO.cc Inline.cc) +target_sources(aio PRIVATE AIO.cc Inline.cc AIO_fault_injection.cc) + target_include_directories(aio PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} diff --git a/iocore/cache/Cache.cc b/iocore/cache/Cache.cc index 0c7778ce327..1a013dbb0fd 100644 --- a/iocore/cache/Cache.cc +++ b/iocore/cache/Cache.cc @@ -37,6 +37,10 @@ #include "tscore/hugepages.h" +#ifdef AIO_FAULT_INJECTION +#include "AIO_fault_injection.h" +#endif + #include constexpr ts::VersionNumber CACHE_DB_VERSION(CACHE_DB_MAJOR_VERSION, CACHE_DB_MINOR_VERSION); @@ -600,11 +604,19 @@ CacheProcessor::start_internal(int flags) opts |= O_RDONLY; } - int fd = open(paths[gndisks], opts, 0644); +#ifdef AIO_FAULT_INJECTION + int fd = aioFaultInjection.open(paths[gndisks], opts, 0644); +#else + int fd = open(paths[gndisks], opts, 0644); +#endif int64_t blocks = sd->blocks; if (fd < 0 && (opts & O_CREAT)) { // Try without O_DIRECT if this is a file on filesystem, e.g. tmpfs. +#ifdef AIO_FAULT_INJECTION + fd = aioFaultInjection.open(paths[gndisks], DEFAULT_CACHE_OPTIONS | O_CREAT, 0644); +#else fd = open(paths[gndisks], DEFAULT_CACHE_OPTIONS | O_CREAT, 0644); +#endif } if (fd >= 0) { diff --git a/iocore/cache/Makefile.am b/iocore/cache/Makefile.am index 34e65c2bbc3..7bccc002533 100644 --- a/iocore/cache/Makefile.am +++ b/iocore/cache/Makefile.am @@ -134,6 +134,34 @@ check_PROGRAMS = \ test_Update_S_to_L \ test_Update_header +if ENABLE_DISK_FAILURE_TESTS +check_PROGRAMS += \ + test_Disk_Failure \ + test_Disk_Failure_2 \ + test_Populated_Cache_Disk_Failure + +test_Disk_Failure_CPPFLAGS = $(test_CPPFLAGS) +test_Disk_Failure_LDFLAGS = @AM_LDFLAGS@ +test_Disk_Failure_LDADD = $(test_LDADD) +test_Disk_Failure_SOURCES = \ + $(test_main_SOURCES) \ + ./test/test_Disk_Failure.cc + +test_Disk_Failure_2_CPPFLAGS = $(test_CPPFLAGS) +test_Disk_Failure_2_LDFLAGS = @AM_LDFLAGS@ +test_Disk_Failure_2_LDADD = $(test_LDADD) +test_Disk_Failure_2_SOURCES = \ + $(test_main_SOURCES) \ + ./test/test_Disk_Failure_2.cc + +test_Populated_Cache_Disk_Failure_CPPFLAGS = $(test_CPPFLAGS) +test_Populated_Cache_Disk_Failure_LDFLAGS = @AM_LDFLAGS@ +test_Populated_Cache_Disk_Failure_LDADD = $(test_LDADD) +test_Populated_Cache_Disk_Failure_SOURCES = \ + $(test_main_SOURCES) \ + ./test/test_Populated_Cache_Disk_Failure.cc +endif + test_main_SOURCES = \ ./test/main.cc \ ./test/stub.cc \ diff --git a/iocore/cache/test/main.cc b/iocore/cache/test/main.cc index c3f5042a825..0494e75271d 100644 --- a/iocore/cache/test/main.cc +++ b/iocore/cache/test/main.cc @@ -21,6 +21,8 @@ limitations under the License. */ +#include "tscore/ink_config.h" +#include #define CATCH_CONFIG_MAIN #include "main.h" @@ -55,10 +57,25 @@ temp_prefix() Dbg(dbg_ctl_cache_test, "Failed to create directories for test: %s(%s)", prefix.c_str(), err.message().c_str()); } ink_assert(result); + result = swoc::file::create_directories(prefix / "var" / "trafficserver2", err, 0755); + if (!result) { + Dbg(dbg_ctl_cache_test, "Failed to create directories for test: %s(%s)", prefix.c_str(), err.message().c_str()); + } + ink_assert(result); return prefix.string(); } +// Populate the temporary directory with pre-made cache files +static void +populate_cache(const swoc::file::path &prefix) +{ + swoc::file::path src_path{TS_ABS_TOP_SRCDIR}; + std::error_code ec; + swoc::file::copy(src_path / "iocore/cache/test/var/trafficserver/cache.db", prefix / "var/trafficserver/", ec); + swoc::file::copy(src_path / "iocore/cache/test/var/trafficserver2/cache.db", prefix / "var/trafficserver2/", ec); +} + void test_done() { @@ -106,7 +123,11 @@ struct EventProcessorListener : Catch::TestEventListenerBase { diags()->show_location = SHOW_LOCATION_DEBUG; mime_init(); - Layout::create(temp_prefix()); + swoc::file::path prefix = temp_prefix(); + Layout::create(prefix.view()); + if (reuse_existing_cache) { + populate_cache(prefix); + } RecProcessInit(); LibRecordsConfigInit(); ink_net_init(ts::ModuleVersion(1, 0, ts::ModuleVersion::PRIVATE)); diff --git a/iocore/cache/test/main.h b/iocore/cache/test/main.h index 6ceb55e6f9c..547607f6023 100644 --- a/iocore/cache/test/main.h +++ b/iocore/cache/test/main.h @@ -233,3 +233,6 @@ class CacheReadTest : public CacheTestBase IOBufferReader *_reader = nullptr; OverridableHttpConfigParams params; }; + +// Does the test use stored cache files, or initialize new files? +extern bool reuse_existing_cache; diff --git a/iocore/cache/test/storage.config b/iocore/cache/test/storage.config index d86334e356c2d92d5962228d3fe1485382e59044..e3c34e9153d9e4bd19595d6e1022cece8a07234a 100644 GIT binary patch literal 45 jcmXR*EYdG2N=!@3OfF6>DoZU=FgEh#Dnl1F0t*8GfNl@R literal 24 fcmXR*EYdG2N=!@3OfF6>DoZU=FfujsW#9q;a-|3Z diff --git a/iocore/cache/test/test_Alternate_L_to_S.cc b/iocore/cache/test/test_Alternate_L_to_S.cc index b3f263884da..1604b664321 100644 --- a/iocore/cache/test/test_Alternate_L_to_S.cc +++ b/iocore/cache/test/test_Alternate_L_to_S.cc @@ -24,6 +24,8 @@ #define LARGE_FILE 10 * 1024 * 1024 #define SMALL_FILE 10 * 1024 +bool reuse_existing_cache = false; + #include "main.h" class CacheAltReadAgain : public CacheTestHandler diff --git a/iocore/cache/test/test_Alternate_L_to_S_remove_L.cc b/iocore/cache/test/test_Alternate_L_to_S_remove_L.cc index 5abac622335..0245db28f32 100644 --- a/iocore/cache/test/test_Alternate_L_to_S_remove_L.cc +++ b/iocore/cache/test/test_Alternate_L_to_S_remove_L.cc @@ -26,6 +26,8 @@ #include "main.h" +bool reuse_existing_cache = false; + // delete dir Dir dir = {}; diff --git a/iocore/cache/test/test_Alternate_L_to_S_remove_S.cc b/iocore/cache/test/test_Alternate_L_to_S_remove_S.cc index 31e9ce812d8..1f14f058958 100644 --- a/iocore/cache/test/test_Alternate_L_to_S_remove_S.cc +++ b/iocore/cache/test/test_Alternate_L_to_S_remove_S.cc @@ -26,6 +26,8 @@ #include "main.h" +bool reuse_existing_cache = false; + // delete dir Dir dir = {}; diff --git a/iocore/cache/test/test_Alternate_S_to_L.cc b/iocore/cache/test/test_Alternate_S_to_L.cc index 9a67488406d..f8fcf573e45 100644 --- a/iocore/cache/test/test_Alternate_S_to_L.cc +++ b/iocore/cache/test/test_Alternate_S_to_L.cc @@ -26,6 +26,8 @@ #include "main.h" +bool reuse_existing_cache = false; + class CacheAltReadAgain : public CacheTestHandler { public: diff --git a/iocore/cache/test/test_Alternate_S_to_L_remove_L.cc b/iocore/cache/test/test_Alternate_S_to_L_remove_L.cc index 27a5a39335f..61ee475fd64 100644 --- a/iocore/cache/test/test_Alternate_S_to_L_remove_L.cc +++ b/iocore/cache/test/test_Alternate_S_to_L_remove_L.cc @@ -26,6 +26,8 @@ #include "main.h" +bool reuse_existing_cache = false; + // delete dir Dir dir = {}; diff --git a/iocore/cache/test/test_Alternate_S_to_L_remove_S.cc b/iocore/cache/test/test_Alternate_S_to_L_remove_S.cc index b2d275608ed..bc4bddea242 100644 --- a/iocore/cache/test/test_Alternate_S_to_L_remove_S.cc +++ b/iocore/cache/test/test_Alternate_S_to_L_remove_S.cc @@ -26,6 +26,8 @@ #include "main.h" +bool reuse_existing_cache = false; + // delete dir Dir dir = {}; diff --git a/iocore/cache/test/test_Cache.cc b/iocore/cache/test/test_Cache.cc index 63284f51c54..aa30cef345c 100644 --- a/iocore/cache/test/test_Cache.cc +++ b/iocore/cache/test/test_Cache.cc @@ -26,6 +26,8 @@ #define LARGE_FILE 10 * 1024 * 1024 #define SMALL_FILE 10 * 1024 +bool reuse_existing_cache = false; + class CacheCommInit : public CacheInit { public: diff --git a/iocore/cache/test/test_Disk_Failure.cc b/iocore/cache/test/test_Disk_Failure.cc new file mode 100644 index 00000000000..20a41315ffd --- /dev/null +++ b/iocore/cache/test/test_Disk_Failure.cc @@ -0,0 +1,67 @@ +/** @file + + A brief file description + + @section license License + + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + */ + +#include "main.h" + +#define LARGE_FILE 10 * 1024 * 1024 +#define SMALL_FILE 10 * 1024 + +#ifndef AIO_FAULT_INJECTION +#error Must define AIO_FAULT_INJECTION! +#endif +#include "AIO_fault_injection.h" + +bool reuse_existing_cache = false; +extern int gndisks; + +class CacheCommInit : public CacheInit +{ +public: + CacheCommInit() {} + int + cache_init_success_callback(int event, void *e) override + { + // We initialize two disks and inject failure in one. Ensure that one disk + // remains. + REQUIRE(gndisks == 1); + CacheTestHandler *h = new CacheTestHandler(LARGE_FILE); + CacheTestHandler *h2 = new CacheTestHandler(SMALL_FILE, "http://www.scw11.com"); + TerminalTest *tt = new TerminalTest; + h->add(h2); + h->add(tt); + this_ethread()->schedule_imm(h); + delete this; + return 0; + } +}; + +TEST_CASE("Disk fail on first operation", "cache") +{ + aioFaultInjection.inject_fault(".*/var/trafficserver2/cache.db", 0, {.err_no = EIO, .skip_io = false}); + init_cache(256 * 1024 * 1024); + // large write test + CacheCommInit *init = new CacheCommInit; + + this_ethread()->schedule_imm(init); + this_thread()->execute(); +} diff --git a/iocore/cache/test/test_Disk_Failure_2.cc b/iocore/cache/test/test_Disk_Failure_2.cc new file mode 100644 index 00000000000..1d7a4b55768 --- /dev/null +++ b/iocore/cache/test/test_Disk_Failure_2.cc @@ -0,0 +1,67 @@ +/** @file + + A brief file description + + @section license License + + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + */ + +#include "main.h" + +#define LARGE_FILE 10 * 1024 * 1024 +#define SMALL_FILE 10 * 1024 + +#ifndef AIO_FAULT_INJECTION +#error Must define AIO_FAULT_INJECTION! +#endif +#include "AIO_fault_injection.h" + +bool reuse_existing_cache = false; +extern int gndisks; + +class CacheCommInit : public CacheInit +{ +public: + CacheCommInit() {} + int + cache_init_success_callback(int event, void *e) override + { + // We initialize two disks and inject failure in one. Ensure that one disk + // remains. + REQUIRE(gndisks == 1); + CacheTestHandler *h = new CacheTestHandler(LARGE_FILE); + CacheTestHandler *h2 = new CacheTestHandler(SMALL_FILE, "http://www.scw11.com"); + TerminalTest *tt = new TerminalTest; + h->add(h2); + h->add(tt); + this_ethread()->schedule_imm(h); + delete this; + return 0; + } +}; + +TEST_CASE("Disk fail on second operation", "cache") +{ + aioFaultInjection.inject_fault(".*/var/trafficserver2/cache.db", 1, {.err_no = EIO, .skip_io = false}); + init_cache(256 * 1024 * 1024); + // large write test + CacheCommInit *init = new CacheCommInit; + + this_ethread()->schedule_imm(init); + this_thread()->execute(); +} diff --git a/iocore/cache/test/test_Populated_Cache.cc b/iocore/cache/test/test_Populated_Cache.cc new file mode 100644 index 00000000000..567f095571d --- /dev/null +++ b/iocore/cache/test/test_Populated_Cache.cc @@ -0,0 +1,57 @@ +/** @file + + A brief file description + + @section license License + + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + */ + +#include "main.h" + +#define LARGE_FILE 10 * 1024 * 1024 +#define SMALL_FILE 10 * 1024 + +bool reuse_existing_cache = true; + +class CacheCommInit : public CacheInit +{ +public: + CacheCommInit() {} + int + cache_init_success_callback(int event, void *e) override + { + CacheTestHandler *h = new CacheTestHandler(LARGE_FILE, "http://www.example.com"); + CacheTestHandler *h2 = new CacheTestHandler(SMALL_FILE, "http://www.scw12.com"); + TerminalTest *tt = new TerminalTest; + h->add(h2); + h->add(tt); + this_ethread()->schedule_imm(h); + delete this; + return 0; + } +}; + +TEST_CASE("cache write -> read", "cache") +{ + init_cache(256 * 1024 * 1024); + // large write test + CacheCommInit *init = new CacheCommInit; + + this_ethread()->schedule_imm(init); + this_thread()->execute(); +} diff --git a/iocore/cache/test/test_Populated_Cache_Disk_Failure.cc b/iocore/cache/test/test_Populated_Cache_Disk_Failure.cc new file mode 100644 index 00000000000..897d508a084 --- /dev/null +++ b/iocore/cache/test/test_Populated_Cache_Disk_Failure.cc @@ -0,0 +1,62 @@ +/** @file + + A brief file description + + @section license License + + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + */ + +#include "main.h" + +#define LARGE_FILE 10 * 1024 * 1024 +#define SMALL_FILE 10 * 1024 + +bool reuse_existing_cache = true; +#ifndef AIO_FAULT_INJECTION +#error Must define AIO_FAULT_INJECTION! +#endif +#include "AIO_fault_injection.h" + +class CacheCommInit : public CacheInit +{ +public: + CacheCommInit() {} + int + cache_init_success_callback(int event, void *e) override + { + CacheTestHandler *h = new CacheTestHandler(LARGE_FILE, "http://www.example.com"); + CacheTestHandler *h2 = new CacheTestHandler(SMALL_FILE, "http://www.scw12.com"); + TerminalTest *tt = new TerminalTest; + h->add(h2); + h->add(tt); + this_ethread()->schedule_imm(h); + delete this; + return 0; + } +}; + +TEST_CASE("cache write -> read", "cache") +{ + aioFaultInjection.inject_fault(".*/var/trafficserver2/cache.db", 0, {.err_no = EIO, .skip_io = false}); + init_cache(256 * 1024 * 1024); + // large write test + CacheCommInit *init = new CacheCommInit; + + this_ethread()->schedule_imm(init); + this_thread()->execute(); +} diff --git a/iocore/cache/test/test_RWW.cc b/iocore/cache/test/test_RWW.cc index 7f7f826a54f..9e6605e5139 100644 --- a/iocore/cache/test/test_RWW.cc +++ b/iocore/cache/test/test_RWW.cc @@ -28,6 +28,8 @@ #include "main.h" +bool reuse_existing_cache = false; + namespace { diff --git a/iocore/cache/test/test_Update_L_to_S.cc b/iocore/cache/test/test_Update_L_to_S.cc index 9b4ea8e75fb..0f31c36d330 100644 --- a/iocore/cache/test/test_Update_L_to_S.cc +++ b/iocore/cache/test/test_Update_L_to_S.cc @@ -26,6 +26,8 @@ #include "main.h" +bool reuse_existing_cache = false; + class CacheUpdateReadAgain : public CacheTestHandler { public: diff --git a/iocore/cache/test/test_Update_S_to_L.cc b/iocore/cache/test/test_Update_S_to_L.cc index 5b0f6d16faa..0514bdf8b1f 100644 --- a/iocore/cache/test/test_Update_S_to_L.cc +++ b/iocore/cache/test/test_Update_S_to_L.cc @@ -26,6 +26,8 @@ #include "main.h" +bool reuse_existing_cache = false; + class CacheUpdateReadAgain : public CacheTestHandler { public: diff --git a/iocore/cache/test/test_Update_header.cc b/iocore/cache/test/test_Update_header.cc index 4a9c1183a88..ac57478c339 100644 --- a/iocore/cache/test/test_Update_header.cc +++ b/iocore/cache/test/test_Update_header.cc @@ -26,6 +26,8 @@ #include "main.h" +bool reuse_existing_cache = false; + class CacheUpdateReadAgain : public CacheTestHandler { public: diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt index 6ba08015e3a..5b7f099a242 100644 --- a/src/tests/CMakeLists.txt +++ b/src/tests/CMakeLists.txt @@ -87,6 +87,12 @@ macro(add_stubbed_test name) endmacro() add_cache_test(Cache ${CMAKE_SOURCE_DIR}/iocore/cache/test/test_Cache.cc) +add_cache_test(Populated_Cache ${CMAKE_SOURCE_DIR}/iocore/cache/test/test_Populated_Cache.cc) +if(ENABLE_DISK_FAILURE_TESTS) +add_cache_test(Disk_Failure ${CMAKE_SOURCE_DIR}/iocore/cache/test/test_Disk_Failure.cc) +add_cache_test(Disk_Failure_2 ${CMAKE_SOURCE_DIR}/iocore/cache/test/test_Disk_Failure_2.cc) +add_cache_test(Populated_Cache_Disk_Failure ${CMAKE_SOURCE_DIR}/iocore/cache/test/test_Populated_Cache_Disk_Failure.cc) +endif() add_cache_test(RWW ${CMAKE_SOURCE_DIR}/iocore/cache/test/test_RWW.cc) add_cache_test(Alternate_L_to_S ${CMAKE_SOURCE_DIR}/iocore/cache/test/test_Alternate_L_to_S.cc) add_cache_test(Alternate_S_to_L ${CMAKE_SOURCE_DIR}/iocore/cache/test/test_Alternate_S_to_L.cc) From 7ddb5e2119c7e2c8e8c88cf80c9520817d048397 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Sun, 20 Aug 2023 19:47:21 -0500 Subject: [PATCH 2/8] Refine the tests * parametrically generate single failure tests * expect disks being marked bad --- iocore/aio/AIO.cc | 6 ++++++ iocore/aio/AIO_fault_injection.h | 2 +- iocore/cache/test/CacheTestHandler.cc | 9 ++++++++- iocore/cache/test/CacheTestHandler.h | 7 ++++--- iocore/cache/test/main.cc | 10 +++++++--- iocore/cache/test/main.h | 1 + iocore/cache/test/test_Alternate_L_to_S.cc | 1 + .../cache/test/test_Alternate_L_to_S_remove_L.cc | 1 + .../cache/test/test_Alternate_L_to_S_remove_S.cc | 1 + iocore/cache/test/test_Alternate_S_to_L.cc | 1 + .../cache/test/test_Alternate_S_to_L_remove_L.cc | 1 + .../cache/test/test_Alternate_S_to_L_remove_S.cc | 1 + iocore/cache/test/test_Cache.cc | 1 + iocore/cache/test/test_Disk_Failure.cc | 16 +++++++++++----- ...sk_Failure_2.cc => test_Disk_Init_Failure.cc} | 11 ++++++++--- iocore/cache/test/test_Populated_Cache.cc | 1 + .../test/test_Populated_Cache_Disk_Failure.cc | 1 + iocore/cache/test/test_RWW.cc | 1 + iocore/cache/test/test_Update_L_to_S.cc | 1 + iocore/cache/test/test_Update_S_to_L.cc | 1 + iocore/cache/test/test_Update_header.cc | 1 + src/tests/CMakeLists.txt | 10 ++++++++-- 22 files changed, 67 insertions(+), 18 deletions(-) rename iocore/cache/test/{test_Disk_Failure_2.cc => test_Disk_Init_Failure.cc} (84%) diff --git a/iocore/aio/AIO.cc b/iocore/aio/AIO.cc index d8204387a77..dbd0ed642a8 100644 --- a/iocore/aio/AIO.cc +++ b/iocore/aio/AIO.cc @@ -444,9 +444,12 @@ cache_op(AIOCallbackInternal *op) ssize_t err, res = 0; while (a->aio_nbytes - res > 0) { + static int debug_op_index = 0; do { if (read) { #ifdef AIO_FAULT_INJECTION + Debug("cache.op", "op %d: pread(%d, %p, %zu, %zu)", debug_op_index, a->aio_fildes, + (static_cast(a->aio_buf)) + res, a->aio_nbytes - res, a->aio_offset + res); err = aioFaultInjection.pread(a->aio_fildes, (static_cast(a->aio_buf)) + res, a->aio_nbytes - res, a->aio_offset + res); #else @@ -456,6 +459,8 @@ cache_op(AIOCallbackInternal *op) #ifdef AIO_FAULT_INJECTION err = aioFaultInjection.pwrite(a->aio_fildes, (static_cast(a->aio_buf)) + res, a->aio_nbytes - res, a->aio_offset + res); + Debug("cache.op", "op %d: pwrite(%d, %p, %zu, %zu)", debug_op_index, a->aio_fildes, + (static_cast(a->aio_buf)) + res, a->aio_nbytes - res, a->aio_offset + res); #else err = pwrite(a->aio_fildes, (static_cast(a->aio_buf)) + res, a->aio_nbytes - res, a->aio_offset + res); #endif @@ -466,6 +471,7 @@ cache_op(AIOCallbackInternal *op) op->aio_result = -errno; return (err); } + debug_op_index++; res += err; } op->aio_result = res; diff --git a/iocore/aio/AIO_fault_injection.h b/iocore/aio/AIO_fault_injection.h index 735d560a4af..9357a172fbb 100644 --- a/iocore/aio/AIO_fault_injection.h +++ b/iocore/aio/AIO_fault_injection.h @@ -38,7 +38,7 @@ // We need a way to simulate failures determininstically to test disk // initialization -constexpr auto TAG{"fault"}; +static constexpr auto TAG{"fault"}; class AIOFaultInjection { diff --git a/iocore/cache/test/CacheTestHandler.cc b/iocore/cache/test/CacheTestHandler.cc index 72ceceaf934..9145b95a744 100644 --- a/iocore/cache/test/CacheTestHandler.cc +++ b/iocore/cache/test/CacheTestHandler.cc @@ -24,7 +24,7 @@ TestContChain::TestContChain() : Continuation(new_ProxyMutex()) {} -CacheTestHandler::CacheTestHandler(size_t size, const char *url) +CacheTestHandler::CacheTestHandler(size_t size, const char *url, bool expect_fail) : _expect_open_read_failed(expect_fail) { this->_wt = new CacheWriteTest(size, this, url); this->_rt = new CacheReadTest(size, this, url); @@ -59,6 +59,13 @@ CacheTestHandler::handle_cache_event(int event, CacheTestBase *base) base->close(); delete this; break; + case CACHE_EVENT_OPEN_READ_FAILED: + if (_expect_open_read_failed) { + base->close(); + delete this; + break; + } + [[fallthrough]]; default: REQUIRE(false); base->close(); diff --git a/iocore/cache/test/CacheTestHandler.h b/iocore/cache/test/CacheTestHandler.h index ea8724a8e1b..c60edfc4000 100644 --- a/iocore/cache/test/CacheTestHandler.h +++ b/iocore/cache/test/CacheTestHandler.h @@ -67,15 +67,16 @@ class CacheTestHandler : public TestContChain { public: CacheTestHandler() = default; - CacheTestHandler(size_t size, const char *url = DEFAULT_URL); + CacheTestHandler(size_t size, const char *url = DEFAULT_URL, bool expect_fail = false); int start_test(int event, void *e); virtual void handle_cache_event(int event, CacheTestBase *base); protected: - CacheTestBase *_rt = nullptr; - CacheTestBase *_wt = nullptr; + CacheTestBase *_rt = nullptr; + CacheTestBase *_wt = nullptr; + bool _expect_open_read_failed = false; }; class TerminalTest : public CacheTestHandler diff --git a/iocore/cache/test/main.cc b/iocore/cache/test/main.cc index 0494e75271d..58e1ccffac0 100644 --- a/iocore/cache/test/main.cc +++ b/iocore/cache/test/main.cc @@ -51,15 +51,18 @@ temp_prefix() tmpdir = "/tmp"; } snprintf(buffer, sizeof(buffer), "%s/cachetest.XXXXXX", tmpdir); + ink_assert(cache_vols == 1 || cache_vols == 2); auto prefix = swoc::file::path(mkdtemp(buffer)); bool result = swoc::file::create_directories(prefix / "var" / "trafficserver", err, 0755); if (!result) { Dbg(dbg_ctl_cache_test, "Failed to create directories for test: %s(%s)", prefix.c_str(), err.message().c_str()); } ink_assert(result); - result = swoc::file::create_directories(prefix / "var" / "trafficserver2", err, 0755); - if (!result) { - Dbg(dbg_ctl_cache_test, "Failed to create directories for test: %s(%s)", prefix.c_str(), err.message().c_str()); + if (cache_vols == 2) { + result = swoc::file::create_directories(prefix / "var" / "trafficserver2", err, 0755); + if (!result) { + Dbg(dbg_ctl_cache_test, "Failed to create directories for test: %s(%s)", prefix.c_str(), err.message().c_str()); + } } ink_assert(result); @@ -72,6 +75,7 @@ populate_cache(const swoc::file::path &prefix) { swoc::file::path src_path{TS_ABS_TOP_SRCDIR}; std::error_code ec; + ink_assert(cache_vols == 2); swoc::file::copy(src_path / "iocore/cache/test/var/trafficserver/cache.db", prefix / "var/trafficserver/", ec); swoc::file::copy(src_path / "iocore/cache/test/var/trafficserver2/cache.db", prefix / "var/trafficserver2/", ec); } diff --git a/iocore/cache/test/main.h b/iocore/cache/test/main.h index 547607f6023..1478ccd1418 100644 --- a/iocore/cache/test/main.h +++ b/iocore/cache/test/main.h @@ -235,4 +235,5 @@ class CacheReadTest : public CacheTestBase }; // Does the test use stored cache files, or initialize new files? +extern int cache_vols; extern bool reuse_existing_cache; diff --git a/iocore/cache/test/test_Alternate_L_to_S.cc b/iocore/cache/test/test_Alternate_L_to_S.cc index 1604b664321..c8f8e1554a3 100644 --- a/iocore/cache/test/test_Alternate_L_to_S.cc +++ b/iocore/cache/test/test_Alternate_L_to_S.cc @@ -24,6 +24,7 @@ #define LARGE_FILE 10 * 1024 * 1024 #define SMALL_FILE 10 * 1024 +int cache_vols = 1; bool reuse_existing_cache = false; #include "main.h" diff --git a/iocore/cache/test/test_Alternate_L_to_S_remove_L.cc b/iocore/cache/test/test_Alternate_L_to_S_remove_L.cc index 0245db28f32..c0195805b42 100644 --- a/iocore/cache/test/test_Alternate_L_to_S_remove_L.cc +++ b/iocore/cache/test/test_Alternate_L_to_S_remove_L.cc @@ -26,6 +26,7 @@ #include "main.h" +int cache_vols = 1; bool reuse_existing_cache = false; // delete dir diff --git a/iocore/cache/test/test_Alternate_L_to_S_remove_S.cc b/iocore/cache/test/test_Alternate_L_to_S_remove_S.cc index 1f14f058958..938eda8b8d4 100644 --- a/iocore/cache/test/test_Alternate_L_to_S_remove_S.cc +++ b/iocore/cache/test/test_Alternate_L_to_S_remove_S.cc @@ -26,6 +26,7 @@ #include "main.h" +int cache_vols = 1; bool reuse_existing_cache = false; // delete dir diff --git a/iocore/cache/test/test_Alternate_S_to_L.cc b/iocore/cache/test/test_Alternate_S_to_L.cc index f8fcf573e45..6195eedce27 100644 --- a/iocore/cache/test/test_Alternate_S_to_L.cc +++ b/iocore/cache/test/test_Alternate_S_to_L.cc @@ -26,6 +26,7 @@ #include "main.h" +int cache_vols = 1; bool reuse_existing_cache = false; class CacheAltReadAgain : public CacheTestHandler diff --git a/iocore/cache/test/test_Alternate_S_to_L_remove_L.cc b/iocore/cache/test/test_Alternate_S_to_L_remove_L.cc index 61ee475fd64..57d71c732c8 100644 --- a/iocore/cache/test/test_Alternate_S_to_L_remove_L.cc +++ b/iocore/cache/test/test_Alternate_S_to_L_remove_L.cc @@ -26,6 +26,7 @@ #include "main.h" +int cache_vols = 1; bool reuse_existing_cache = false; // delete dir diff --git a/iocore/cache/test/test_Alternate_S_to_L_remove_S.cc b/iocore/cache/test/test_Alternate_S_to_L_remove_S.cc index bc4bddea242..151ea9bcb92 100644 --- a/iocore/cache/test/test_Alternate_S_to_L_remove_S.cc +++ b/iocore/cache/test/test_Alternate_S_to_L_remove_S.cc @@ -26,6 +26,7 @@ #include "main.h" +int cache_vols = 1; bool reuse_existing_cache = false; // delete dir diff --git a/iocore/cache/test/test_Cache.cc b/iocore/cache/test/test_Cache.cc index aa30cef345c..364eaf9995a 100644 --- a/iocore/cache/test/test_Cache.cc +++ b/iocore/cache/test/test_Cache.cc @@ -26,6 +26,7 @@ #define LARGE_FILE 10 * 1024 * 1024 #define SMALL_FILE 10 * 1024 +int cache_vols = 1; bool reuse_existing_cache = false; class CacheCommInit : public CacheInit diff --git a/iocore/cache/test/test_Disk_Failure.cc b/iocore/cache/test/test_Disk_Failure.cc index 20a41315ffd..94a2f92d63c 100644 --- a/iocore/cache/test/test_Disk_Failure.cc +++ b/iocore/cache/test/test_Disk_Failure.cc @@ -31,6 +31,7 @@ #endif #include "AIO_fault_injection.h" +int cache_vols = 1; bool reuse_existing_cache = false; extern int gndisks; @@ -44,9 +45,10 @@ class CacheCommInit : public CacheInit // We initialize two disks and inject failure in one. Ensure that one disk // remains. REQUIRE(gndisks == 1); - CacheTestHandler *h = new CacheTestHandler(LARGE_FILE); - CacheTestHandler *h2 = new CacheTestHandler(SMALL_FILE, "http://www.scw11.com"); - TerminalTest *tt = new TerminalTest; + CacheTestHandler *h = new CacheTestHandler(LARGE_FILE, DEFAULT_URL, true); + CacheTestHandler *h2 = new CacheTestHandler(SMALL_FILE, "http://www.scw11.com", true); + ; + TerminalTest *tt = new TerminalTest; h->add(h2); h->add(tt); this_ethread()->schedule_imm(h); @@ -55,10 +57,14 @@ class CacheCommInit : public CacheInit } }; -TEST_CASE("Disk fail on first operation", "cache") +TEST_CASE("Disk fail after initialization", "cache") { - aioFaultInjection.inject_fault(".*/var/trafficserver2/cache.db", 0, {.err_no = EIO, .skip_io = false}); + std::vector indices = FAILURE_INDICES; + for (const int i : indices) { + aioFaultInjection.inject_fault(".*/var/trafficserver/cache.db", i, {.err_no = EIO, .skip_io = true}); + } init_cache(256 * 1024 * 1024); + cache_config_max_disk_errors = 1; // large write test CacheCommInit *init = new CacheCommInit; diff --git a/iocore/cache/test/test_Disk_Failure_2.cc b/iocore/cache/test/test_Disk_Init_Failure.cc similarity index 84% rename from iocore/cache/test/test_Disk_Failure_2.cc rename to iocore/cache/test/test_Disk_Init_Failure.cc index 1d7a4b55768..f10b73f6175 100644 --- a/iocore/cache/test/test_Disk_Failure_2.cc +++ b/iocore/cache/test/test_Disk_Init_Failure.cc @@ -31,6 +31,7 @@ #endif #include "AIO_fault_injection.h" +int cache_vols = 2; bool reuse_existing_cache = false; extern int gndisks; @@ -42,8 +43,9 @@ class CacheCommInit : public CacheInit cache_init_success_callback(int event, void *e) override { // We initialize two disks and inject failure in one. Ensure that one disk - // remains. + // remains if the fault is during initialization. REQUIRE(gndisks == 1); + CacheTestHandler *h = new CacheTestHandler(LARGE_FILE); CacheTestHandler *h2 = new CacheTestHandler(SMALL_FILE, "http://www.scw11.com"); TerminalTest *tt = new TerminalTest; @@ -55,9 +57,12 @@ class CacheCommInit : public CacheInit } }; -TEST_CASE("Disk fail on second operation", "cache") +TEST_CASE("Cache disk initialization fail", "cache") { - aioFaultInjection.inject_fault(".*/var/trafficserver2/cache.db", 1, {.err_no = EIO, .skip_io = false}); + std::vector indices = FAILURE_INDICES; + for (const int i : indices) { + aioFaultInjection.inject_fault(".*/var/trafficserver2/cache.db", i, {.err_no = EIO, .skip_io = true}); + } init_cache(256 * 1024 * 1024); // large write test CacheCommInit *init = new CacheCommInit; diff --git a/iocore/cache/test/test_Populated_Cache.cc b/iocore/cache/test/test_Populated_Cache.cc index 567f095571d..9d463028aa0 100644 --- a/iocore/cache/test/test_Populated_Cache.cc +++ b/iocore/cache/test/test_Populated_Cache.cc @@ -26,6 +26,7 @@ #define LARGE_FILE 10 * 1024 * 1024 #define SMALL_FILE 10 * 1024 +int cache_vols = 1; bool reuse_existing_cache = true; class CacheCommInit : public CacheInit diff --git a/iocore/cache/test/test_Populated_Cache_Disk_Failure.cc b/iocore/cache/test/test_Populated_Cache_Disk_Failure.cc index 897d508a084..3cea4c92b87 100644 --- a/iocore/cache/test/test_Populated_Cache_Disk_Failure.cc +++ b/iocore/cache/test/test_Populated_Cache_Disk_Failure.cc @@ -26,6 +26,7 @@ #define LARGE_FILE 10 * 1024 * 1024 #define SMALL_FILE 10 * 1024 +int cache_vols = 2; bool reuse_existing_cache = true; #ifndef AIO_FAULT_INJECTION #error Must define AIO_FAULT_INJECTION! diff --git a/iocore/cache/test/test_RWW.cc b/iocore/cache/test/test_RWW.cc index 9e6605e5139..2b6b3fe48a8 100644 --- a/iocore/cache/test/test_RWW.cc +++ b/iocore/cache/test/test_RWW.cc @@ -28,6 +28,7 @@ #include "main.h" +int cache_vols = 1; bool reuse_existing_cache = false; namespace diff --git a/iocore/cache/test/test_Update_L_to_S.cc b/iocore/cache/test/test_Update_L_to_S.cc index 0f31c36d330..a549ccac893 100644 --- a/iocore/cache/test/test_Update_L_to_S.cc +++ b/iocore/cache/test/test_Update_L_to_S.cc @@ -26,6 +26,7 @@ #include "main.h" +int cache_vols = 1; bool reuse_existing_cache = false; class CacheUpdateReadAgain : public CacheTestHandler diff --git a/iocore/cache/test/test_Update_S_to_L.cc b/iocore/cache/test/test_Update_S_to_L.cc index 0514bdf8b1f..906d93fa2ef 100644 --- a/iocore/cache/test/test_Update_S_to_L.cc +++ b/iocore/cache/test/test_Update_S_to_L.cc @@ -26,6 +26,7 @@ #include "main.h" +int cache_vols = 1; bool reuse_existing_cache = false; class CacheUpdateReadAgain : public CacheTestHandler diff --git a/iocore/cache/test/test_Update_header.cc b/iocore/cache/test/test_Update_header.cc index ac57478c339..ce6b7995cd7 100644 --- a/iocore/cache/test/test_Update_header.cc +++ b/iocore/cache/test/test_Update_header.cc @@ -26,6 +26,7 @@ #include "main.h" +int cache_vols = 1; bool reuse_existing_cache = false; class CacheUpdateReadAgain : public CacheTestHandler diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt index 5b7f099a242..d740f1a2c11 100644 --- a/src/tests/CMakeLists.txt +++ b/src/tests/CMakeLists.txt @@ -89,8 +89,14 @@ endmacro() add_cache_test(Cache ${CMAKE_SOURCE_DIR}/iocore/cache/test/test_Cache.cc) add_cache_test(Populated_Cache ${CMAKE_SOURCE_DIR}/iocore/cache/test/test_Populated_Cache.cc) if(ENABLE_DISK_FAILURE_TESTS) -add_cache_test(Disk_Failure ${CMAKE_SOURCE_DIR}/iocore/cache/test/test_Disk_Failure.cc) -add_cache_test(Disk_Failure_2 ${CMAKE_SOURCE_DIR}/iocore/cache/test/test_Disk_Failure_2.cc) +foreach(i RANGE 1) + add_cache_test(Disk_Init_Failure_${i} ${CMAKE_SOURCE_DIR}/iocore/cache/test/test_Disk_Init_Failure.cc) + target_compile_definitions(Disk_Init_Failure_${i} PUBLIC FAILURE_INDICES={${i}}) +endforeach() +foreach(i RANGE 2 20) + add_cache_test(Disk_Failure_${i} ${CMAKE_SOURCE_DIR}/iocore/cache/test/test_Disk_Failure.cc) + target_compile_definitions(Disk_Failure_${i} PUBLIC FAILURE_INDICES={${i}}) +endforeach() add_cache_test(Populated_Cache_Disk_Failure ${CMAKE_SOURCE_DIR}/iocore/cache/test/test_Populated_Cache_Disk_Failure.cc) endif() add_cache_test(RWW ${CMAKE_SOURCE_DIR}/iocore/cache/test/test_RWW.cc) From c2523f4bd956a44d70340ea875c41f1cc1814855 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Fri, 28 Jul 2023 13:29:16 -0500 Subject: [PATCH 3/8] Make bad disk detection more robust -Report disk I/O failure synchronously. -Fix a signed/unsigned bug when checking disk I/O result. -Rename a couple of symbols for clarity. --- iocore/aio/AIO.cc | 10 +++++----- iocore/aio/I_AIO.h | 2 +- iocore/aio/P_AIO.h | 20 ++++++++++++++------ iocore/cache/Cache.cc | 26 +++++++++++++++++++------- iocore/cache/CacheDir.cc | 2 +- iocore/cache/CacheDisk.cc | 6 +++--- iocore/cache/CacheVol.cc | 2 +- iocore/cache/P_CacheVol.h | 4 ++-- 8 files changed, 46 insertions(+), 26 deletions(-) diff --git a/iocore/aio/AIO.cc b/iocore/aio/AIO.cc index dbd0ed642a8..e15f45b0649 100644 --- a/iocore/aio/AIO.cc +++ b/iocore/aio/AIO.cc @@ -62,8 +62,8 @@ int thread_is_created = 0; RecInt cache_config_threads_per_disk = 12; RecInt api_config_threads_per_disk = 12; -RecRawStatBlock *aio_rsb = nullptr; -Continuation *aio_err_callbck = nullptr; +RecRawStatBlock *aio_rsb = nullptr; +Continuation *aio_err_callback = nullptr; // AIO Stats std::atomic aio_num_read = 0; std::atomic aio_bytes_read = 0; @@ -157,9 +157,9 @@ new_AIOCallback() } void -ink_aio_set_callback(Continuation *callback) +ink_aio_set_err_callback(Continuation *callback) { - aio_err_callbck = callback; + aio_err_callback = callback; } void @@ -475,7 +475,7 @@ cache_op(AIOCallbackInternal *op) res += err; } op->aio_result = res; - ink_assert(op->aio_result == (int64_t)a->aio_nbytes); + ink_assert(op->ok()); } return 1; } diff --git a/iocore/aio/I_AIO.h b/iocore/aio/I_AIO.h index 47dc35c4449..0782640d6a3 100644 --- a/iocore/aio/I_AIO.h +++ b/iocore/aio/I_AIO.h @@ -77,7 +77,7 @@ struct AIOCallback : public Continuation { }; void ink_aio_init(ts::ModuleVersion version, AIOBackend backend = AIO_BACKEND_AUTO); -void ink_aio_set_callback(Continuation *error_callback); +void ink_aio_set_err_callback(Continuation *error_callback); int ink_aio_read(AIOCallback *op, int fromAPI = 0); // fromAPI is a boolean to indicate if this is from an API call such as upload proxy feature diff --git a/iocore/aio/P_AIO.h b/iocore/aio/P_AIO.h index a6ff603b7fa..0b3c12c397f 100644 --- a/iocore/aio/P_AIO.h +++ b/iocore/aio/P_AIO.h @@ -45,10 +45,10 @@ static constexpr ts::ModuleVersion AIO_MODULE_INTERNAL_VERSION{AIO_MODULE_PUBLIC TS_INLINE int AIOCallback::ok() { - return (off_t)aiocb.aio_nbytes == (off_t)aio_result; + return (aiocb.aio_nbytes == static_cast(aio_result)) && (aio_result >= 0); } -extern Continuation *aio_err_callbck; +extern Continuation *aio_err_callback; struct AIO_Reqs; @@ -92,13 +92,21 @@ AIOCallbackInternal::io_complete(int event, void *data) { (void)event; (void)data; - if (aio_err_callbck && !ok()) { + // DEBUG + if (!ok() && !aio_err_callback) { + Warning("NO ERROR HANDLER!"); + } + // END DEBUG + if (aio_err_callback && !ok()) { AIOCallback *err_op = new AIOCallbackInternal(); err_op->aiocb.aio_fildes = this->aiocb.aio_fildes; err_op->aiocb.aio_lio_opcode = this->aiocb.aio_lio_opcode; - err_op->mutex = aio_err_callbck->mutex; - err_op->action = aio_err_callbck; - eventProcessor.schedule_imm(err_op); + err_op->mutex = aio_err_callback->mutex; + err_op->action = aio_err_callback; + + // Take this lock in-line because we want to stop other I/O operations on this disk ASAP + SCOPED_MUTEX_LOCK(lock, aio_err_callback->mutex, this_ethread()); + err_op->action.continuation->handleEvent(EVENT_NONE, err_op); } if (!action.cancelled && action.continuation) { action.continuation->handleEvent(AIO_EVENT_DONE, this); diff --git a/iocore/cache/Cache.cc b/iocore/cache/Cache.cc index 1a013dbb0fd..bb429d2b43b 100644 --- a/iocore/cache/Cache.cc +++ b/iocore/cache/Cache.cc @@ -573,7 +573,7 @@ CacheProcessor::start_internal(int flags) memset(sds, 0, sizeof(Span *) * gndisks); gndisks = 0; - ink_aio_set_callback(new AIO_Callback_handler()); + ink_aio_set_err_callback(new AIO_failure_handler()); config_volumes.read_config_file(); @@ -1275,7 +1275,7 @@ Vol::handle_dir_clear(int event, void *data) if (event == AIO_EVENT_DONE) { op = static_cast(data); - if (static_cast(op->aio_result) != op->aiocb.aio_nbytes) { + if (!op->ok()) { Warning("unable to clear cache directory '%s'", hash_text.get()); disk->incrErrors(op); fd = -1; @@ -1304,7 +1304,7 @@ Vol::handle_dir_read(int event, void *data) AIOCallback *op = static_cast(data); if (event == AIO_EVENT_DONE) { - if (static_cast(op->aio_result) != op->aiocb.aio_nbytes) { + if (!op->ok()) { Note("Directory read failed: clearing cache directory %s", this->hash_text.get()); clear_dir(); return EVENT_DONE; @@ -1399,7 +1399,7 @@ Vol::handle_recover_from_data(int event, void * /* data ATS_UNUSED */) io.aiocb.aio_nbytes = (skip + len) - recover_pos; } } else if (event == AIO_EVENT_DONE) { - if (io.aiocb.aio_nbytes != static_cast(io.aio_result)) { + if (!io.ok()) { Warning("disk read error on recover '%s', clearing", hash_text.get()); disk->incrErrors(&io); goto Lclear; @@ -1659,7 +1659,7 @@ Vol::handle_header_read(int event, void *data) for (auto &i : hf) { ink_assert(op != nullptr); i = static_cast(op->aiocb.aio_buf); - if (static_cast(op->aio_result) != op->aiocb.aio_nbytes) { + if (!op->ok()) { Note("Header read failed: clearing cache directory %s", this->hash_text.get()); clear_dir(); return EVENT_DONE; @@ -1941,7 +1941,7 @@ CacheProcessor::has_online_storage() const } int -AIO_Callback_handler::handle_disk_failure(int /* event ATS_UNUSED */, void *data) +AIO_failure_handler::handle_disk_failure(int /* event ATS_UNUSED */, void *data) { /* search for the matching file descriptor */ if (!CacheProcessor::cache_ready) { @@ -1950,6 +1950,10 @@ AIO_Callback_handler::handle_disk_failure(int /* event ATS_UNUSED */, void *data int disk_no = 0; AIOCallback *cb = static_cast(data); + // DEBUG + ink_release_assert(cb != nullptr); + bool found = false; + // END DEBUG for (; disk_no < gndisks; disk_no++) { CacheDisk *d = gdisks[disk_no]; @@ -1966,9 +1970,17 @@ AIO_Callback_handler::handle_disk_failure(int /* event ATS_UNUSED */, void *data Warning("%s", message); cacheProcessor.mark_storage_offline(d); // take it out of service } + // DEBUG + found = true; + // END DEBUG break; } } + // DEBUG + if (!found) { + Warning("Didn't find failing disk!"); + } + // END DEBUG delete cb; return EVENT_DONE; @@ -2363,7 +2375,7 @@ CacheVC::removeEvent(int /* event ATS_UNUSED */, Event * /* e ATS_UNUSED */) goto Lcollision; } // check read completed correct FIXME: remove bad vols - if (static_cast(io.aio_result) != io.aiocb.aio_nbytes) { + if (!io.ok()) { goto Ldone; } { diff --git a/iocore/cache/CacheDir.cc b/iocore/cache/CacheDir.cc index b4bf725e4e8..adfe4467fd2 100644 --- a/iocore/cache/CacheDir.cc +++ b/iocore/cache/CacheDir.cc @@ -1103,7 +1103,7 @@ CacheSync::mainEvent(int event, Event *e) if (event == AIO_EVENT_DONE) { // AIO Thread - if (io.aio_result != static_cast(io.aiocb.aio_nbytes)) { + if (!io.ok()) { Warning("vol write error during directory sync '%s'", gvol[vol_idx]->hash_text.get()); event = EVENT_NONE; goto Ldone; diff --git a/iocore/cache/CacheDisk.cc b/iocore/cache/CacheDisk.cc index 35743acd0d9..4b7b66d1e97 100644 --- a/iocore/cache/CacheDisk.cc +++ b/iocore/cache/CacheDisk.cc @@ -147,7 +147,7 @@ CacheDisk::clearDone(int event, void * /* data ATS_UNUSED */) { ink_assert(event == AIO_EVENT_DONE); - if (io.aiocb.aio_nbytes != static_cast(io.aio_result)) { + if (!io.ok()) { Warning("Could not clear disk header for disk %s: declaring disk bad", path); incrErrors(&io); SET_DISK_BAD(this); @@ -163,7 +163,7 @@ CacheDisk::openStart(int event, void * /* data ATS_UNUSED */) { ink_assert(event == AIO_EVENT_DONE); - if (io.aiocb.aio_nbytes != static_cast(io.aio_result)) { + if (!io.ok()) { Warning("could not read disk header for disk %s: declaring disk bad", path); // the header could have random values by the AIO read error @@ -237,7 +237,7 @@ CacheDisk::syncDone(int event, void * /* data ATS_UNUSED */) { ink_assert(event == AIO_EVENT_DONE); - if (io.aiocb.aio_nbytes != static_cast(io.aio_result)) { + if (!io.ok()) { Warning("Error writing disk header for disk %s:disk bad", path); incrErrors(&io); SET_DISK_BAD(this); diff --git a/iocore/cache/CacheVol.cc b/iocore/cache/CacheVol.cc index 989c06a95f4..1613632868c 100644 --- a/iocore/cache/CacheVol.cc +++ b/iocore/cache/CacheVol.cc @@ -210,7 +210,7 @@ CacheVC::scanObject(int /* event ATS_UNUSED */, Event * /* e ATS_UNUSED */) goto Lread; } - if (static_cast(io.aio_result) != io.aiocb.aio_nbytes) { + if (!io.ok()) { result = (void *)-ECACHE_READ_FAIL; goto Ldone; } diff --git a/iocore/cache/P_CacheVol.h b/iocore/cache/P_CacheVol.h index 2800e899369..9c9e99592a6 100644 --- a/iocore/cache/P_CacheVol.h +++ b/iocore/cache/P_CacheVol.h @@ -256,10 +256,10 @@ struct Vol : public Continuation { ~Vol() override { ats_free(agg_buffer); } }; -struct AIO_Callback_handler : public Continuation { +struct AIO_failure_handler : public Continuation { int handle_disk_failure(int event, void *data); - AIO_Callback_handler() : Continuation(new_ProxyMutex()) { SET_HANDLER(&AIO_Callback_handler::handle_disk_failure); } + AIO_failure_handler() : Continuation(new_ProxyMutex()) { SET_HANDLER(&AIO_failure_handler::handle_disk_failure); } }; struct CacheVol { From 4dcde92abb7a4b06708e85654d2689c732759679 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Wed, 23 Aug 2023 13:27:54 -0500 Subject: [PATCH 4/8] Generate tests --- iocore/aio/Makefile.am | 4 +- iocore/cache/I_CacheDefs.h | 1 + iocore/cache/Makefile.am | 162 ++++++++++++++++++++-- iocore/cache/test/CacheTestHandler.cc | 7 +- iocore/cache/test/CacheTestHandler.h | 6 +- iocore/cache/test/test_Populated_Cache.cc | 2 +- src/tests/CMakeLists.txt | 2 +- 7 files changed, 163 insertions(+), 21 deletions(-) diff --git a/iocore/aio/Makefile.am b/iocore/aio/Makefile.am index f816f97298b..c0babc8da23 100644 --- a/iocore/aio/Makefile.am +++ b/iocore/aio/Makefile.am @@ -33,7 +33,9 @@ libinkaio_a_SOURCES = \ AIO.cc \ I_AIO.h \ Inline.cc \ - P_AIO.h + P_AIO.h \ + AIO_fault_injection.h \ + AIO_fault_injection.cc test_AIO_LDFLAGS = \ @AM_LDFLAGS@ \ diff --git a/iocore/cache/I_CacheDefs.h b/iocore/cache/I_CacheDefs.h index 15645070f51..d1fff896265 100644 --- a/iocore/cache/I_CacheDefs.h +++ b/iocore/cache/I_CacheDefs.h @@ -24,6 +24,7 @@ #pragma once #include "I_Event.h" +#include "I_VConnection.h" #include "tscore/I_Version.h" #include "tscore/CryptoHash.h" diff --git a/iocore/cache/Makefile.am b/iocore/cache/Makefile.am index 7bccc002533..63daee25d48 100644 --- a/iocore/cache/Makefile.am +++ b/iocore/cache/Makefile.am @@ -132,27 +132,156 @@ check_PROGRAMS = \ test_Alternate_S_to_L_remove_L \ test_Update_L_to_S \ test_Update_S_to_L \ - test_Update_header + test_Update_header \ + test_Populated_Cache if ENABLE_DISK_FAILURE_TESTS check_PROGRAMS += \ - test_Disk_Failure \ - test_Disk_Failure_2 \ - test_Populated_Cache_Disk_Failure + test_Disk_Init_Failure_0 \ + test_Disk_Init_Failure_1 \ + test_Disk_Failure_5 \ + test_Disk_Failure_6 \ + test_Disk_Failure_7 \ + test_Disk_Failure_8 \ + test_Disk_Failure_9 \ + test_Disk_Failure_10 \ + test_Disk_Failure_11 \ + test_Disk_Failure_12 \ + test_Disk_Failure_13 \ + test_Disk_Failure_14 \ + test_Disk_Failure_15 \ + test_Disk_Failure_16 \ + test_Disk_Failure_17 \ + test_Disk_Failure_18 \ + test_Disk_Failure_19 \ + test_Disk_Failure_20 \ + test_Populated_Cache_Disk_Failure -test_Disk_Failure_CPPFLAGS = $(test_CPPFLAGS) -test_Disk_Failure_LDFLAGS = @AM_LDFLAGS@ -test_Disk_Failure_LDADD = $(test_LDADD) -test_Disk_Failure_SOURCES = \ +test_Disk_Init_Failure_0_CPPFLAGS = $(test_CPPFLAGS) -DFAILURE_INDICES={0} +test_Disk_Init_Failure_0_LDFLAGS = AM_LDFLAGS@ +test_Disk_Init_Failure_0_LDADD = $(test_LDADD) +test_Disk_Init_Failure_0_SOURCES = \ + $(test_main_SOURCES) \ + ./test/test_Disk_Init_Failure.cc + +test_Disk_Init_Failure_1_CPPFLAGS = $(test_CPPFLAGS) -DFAILURE_INDICES={1} +test_Disk_Init_Failure_1_LDFLAGS = @AM_LDFLAGS@ +test_Disk_Init_Failure_1_LDADD = $(test_LDADD) +test_Disk_Init_Failure_1_SOURCES = \ + $(test_main_SOURCES) \ + ./test/test_Disk_Init_Failure.cc + +test_Disk_Failure_5_CPPFLAGS = $(test_CPPFLAGS) -DFAILURE_INDICES={5} +test_Disk_Failure_5_LDFLAGS = @AM_LDFLAGS@ +test_Disk_Failure_5_LDADD = $(test_LDADD) +test_Disk_Failure_5_SOURCES = \ $(test_main_SOURCES) \ ./test/test_Disk_Failure.cc -test_Disk_Failure_2_CPPFLAGS = $(test_CPPFLAGS) -test_Disk_Failure_2_LDFLAGS = @AM_LDFLAGS@ -test_Disk_Failure_2_LDADD = $(test_LDADD) -test_Disk_Failure_2_SOURCES = \ +test_Disk_Failure_6_CPPFLAGS = $(test_CPPFLAGS) -DFAILURE_INDICES={6} +test_Disk_Failure_6_LDFLAGS = @AM_LDFLAGS@ +test_Disk_Failure_6_LDADD = $(test_LDADD) +test_Disk_Failure_6_SOURCES = \ $(test_main_SOURCES) \ - ./test/test_Disk_Failure_2.cc + ./test/test_Disk_Failure.cc + +test_Disk_Failure_7_CPPFLAGS = $(test_CPPFLAGS) -DFAILURE_INDICES={7} +test_Disk_Failure_7_LDFLAGS = @AM_LDFLAGS@ +test_Disk_Failure_7_LDADD = $(test_LDADD) +test_Disk_Failure_7_SOURCES = \ + $(test_main_SOURCES) \ + ./test/test_Disk_Failure.cc + +test_Disk_Failure_8_CPPFLAGS = $(test_CPPFLAGS) -DFAILURE_INDICES={8} +test_Disk_Failure_8_LDFLAGS = @AM_LDFLAGS@ +test_Disk_Failure_8_LDADD = $(test_LDADD) +test_Disk_Failure_8_SOURCES = \ + $(test_main_SOURCES) \ + ./test/test_Disk_Failure.cc + +test_Disk_Failure_9_CPPFLAGS = $(test_CPPFLAGS) -DFAILURE_INDICES={9} +test_Disk_Failure_9_LDFLAGS = @AM_LDFLAGS@ +test_Disk_Failure_9_LDADD = $(test_LDADD) +test_Disk_Failure_9_SOURCES = \ + $(test_main_SOURCES) \ + ./test/test_Disk_Failure.cc + +test_Disk_Failure_10_CPPFLAGS = $(test_CPPFLAGS) -DFAILURE_INDICES={10} +test_Disk_Failure_10_LDFLAGS = @AM_LDFLAGS@ +test_Disk_Failure_10_LDADD = $(test_LDADD) +test_Disk_Failure_10_SOURCES = \ + $(test_main_SOURCES) \ + ./test/test_Disk_Failure.cc + +test_Disk_Failure_11_CPPFLAGS = $(test_CPPFLAGS) -DFAILURE_INDICES={11} +test_Disk_Failure_11_LDFLAGS = @AM_LDFLAGS@ +test_Disk_Failure_11_LDADD = $(test_LDADD) +test_Disk_Failure_11_SOURCES = \ + $(test_main_SOURCES) \ + ./test/test_Disk_Failure.cc + +test_Disk_Failure_12_CPPFLAGS = $(test_CPPFLAGS) -DFAILURE_INDICES={12} +test_Disk_Failure_12_LDFLAGS = @AM_LDFLAGS@ +test_Disk_Failure_12_LDADD = $(test_LDADD) +test_Disk_Failure_12_SOURCES = \ + $(test_main_SOURCES) \ + ./test/test_Disk_Failure.cc + +test_Disk_Failure_13_CPPFLAGS = $(test_CPPFLAGS) -DFAILURE_INDICES={13} +test_Disk_Failure_13_LDFLAGS = @AM_LDFLAGS@ +test_Disk_Failure_13_LDADD = $(test_LDADD) +test_Disk_Failure_13_SOURCES = \ + $(test_main_SOURCES) \ + ./test/test_Disk_Failure.cc + +test_Disk_Failure_14_CPPFLAGS = $(test_CPPFLAGS) -DFAILURE_INDICES={14} +test_Disk_Failure_14_LDFLAGS = @AM_LDFLAGS@ +test_Disk_Failure_14_LDADD = $(test_LDADD) +test_Disk_Failure_14_SOURCES = \ + $(test_main_SOURCES) \ + ./test/test_Disk_Failure.cc + +test_Disk_Failure_15_CPPFLAGS = $(test_CPPFLAGS) -DFAILURE_INDICES={15} +test_Disk_Failure_15_LDFLAGS = @AM_LDFLAGS@ +test_Disk_Failure_15_LDADD = $(test_LDADD) +test_Disk_Failure_15_SOURCES = \ + $(test_main_SOURCES) \ + ./test/test_Disk_Failure.cc + +test_Disk_Failure_16_CPPFLAGS = $(test_CPPFLAGS) -DFAILURE_INDICES={16} +test_Disk_Failure_16_LDFLAGS = @AM_LDFLAGS@ +test_Disk_Failure_16_LDADD = $(test_LDADD) +test_Disk_Failure_16_SOURCES = \ + $(test_main_SOURCES) \ + ./test/test_Disk_Failure.cc + +test_Disk_Failure_17_CPPFLAGS = $(test_CPPFLAGS) -DFAILURE_INDICES={17} +test_Disk_Failure_17_LDFLAGS = @AM_LDFLAGS@ +test_Disk_Failure_17_LDADD = $(test_LDADD) +test_Disk_Failure_17_SOURCES = \ + $(test_main_SOURCES) \ + ./test/test_Disk_Failure.cc + +test_Disk_Failure_18_CPPFLAGS = $(test_CPPFLAGS) -DFAILURE_INDICES={18} +test_Disk_Failure_18_LDFLAGS = @AM_LDFLAGS@ +test_Disk_Failure_18_LDADD = $(test_LDADD) +test_Disk_Failure_18_SOURCES = \ + $(test_main_SOURCES) \ + ./test/test_Disk_Failure.cc + +test_Disk_Failure_19_CPPFLAGS = $(test_CPPFLAGS) -DFAILURE_INDICES={19} +test_Disk_Failure_19_LDFLAGS = @AM_LDFLAGS@ +test_Disk_Failure_19_LDADD = $(test_LDADD) +test_Disk_Failure_19_SOURCES = \ + $(test_main_SOURCES) \ + ./test/test_Disk_Failure.cc + +test_Disk_Failure_20_CPPFLAGS = $(test_CPPFLAGS) -DFAILURE_INDICES={20} +test_Disk_Failure_20_LDFLAGS = @AM_LDFLAGS@ +test_Disk_Failure_20_LDADD = $(test_LDADD) +test_Disk_Failure_20_SOURCES = \ + $(test_main_SOURCES) \ + ./test/test_Disk_Failure.cc test_Populated_Cache_Disk_Failure_CPPFLAGS = $(test_CPPFLAGS) test_Populated_Cache_Disk_Failure_LDFLAGS = @AM_LDFLAGS@ @@ -244,6 +373,13 @@ test_Update_header_SOURCES = \ $(test_main_SOURCES) \ ./test/test_Update_header.cc +test_Populated_Cache_CPPFLAGS = $(test_CPPFLAGS) +test_Populated_Cache_LDFLAGS = @AM_LDFLAGS@ +test_Populated_Cache_LDADD = $(test_LDADD) +test_Populated_Cache_SOURCES = \ + $(test_main_SOURCES) \ + ./test/test_Populated_Cache.cc + include $(top_srcdir)/mk/tidy.mk clang-tidy-local: $(DIST_SOURCES) diff --git a/iocore/cache/test/CacheTestHandler.cc b/iocore/cache/test/CacheTestHandler.cc index 9145b95a744..fa602a3bb5f 100644 --- a/iocore/cache/test/CacheTestHandler.cc +++ b/iocore/cache/test/CacheTestHandler.cc @@ -19,12 +19,13 @@ limitations under the License. */ +#include "I_CacheDefs.h" #include "main.h" #include "CacheTestHandler.h" TestContChain::TestContChain() : Continuation(new_ProxyMutex()) {} -CacheTestHandler::CacheTestHandler(size_t size, const char *url, bool expect_fail) : _expect_open_read_failed(expect_fail) +CacheTestHandler::CacheTestHandler(size_t size, const char *url, bool expect_fail) : _expect_fail(expect_fail) { this->_wt = new CacheWriteTest(size, this, url); this->_rt = new CacheReadTest(size, this, url); @@ -60,7 +61,9 @@ CacheTestHandler::handle_cache_event(int event, CacheTestBase *base) delete this; break; case CACHE_EVENT_OPEN_READ_FAILED: - if (_expect_open_read_failed) { + case CACHE_EVENT_OPEN_WRITE_FAILED: + case VC_EVENT_ERROR: + if (_expect_fail) { base->close(); delete this; break; diff --git a/iocore/cache/test/CacheTestHandler.h b/iocore/cache/test/CacheTestHandler.h index c60edfc4000..c816cc123cd 100644 --- a/iocore/cache/test/CacheTestHandler.h +++ b/iocore/cache/test/CacheTestHandler.h @@ -74,9 +74,9 @@ class CacheTestHandler : public TestContChain virtual void handle_cache_event(int event, CacheTestBase *base); protected: - CacheTestBase *_rt = nullptr; - CacheTestBase *_wt = nullptr; - bool _expect_open_read_failed = false; + CacheTestBase *_rt = nullptr; + CacheTestBase *_wt = nullptr; + bool _expect_fail = false; }; class TerminalTest : public CacheTestHandler diff --git a/iocore/cache/test/test_Populated_Cache.cc b/iocore/cache/test/test_Populated_Cache.cc index 9d463028aa0..abfdd472fc2 100644 --- a/iocore/cache/test/test_Populated_Cache.cc +++ b/iocore/cache/test/test_Populated_Cache.cc @@ -26,7 +26,7 @@ #define LARGE_FILE 10 * 1024 * 1024 #define SMALL_FILE 10 * 1024 -int cache_vols = 1; +int cache_vols = 2; bool reuse_existing_cache = true; class CacheCommInit : public CacheInit diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt index d740f1a2c11..65eb92d5c8c 100644 --- a/src/tests/CMakeLists.txt +++ b/src/tests/CMakeLists.txt @@ -93,7 +93,7 @@ foreach(i RANGE 1) add_cache_test(Disk_Init_Failure_${i} ${CMAKE_SOURCE_DIR}/iocore/cache/test/test_Disk_Init_Failure.cc) target_compile_definitions(Disk_Init_Failure_${i} PUBLIC FAILURE_INDICES={${i}}) endforeach() -foreach(i RANGE 2 20) +foreach(i RANGE 5 20) add_cache_test(Disk_Failure_${i} ${CMAKE_SOURCE_DIR}/iocore/cache/test/test_Disk_Failure.cc) target_compile_definitions(Disk_Failure_${i} PUBLIC FAILURE_INDICES={${i}}) endforeach() From 218c4d237368a0598865136808cf4114eea7e9f9 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Sat, 26 Aug 2023 05:06:47 -0500 Subject: [PATCH 5/8] Fix spelling of off_t in pread/pwrite --- iocore/aio/AIO_fault_injection.cc | 4 ++-- iocore/aio/AIO_fault_injection.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/iocore/aio/AIO_fault_injection.cc b/iocore/aio/AIO_fault_injection.cc index 28650f5d25c..9bf6dbd8f47 100644 --- a/iocore/aio/AIO_fault_injection.cc +++ b/iocore/aio/AIO_fault_injection.cc @@ -78,7 +78,7 @@ AIOFaultInjection::open(const char *pathname, int flags, mode_t mode) } ssize_t -AIOFaultInjection::pread(int fd, void *buf, size_t nbytes, __off_t offset) +AIOFaultInjection::pread(int fd, void *buf, size_t nbytes, off_t offset) { std::lock_guard lock{_mutex}; IOFault result = _op_result(fd); @@ -100,7 +100,7 @@ AIOFaultInjection::pread(int fd, void *buf, size_t nbytes, __off_t offset) } ssize_t -AIOFaultInjection::pwrite(int fd, const void *buf, size_t n, __off_t offset) +AIOFaultInjection::pwrite(int fd, const void *buf, size_t n, off_t offset) { std::lock_guard lock{_mutex}; IOFault result = _op_result(fd); diff --git a/iocore/aio/AIO_fault_injection.h b/iocore/aio/AIO_fault_injection.h index 9357a172fbb..b237d1a6be7 100644 --- a/iocore/aio/AIO_fault_injection.h +++ b/iocore/aio/AIO_fault_injection.h @@ -65,8 +65,8 @@ class AIOFaultInjection void inject_fault(const char *path_regex, int op_index, IOFault fault); int open(const char *pathname, int flags, mode_t mode); - ssize_t pread(int fd, void *buf, size_t nbytes, __off_t offset); - ssize_t pwrite(int fd, const void *buf, size_t n, __off_t offset); + ssize_t pread(int fd, void *buf, size_t nbytes, off_t offset); + ssize_t pwrite(int fd, const void *buf, size_t n, off_t offset); }; extern AIOFaultInjection aioFaultInjection; From 16f9e6ffb5bec0c92f282bee4ec6297bb2fc31cb Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Sat, 26 Aug 2023 10:06:15 -0500 Subject: [PATCH 6/8] Remove unnecessary debug output from cache_op() --- iocore/aio/AIO.cc | 6 ------ 1 file changed, 6 deletions(-) diff --git a/iocore/aio/AIO.cc b/iocore/aio/AIO.cc index e15f45b0649..9cd6446d940 100644 --- a/iocore/aio/AIO.cc +++ b/iocore/aio/AIO.cc @@ -444,12 +444,9 @@ cache_op(AIOCallbackInternal *op) ssize_t err, res = 0; while (a->aio_nbytes - res > 0) { - static int debug_op_index = 0; do { if (read) { #ifdef AIO_FAULT_INJECTION - Debug("cache.op", "op %d: pread(%d, %p, %zu, %zu)", debug_op_index, a->aio_fildes, - (static_cast(a->aio_buf)) + res, a->aio_nbytes - res, a->aio_offset + res); err = aioFaultInjection.pread(a->aio_fildes, (static_cast(a->aio_buf)) + res, a->aio_nbytes - res, a->aio_offset + res); #else @@ -459,8 +456,6 @@ cache_op(AIOCallbackInternal *op) #ifdef AIO_FAULT_INJECTION err = aioFaultInjection.pwrite(a->aio_fildes, (static_cast(a->aio_buf)) + res, a->aio_nbytes - res, a->aio_offset + res); - Debug("cache.op", "op %d: pwrite(%d, %p, %zu, %zu)", debug_op_index, a->aio_fildes, - (static_cast(a->aio_buf)) + res, a->aio_nbytes - res, a->aio_offset + res); #else err = pwrite(a->aio_fildes, (static_cast(a->aio_buf)) + res, a->aio_nbytes - res, a->aio_offset + res); #endif @@ -471,7 +466,6 @@ cache_op(AIOCallbackInternal *op) op->aio_result = -errno; return (err); } - debug_op_index++; res += err; } op->aio_result = res; From 2f756dc331eed04c942bc3e48637b632ef994cd4 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Wed, 30 Aug 2023 13:39:59 -0500 Subject: [PATCH 7/8] Revert "Make bad disk detection more robust" This reverts commit c2523f4bd956a44d70340ea875c41f1cc1814855. --- iocore/aio/AIO.cc | 10 +++++----- iocore/aio/I_AIO.h | 2 +- iocore/aio/P_AIO.h | 20 ++++++-------------- iocore/cache/Cache.cc | 26 +++++++------------------- iocore/cache/CacheDir.cc | 2 +- iocore/cache/CacheDisk.cc | 6 +++--- iocore/cache/CacheVol.cc | 2 +- iocore/cache/P_CacheVol.h | 4 ++-- 8 files changed, 26 insertions(+), 46 deletions(-) diff --git a/iocore/aio/AIO.cc b/iocore/aio/AIO.cc index 9cd6446d940..d8204387a77 100644 --- a/iocore/aio/AIO.cc +++ b/iocore/aio/AIO.cc @@ -62,8 +62,8 @@ int thread_is_created = 0; RecInt cache_config_threads_per_disk = 12; RecInt api_config_threads_per_disk = 12; -RecRawStatBlock *aio_rsb = nullptr; -Continuation *aio_err_callback = nullptr; +RecRawStatBlock *aio_rsb = nullptr; +Continuation *aio_err_callbck = nullptr; // AIO Stats std::atomic aio_num_read = 0; std::atomic aio_bytes_read = 0; @@ -157,9 +157,9 @@ new_AIOCallback() } void -ink_aio_set_err_callback(Continuation *callback) +ink_aio_set_callback(Continuation *callback) { - aio_err_callback = callback; + aio_err_callbck = callback; } void @@ -469,7 +469,7 @@ cache_op(AIOCallbackInternal *op) res += err; } op->aio_result = res; - ink_assert(op->ok()); + ink_assert(op->aio_result == (int64_t)a->aio_nbytes); } return 1; } diff --git a/iocore/aio/I_AIO.h b/iocore/aio/I_AIO.h index 0782640d6a3..47dc35c4449 100644 --- a/iocore/aio/I_AIO.h +++ b/iocore/aio/I_AIO.h @@ -77,7 +77,7 @@ struct AIOCallback : public Continuation { }; void ink_aio_init(ts::ModuleVersion version, AIOBackend backend = AIO_BACKEND_AUTO); -void ink_aio_set_err_callback(Continuation *error_callback); +void ink_aio_set_callback(Continuation *error_callback); int ink_aio_read(AIOCallback *op, int fromAPI = 0); // fromAPI is a boolean to indicate if this is from an API call such as upload proxy feature diff --git a/iocore/aio/P_AIO.h b/iocore/aio/P_AIO.h index 0b3c12c397f..a6ff603b7fa 100644 --- a/iocore/aio/P_AIO.h +++ b/iocore/aio/P_AIO.h @@ -45,10 +45,10 @@ static constexpr ts::ModuleVersion AIO_MODULE_INTERNAL_VERSION{AIO_MODULE_PUBLIC TS_INLINE int AIOCallback::ok() { - return (aiocb.aio_nbytes == static_cast(aio_result)) && (aio_result >= 0); + return (off_t)aiocb.aio_nbytes == (off_t)aio_result; } -extern Continuation *aio_err_callback; +extern Continuation *aio_err_callbck; struct AIO_Reqs; @@ -92,21 +92,13 @@ AIOCallbackInternal::io_complete(int event, void *data) { (void)event; (void)data; - // DEBUG - if (!ok() && !aio_err_callback) { - Warning("NO ERROR HANDLER!"); - } - // END DEBUG - if (aio_err_callback && !ok()) { + if (aio_err_callbck && !ok()) { AIOCallback *err_op = new AIOCallbackInternal(); err_op->aiocb.aio_fildes = this->aiocb.aio_fildes; err_op->aiocb.aio_lio_opcode = this->aiocb.aio_lio_opcode; - err_op->mutex = aio_err_callback->mutex; - err_op->action = aio_err_callback; - - // Take this lock in-line because we want to stop other I/O operations on this disk ASAP - SCOPED_MUTEX_LOCK(lock, aio_err_callback->mutex, this_ethread()); - err_op->action.continuation->handleEvent(EVENT_NONE, err_op); + err_op->mutex = aio_err_callbck->mutex; + err_op->action = aio_err_callbck; + eventProcessor.schedule_imm(err_op); } if (!action.cancelled && action.continuation) { action.continuation->handleEvent(AIO_EVENT_DONE, this); diff --git a/iocore/cache/Cache.cc b/iocore/cache/Cache.cc index bb429d2b43b..1a013dbb0fd 100644 --- a/iocore/cache/Cache.cc +++ b/iocore/cache/Cache.cc @@ -573,7 +573,7 @@ CacheProcessor::start_internal(int flags) memset(sds, 0, sizeof(Span *) * gndisks); gndisks = 0; - ink_aio_set_err_callback(new AIO_failure_handler()); + ink_aio_set_callback(new AIO_Callback_handler()); config_volumes.read_config_file(); @@ -1275,7 +1275,7 @@ Vol::handle_dir_clear(int event, void *data) if (event == AIO_EVENT_DONE) { op = static_cast(data); - if (!op->ok()) { + if (static_cast(op->aio_result) != op->aiocb.aio_nbytes) { Warning("unable to clear cache directory '%s'", hash_text.get()); disk->incrErrors(op); fd = -1; @@ -1304,7 +1304,7 @@ Vol::handle_dir_read(int event, void *data) AIOCallback *op = static_cast(data); if (event == AIO_EVENT_DONE) { - if (!op->ok()) { + if (static_cast(op->aio_result) != op->aiocb.aio_nbytes) { Note("Directory read failed: clearing cache directory %s", this->hash_text.get()); clear_dir(); return EVENT_DONE; @@ -1399,7 +1399,7 @@ Vol::handle_recover_from_data(int event, void * /* data ATS_UNUSED */) io.aiocb.aio_nbytes = (skip + len) - recover_pos; } } else if (event == AIO_EVENT_DONE) { - if (!io.ok()) { + if (io.aiocb.aio_nbytes != static_cast(io.aio_result)) { Warning("disk read error on recover '%s', clearing", hash_text.get()); disk->incrErrors(&io); goto Lclear; @@ -1659,7 +1659,7 @@ Vol::handle_header_read(int event, void *data) for (auto &i : hf) { ink_assert(op != nullptr); i = static_cast(op->aiocb.aio_buf); - if (!op->ok()) { + if (static_cast(op->aio_result) != op->aiocb.aio_nbytes) { Note("Header read failed: clearing cache directory %s", this->hash_text.get()); clear_dir(); return EVENT_DONE; @@ -1941,7 +1941,7 @@ CacheProcessor::has_online_storage() const } int -AIO_failure_handler::handle_disk_failure(int /* event ATS_UNUSED */, void *data) +AIO_Callback_handler::handle_disk_failure(int /* event ATS_UNUSED */, void *data) { /* search for the matching file descriptor */ if (!CacheProcessor::cache_ready) { @@ -1950,10 +1950,6 @@ AIO_failure_handler::handle_disk_failure(int /* event ATS_UNUSED */, void *data) int disk_no = 0; AIOCallback *cb = static_cast(data); - // DEBUG - ink_release_assert(cb != nullptr); - bool found = false; - // END DEBUG for (; disk_no < gndisks; disk_no++) { CacheDisk *d = gdisks[disk_no]; @@ -1970,17 +1966,9 @@ AIO_failure_handler::handle_disk_failure(int /* event ATS_UNUSED */, void *data) Warning("%s", message); cacheProcessor.mark_storage_offline(d); // take it out of service } - // DEBUG - found = true; - // END DEBUG break; } } - // DEBUG - if (!found) { - Warning("Didn't find failing disk!"); - } - // END DEBUG delete cb; return EVENT_DONE; @@ -2375,7 +2363,7 @@ CacheVC::removeEvent(int /* event ATS_UNUSED */, Event * /* e ATS_UNUSED */) goto Lcollision; } // check read completed correct FIXME: remove bad vols - if (!io.ok()) { + if (static_cast(io.aio_result) != io.aiocb.aio_nbytes) { goto Ldone; } { diff --git a/iocore/cache/CacheDir.cc b/iocore/cache/CacheDir.cc index adfe4467fd2..b4bf725e4e8 100644 --- a/iocore/cache/CacheDir.cc +++ b/iocore/cache/CacheDir.cc @@ -1103,7 +1103,7 @@ CacheSync::mainEvent(int event, Event *e) if (event == AIO_EVENT_DONE) { // AIO Thread - if (!io.ok()) { + if (io.aio_result != static_cast(io.aiocb.aio_nbytes)) { Warning("vol write error during directory sync '%s'", gvol[vol_idx]->hash_text.get()); event = EVENT_NONE; goto Ldone; diff --git a/iocore/cache/CacheDisk.cc b/iocore/cache/CacheDisk.cc index 4b7b66d1e97..35743acd0d9 100644 --- a/iocore/cache/CacheDisk.cc +++ b/iocore/cache/CacheDisk.cc @@ -147,7 +147,7 @@ CacheDisk::clearDone(int event, void * /* data ATS_UNUSED */) { ink_assert(event == AIO_EVENT_DONE); - if (!io.ok()) { + if (io.aiocb.aio_nbytes != static_cast(io.aio_result)) { Warning("Could not clear disk header for disk %s: declaring disk bad", path); incrErrors(&io); SET_DISK_BAD(this); @@ -163,7 +163,7 @@ CacheDisk::openStart(int event, void * /* data ATS_UNUSED */) { ink_assert(event == AIO_EVENT_DONE); - if (!io.ok()) { + if (io.aiocb.aio_nbytes != static_cast(io.aio_result)) { Warning("could not read disk header for disk %s: declaring disk bad", path); // the header could have random values by the AIO read error @@ -237,7 +237,7 @@ CacheDisk::syncDone(int event, void * /* data ATS_UNUSED */) { ink_assert(event == AIO_EVENT_DONE); - if (!io.ok()) { + if (io.aiocb.aio_nbytes != static_cast(io.aio_result)) { Warning("Error writing disk header for disk %s:disk bad", path); incrErrors(&io); SET_DISK_BAD(this); diff --git a/iocore/cache/CacheVol.cc b/iocore/cache/CacheVol.cc index 1613632868c..989c06a95f4 100644 --- a/iocore/cache/CacheVol.cc +++ b/iocore/cache/CacheVol.cc @@ -210,7 +210,7 @@ CacheVC::scanObject(int /* event ATS_UNUSED */, Event * /* e ATS_UNUSED */) goto Lread; } - if (!io.ok()) { + if (static_cast(io.aio_result) != io.aiocb.aio_nbytes) { result = (void *)-ECACHE_READ_FAIL; goto Ldone; } diff --git a/iocore/cache/P_CacheVol.h b/iocore/cache/P_CacheVol.h index 9c9e99592a6..2800e899369 100644 --- a/iocore/cache/P_CacheVol.h +++ b/iocore/cache/P_CacheVol.h @@ -256,10 +256,10 @@ struct Vol : public Continuation { ~Vol() override { ats_free(agg_buffer); } }; -struct AIO_failure_handler : public Continuation { +struct AIO_Callback_handler : public Continuation { int handle_disk_failure(int event, void *data); - AIO_failure_handler() : Continuation(new_ProxyMutex()) { SET_HANDLER(&AIO_failure_handler::handle_disk_failure); } + AIO_Callback_handler() : Continuation(new_ProxyMutex()) { SET_HANDLER(&AIO_Callback_handler::handle_disk_failure); } }; struct CacheVol { From 941d36710b9ede66e8618aa1e47fd243f44ac358 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Thu, 31 Aug 2023 15:47:51 -0500 Subject: [PATCH 8/8] Fix build error caused by bad merge --- CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index ce16639cb6c..9aea1519115 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -59,6 +59,7 @@ option(ENABLE_DOCS "Build docs (default OFF)") option(ENABLE_DISK_FAILURE_TESTS "Build disk failure tests (enables AIO fault injection, default OFF)" OFF) if(ENABLE_DISK_FAILURE_TESTS) add_compile_definitions("AIO_FAULT_INJECTION") +endif() option(ENABLE_BENCHMARKS "Build benchmarks (default OFF)") # Setup user