Skip to content

Commit ea6e973

Browse files
alexander-e1offsyuzvinsky
authored andcommitted
Fix sanitizer issues [BMQ,MWC] (bloomberg#373)
1 parent 978c7ea commit ea6e973

11 files changed

+62
-122
lines changed

etc/msansup.txt

+2
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,4 @@
1+
# MemorySanitizer suppressions file for BlazingMQ.
2+
13
# Locking APIs from non-instrumented `libpthread` yields false-positives.
24
fun:_ZN11BloombergLP5bslmt9MutexImplINS0_8Platform12PosixThreadsEE*

etc/tsansup.txt

+12-14
Original file line numberDiff line numberDiff line change
@@ -1,34 +1,32 @@
1-
# ThreadSanitizer suppressions file for BMQ
1+
# ThreadSanitizer suppressions file for BlazingMQ.
22

33
# There's a lengthy comment in ObjectPool::getObject that explains why this
44
# isn't a race. I'm not smart enough to figure out if it's right, but I'll
55
# assume it's right
66
race:BloombergLP::bdlcc::ObjectPool<*>::getObject
77
race:BloombergLP::bdlcc::ObjectPool<*>::releaseObject
8+
9+
# Issue 176120121 is created for tracking.
810
race:BloombergLP::bdlma::ConcurrentPool::deallocate
911
race:BloombergLP::bdlma::ConcurrentPool::allocate
1012
race:BloombergLP::bdlma::ConcurrentPool::deleteObject
1113

12-
# Similar to above, there is a lengthy comment in bcema_Pool::allocate which
13-
# attempts to explain why there is no race.
14-
race:BloombergLP::bcema_Pool<*>::allocate
15-
16-
# Not sure what the problem is here, but tsan can't show the other stack, so
17-
# there's nothing to look into
18-
race:__tsan_atomic32_fetch_add
19-
2014
# Don't warn about using cout from multiple threads
2115
race:std::basic_ostream<char, *>& bsl::operator<< <*>(std::basic_ostream<*>&, bsl::basic_string<*> const&)
2216

2317
# Looks like ball::LoggerManager uses a plain ptr to store its singleton, and
2418
# it makes tsan warn in some cases
2519
race:BloombergLP::ball::LoggerManager::isInitialized()
2620

27-
# Suppress TSan report in a routine used in bmqimp::Brokersession test driver.
28-
# It is a benign race in the test driver, but should be looked into at some
29-
# point.
30-
race:TestSession::waitForChannelClose
31-
race:TestSession::arriveAtStepWithCfgs
21+
# Suppress sporadically appearing data race in bmqimp::BrokerSession test driver.
22+
# In TestSession::arriveAtStepWithCfgs() there is a call of queue->setOptions() method,
23+
# at nearly same time in other thread bmqimp::BrokerSession::onConfigureQueueResponse() calls
24+
# queue->options().suspendsOnBadHostHealth() method which is detected as data race.
25+
# bmqt::QueueOptions and bmqimp::Queue classes are not thread safe by design,
26+
# and bmqimp::BrokerSession::onConfigureQueueResponse() callback access them in
27+
# not thread-safe manner, probably also by design, assuming that it will be called again
28+
# if something is changed. Further investigation is required, suppress it for now.
29+
race:BloombergLP::bmqt::QueueOptions::suspendsOnBadHostHealth
3230

3331
# Since we use mqbmock::Dispatcher in unit tests, this method does not get
3432
# enqueued correctly back to cluster-dispatcher thread, causing potential

etc/ubsansup.txt

+2-6
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,2 @@
1-
# bmqp::Crc32c carries out misaligned access of Int64 in one of the internal
2-
# routines, but only on x86/64. Misaligned accesses are handled gracefully by
3-
# this hardware, unlike SPARC. So we simply suppress this warning. Other
4-
# option is to update the code, which can be done by someone feeling
5-
# adventurous.
6-
alignment:crc32c1024SseInt
1+
# UndefinedBehaviorSanitizer suppressions file for BlazingMQ.
2+
# See details https://clang.llvm.org/docs/UndefinedBehaviorSanitizer.html#runtime-suppressions.

src/groups/bmq/bmqimp/bmqimp_brokersession.t.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -2196,12 +2196,12 @@ bool TestSession::waitForChannelClose(const bsls::TimeInterval& timeout)
21962196
// Wait for the close to be called on the base channel
21972197
const bsls::TimeInterval expireAfter =
21982198
bsls::SystemTime::nowRealtimeClock() + timeout;
2199-
while (d_testChannel.closeCalls().empty() &&
2199+
while (d_testChannel.closeCallsEmpty() &&
22002200
bsls::SystemTime::nowRealtimeClock() < expireAfter) {
22012201
bslmt::ThreadUtil::microSleep(k_TIME_SOURCE_STEP.totalMicroseconds());
22022202
}
22032203

2204-
if (d_testChannel.closeCalls().empty()) {
2204+
if (d_testChannel.closeCallsEmpty()) {
22052205
return false; // RETURN
22062206
}
22072207

src/groups/bmq/bmqp/bmqp_messageguidgenerator.t.cpp

+5-1
Original file line numberDiff line numberDiff line change
@@ -661,7 +661,11 @@ static void test3_multithreadUseIP()
661661

662662
const int k_NUM_THREADS = 10;
663663

664-
#if defined(__has_feature)
664+
#ifdef BSLS_PLATFORM_OS_SOLARIS
665+
// This test case times out if 'k_NUM_GUIDS' is close to 1 million
666+
// (it's unable to complete in 90 seconds).
667+
const int k_NUM_GUIDS = 500000; // 500k
668+
#elif defined(__has_feature)
665669
// Avoid timeout under MemorySanitizer
666670
const int k_NUM_GUIDS = __has_feature(memory_sanitizer) ? 500000 // 500k
667671
: 1000000; // 1M

src/groups/mqb/mqbs/mqbs_datastore.t.cpp

+4-1
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,10 @@ static void test2_defaultHashUniqueness()
134134

135135
mwctst::TestHelper::printTestName("DEFAULT HASH UNIQUENESS");
136136

137-
#if defined(__has_feature)
137+
#ifdef BSLS_PLATFORM_OS_SOLARIS
138+
// This test case times out if 'k_NUM_GUIDS' is close to 1 million
139+
const bsls::Types::Int64 k_NUM_KEYS = 1000000; // 1M
140+
#elif defined(__has_feature)
138141
// Avoid timeout under MemorySanitizer
139142
const bsls::Types::Int64 k_NUM_KEYS = __has_feature(memory_sanitizer)
140143
? 1000000 // 1M

src/groups/mqb/mqbu/mqbu_messageguidutil.t.cpp

+5-1
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,11 @@ static void test2_multithread()
182182

183183
const int k_NUM_THREADS = 10;
184184

185-
#if defined(__has_feature)
185+
#ifdef BSLS_PLATFORM_OS_SOLARIS
186+
// This test case times out if 'k_NUM_GUIDS' is close to 1 million
187+
// (it's unable to complete in 90 seconds).
188+
const int k_NUM_GUIDS = 500000; // 500k
189+
#elif defined(__has_feature)
186190
// Avoid timeout under MemorySanitizer
187191
const int k_NUM_GUIDS = __has_feature(memory_sanitizer) ? 500000 // 500k
188192
: 1000000; // 1M

src/groups/mwc/mwcio/mwcio_ntcchannelfactory.t.cpp

+12-90
Original file line numberDiff line numberDiff line change
@@ -98,19 +98,22 @@ static const ChannelWatermarkType::Enum WAT_HIGH =
9898
static const ChannelWatermarkType::Enum WAT_LOW =
9999
ChannelWatermarkType::e_LOW_WATERMARK;
100100

101+
// Adjust attempt interval depending on the platform
101102
#ifdef BSLS_PLATFORM_OS_SOLARIS
102-
static const bool skipTest = true;
103+
static const int k_ATTEMPT_INTERVAL_MS = 300;
103104
#elif defined( \
104105
__has_feature) // Clang-supported method for checking sanitizers.
105-
static const bool skipTest = __has_feature(memory_sanitizer) ||
106-
__has_feature(thread_sanitizer) ||
107-
__has_feature(undefined_behavior_sanitizer);
106+
static const int k_ATTEMPT_INTERVAL_MS =
107+
(__has_feature(memory_sanitizer) || __has_feature(thread_sanitizer) ||
108+
__has_feature(undefined_behavior_sanitizer))
109+
? 400
110+
: 1;
108111
#elif defined(__SANITIZE_MEMORY__) || defined(__SANITIZE_THREAD__) || \
109112
defined(__SANITIZE_UNDEFINED__)
110113
// GCC-supported macros for checking MSAN, TSAN and UBSAN.
111-
static const bool skipTest = true;
114+
static const int k_ATTEMPT_INTERVAL_MS = 400;
112115
#else
113-
static const bool skipTest = false; // Default to running the test.
116+
static const int k_ATTEMPT_INTERVAL_MS = 1;
114117
#endif
115118

116119
// ========================
@@ -658,10 +661,9 @@ void Tester::connect(int line,
658661

659662
ConnectOptions reqOptions(options, s_allocator_p);
660663

661-
reqOptions.setAttemptInterval(
662-
bsls::TimeInterval(0, 10 * bdlt::TimeUnitRatio::k_NS_PER_MS));
663-
664-
reqOptions.setAttemptInterval(bsls::TimeInterval(0, 1));
664+
reqOptions.setAttemptInterval(bsls::TimeInterval(
665+
0,
666+
k_ATTEMPT_INTERVAL_MS * bdlt::TimeUnitRatio::k_NS_PER_MS));
665667

666668
HandleMap::iterator serverIter = d_handleMap.find(endpointOrServer);
667669
if (serverIter != d_handleMap.end()) {
@@ -1055,22 +1057,6 @@ static void test6_preCreationCbTest()
10551057
{
10561058
mwctst::TestHelper::printTestName("Pre Creation Cb Test");
10571059

1058-
if (skipTest) {
1059-
// This test has been disabled for Solaris/MSan/TSan/UBSan build. This
1060-
// test relies on the timings of certain callbacks being fired before
1061-
// or after certain operations. Normally this timing is always
1062-
// observed, but in msan/tsan/ubsan enabled build, the timing gets
1063-
// changed, leading to test failure. Of course, the right fix is to
1064-
// not rely on these timings, which can be worked on if the test starts
1065-
// failing in non-instrumented builds. Additionally, we could try to
1066-
// enable this test in MSan/TSan/UBSan build once all MSan/TSan/UBSan
1067-
// reports have been fixed to see if that helps (see `msansup.txt`,
1068-
// `tsansup.txt` and `ubsansup.txt`).
1069-
bsl::cout << "Test skipped (running on Solaris or under sanitizer)"
1070-
<< bsl::endl;
1071-
return; // RETURN
1072-
}
1073-
10741060
Tester t(s_allocator_p);
10751061

10761062
// Concern 'a'
@@ -1106,22 +1092,6 @@ static void test5_visitChannelsTest()
11061092
{
11071093
mwctst::TestHelper::printTestName("Cancel Handle Test");
11081094

1109-
if (skipTest) {
1110-
// This test has been disabled for Solaris/MSan/TSan/UBSan build. This
1111-
// test relies on the timings of certain callbacks being fired before
1112-
// or after certain operations. Normally this timing is always
1113-
// observed, but in msan/tsan/ubsan enabled build, the timing gets
1114-
// changed, leading to test failure. Of course, the right fix is to
1115-
// not rely on these timings, which can be worked on if the test starts
1116-
// failing in non-instrumented builds. Additionally, we could try to
1117-
// enable this test in MSan/TSan/UBSan build once all MSan/TSan/UBSan
1118-
// reports have been fixed to see if that helps (see `msansup.txt`,
1119-
// `tsansup.txt` and `ubsansup.txt`).
1120-
bsl::cout << "Test skipped (running on Solaris or under sanitizer)"
1121-
<< bsl::endl;
1122-
return; // RETURN
1123-
}
1124-
11251095
Tester t(s_allocator_p);
11261096

11271097
// Concerns 'a'
@@ -1162,22 +1132,6 @@ static void test4_cancelHandleTest()
11621132
{
11631133
mwctst::TestHelper::printTestName("Cancel Handle Test");
11641134

1165-
if (skipTest) {
1166-
// This test has been disabled for Solaris/MSan/TSan/UBSan build. This
1167-
// test relies on the timings of certain callbacks being fired before
1168-
// or after certain operations. Normally this timing is always
1169-
// observed, but in msan/tsan/ubsan enabled build, the timing gets
1170-
// changed, leading to test failure. Of course, the right fix is to
1171-
// not rely on these timings, which can be worked on if the test starts
1172-
// failing in non-instrumented builds. Additionally, we could try to
1173-
// enable this test in MSan/TSan/UBSan build once all MSan/TSan/UBSan
1174-
// reports have been fixed to see if that helps (see `msansup.txt`,
1175-
// `tsansup.txt` and `ubsansup.txt`).
1176-
bsl::cout << "Test skipped (running on Solaris or under sanitizer)"
1177-
<< bsl::endl;
1178-
return; // RETURN
1179-
}
1180-
11811135
Tester t(s_allocator_p);
11821136

11831137
// Concerns 'a'
@@ -1232,22 +1186,6 @@ static void test3_watermarkTest()
12321186
{
12331187
mwctst::TestHelper::printTestName("Watermark Test");
12341188

1235-
if (skipTest) {
1236-
// This test has been disabled for Solaris/MSan/TSan/UBSan build. This
1237-
// test relies on the timings of certain callbacks being fired before
1238-
// or after certain operations. Normally this timing is always
1239-
// observed, but in msan/tsan/ubsan enabled build, the timing gets
1240-
// changed, leading to test failure. Of course, the right fix is to
1241-
// not rely on these timings, which can be worked on if the test starts
1242-
// failing in non-instrumented builds. Additionally, we could try to
1243-
// enable this test in MSan/TSan/UBSan build once all MSan/TSan/UBSan
1244-
// reports have been fixed to see if that helps (see `msansup.txt`,
1245-
// `tsansup.txt` and `ubsansup.txt`).
1246-
bsl::cout << "Test skipped (running on Solaris or under sanitizer)"
1247-
<< bsl::endl;
1248-
return; // RETURN
1249-
}
1250-
12511189
Tester t(s_allocator_p);
12521190

12531191
// Concern 'a'
@@ -1337,22 +1275,6 @@ static void test1_breathingTest()
13371275
{
13381276
mwctst::TestHelper::printTestName("Breathing Test");
13391277

1340-
if (skipTest) {
1341-
// This test has been disabled for Solaris/MSan/TSan/UBSan build. This
1342-
// test relies on the timings of certain callbacks being fired before
1343-
// or after certain operations. Normally this timing is always
1344-
// observed, but in msan/tsan/ubsan enabled build, the timing gets
1345-
// changed, leading to test failure. Of course, the right fix is to
1346-
// not rely on these timings, which can be worked on if the test starts
1347-
// failing in non-instrumented builds. Additionally, we could try to
1348-
// enable this test in MSan/TSan/UBSan build once all MSan/TSan/UBSan
1349-
// reports have been fixed to see if that helps (see `msansup.txt`,
1350-
// `tsansup.txt` and `ubsansup.txt`).
1351-
bsl::cout << "Test skipped (running on Solaris or under sanitizer)"
1352-
<< bsl::endl;
1353-
return; // RETURN
1354-
}
1355-
13561278
Tester t(s_allocator_p);
13571279
t.init(L_);
13581280

src/groups/mwc/mwcio/mwcio_testchannel.cpp

+6
Original file line numberDiff line numberDiff line change
@@ -293,5 +293,11 @@ bool TestChannel::hasNoMoreWriteCalls() const
293293
return d_hasNoMoreWriteCalls;
294294
}
295295

296+
bool TestChannel::closeCallsEmpty() const
297+
{
298+
bslmt::LockGuard<bslmt::Mutex> guard(&d_mutex); // LOCK
299+
return d_closeCalls.empty();
300+
}
301+
296302
} // close package namespace
297303
} // close enterprise namespace

src/groups/mwc/mwcio/mwcio_testchannel.h

+5-1
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,7 @@ class TestChannel : public Channel {
184184
bsl::deque<OnWatermarkCall> d_onWatermarkCalls;
185185
mwct::PropertyBag d_properties;
186186
bsl::string d_peerUri;
187-
bslmt::Mutex d_mutex;
187+
mutable bslmt::Mutex d_mutex;
188188
bslmt::Condition d_condition;
189189
bool d_isFinal;
190190
bool d_hasNoMoreWriteCalls;
@@ -258,6 +258,10 @@ class TestChannel : public Channel {
258258
const Status& writeStatus() const;
259259
bool hasNoMoreWriteCalls() const;
260260

261+
/// Lock mutex and return `true` if d_closeCalls collection is empty,
262+
/// `false` otherwise.
263+
bool closeCallsEmpty() const;
264+
261265
// Channel
262266
void read(Status* status,
263267
int numBytes,

src/groups/mwc/mwcma/mwcma_countingallocator.t.cpp

+7-6
Original file line numberDiff line numberDiff line change
@@ -191,23 +191,24 @@ static void test3_deallocate()
191191
mwctst::TestHelper::printTestName("DEALLOCATE");
192192

193193
// This test twice-deallocates the same block of memory, to verify that
194-
// such an operation fails. If we're running under MemorySanitizer or
195-
// AddressSanitizer, we must skip this test to avoid detecting the issue
196-
// and aborting.
194+
// such an operation fails. If we're running under MemorySanitizer,
195+
// AddressSanitizer or ThreadSanitizer, we must skip this test to avoid
196+
// detecting the issue and aborting.
197197
//
198198
// Under MSan, we would instead try to explicitly "unpoison" the memory,
199199
// but CountingAllocator keeps a hidden "header" block, which we have no
200200
// good way of accessing to unpoison.
201201
//
202202
// Under ASan, we might be able to use the `no_sanitize` attribute, but
203-
// GCC doesn't support it before versio 8.0 - so for now, better just to
203+
// GCC doesn't support it before version 8.0 - so for now, better just to
204204
// skip the testcase.
205205
#if defined(__has_feature) // Clang-supported method for checking sanitizers.
206206
const bool skipTestForSanitizers = __has_feature(address_sanitizer) ||
207207
__has_feature(memory_sanitizer) ||
208208
__has_feature(thread_sanitizer);
209-
#elif defined(__SANITIZE_ADDRESS__) || defined(__SANITIZE_THREAD__)
210-
// GCC-supported macros for checking ASAN and TSAN.
209+
#elif defined(__SANITIZE_ADDRESS__) || defined(__SANITIZE_MEMORY__) || \
210+
defined(__SANITIZE_THREAD__)
211+
// GCC-supported macros for checking ASAN, MSAN and TSAN.
211212
const bool skipTestForSanitizers = true;
212213
#else
213214
const bool skipTestForSanitizers = false; // Default to running the test.

0 commit comments

Comments
 (0)