Skip to content

[SYCL] Fix flaky failure of concurrent cache eviction UT #16522

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 6, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions sycl/include/sycl/detail/os_util.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -95,15 +95,16 @@ class __SYCL_EXPORT OSUtil {
// exporting them as ABI. They are only used in persistent cache
// implementation and should not be exposed to the end users.
// Get size of directory in bytes.
size_t getDirectorySize(const std::string &Path);
size_t getDirectorySize(const std::string &Path, bool ignoreErrors = false);

// Get size of file in bytes.
size_t getFileSize(const std::string &Path);

// Function to recursively iterate over the directory and execute
// 'Func' on each regular file.
void fileTreeWalk(const std::string Path,
std::function<void(const std::string)> Func);
std::function<void(const std::string)> Func,
bool ignoreErrors = false);

} // namespace detail
} // namespace _V1
Expand Down
36 changes: 24 additions & 12 deletions sycl/source/detail/os_util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -248,33 +248,45 @@ size_t getFileSize(const std::string &Path) {
// Function to recursively iterate over the directory and execute
// 'Func' on each regular file.
void fileTreeWalk(const std::string Path,
std::function<void(const std::string)> Func) {
std::function<void(const std::string)> Func,
bool ignoreErrors) {

std::error_code EC;
for (auto It = fs::recursive_directory_iterator(Path, EC);
It != fs::recursive_directory_iterator(); It.increment(EC)) {

// Errors can happen if a file was removed/added during the iteration.
if (EC)
throw sycl::exception(
make_error_code(errc::runtime),
"Failed to do File Tree Walk. Ensure that the directory is not "
"getting updated while FileTreeWalk is in progress.: " +
Path + "\n" + EC.message());

if (fs::is_regular_file(It->path()))
Func(It->path().string());
if (EC) {
if (ignoreErrors) {
EC.clear();
continue;
} else
throw sycl::exception(
make_error_code(errc::runtime),
"Failed to do File Tree Walk. Ensure that the directory is not "
"getting updated while FileTreeWalk is in progress.: " +
Path + "\n" + EC.message());
}

try {
if (fs::is_regular_file(It->path()))
Func(It->path().string());
} catch (...) {
// Ignore errors if ignoreErrors is set to true.
if (!ignoreErrors)
throw;
}
}
}

// Get size of a directory in bytes.
size_t getDirectorySize(const std::string &Path) {
size_t getDirectorySize(const std::string &Path, bool ignoreErrors) {
size_t DirSizeVar = 0;

auto CollectFIleSize = [&DirSizeVar](const std::string Path) {
DirSizeVar += getFileSize(Path);
};
fileTreeWalk(Path, CollectFIleSize);
fileTreeWalk(Path, CollectFIleSize, ignoreErrors);

return DirSizeVar;
}
Expand Down
2 changes: 1 addition & 1 deletion sycl/source/detail/persistent_device_code_cache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@ void PersistentDeviceCodeCache::repopulateCacheSizeFile(
// Calculate the size of the cache directory.
// During directory size calculation, do not add anything
// in the cache. Otherwise, we'll get a std::fs_error.
size_t CacheSize = getDirectorySize(CacheRoot);
size_t CacheSize = getDirectorySize(CacheRoot, /*Ignore Error*/ true);

std::ofstream FileStream{CacheSizeFile};
FileStream << CacheSize;
Expand Down
33 changes: 33 additions & 0 deletions sycl/unittests/kernel-and-program/PersistentDeviceCodeCache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -653,6 +653,39 @@ TEST_P(PersistentDeviceCodeCache, ConcurentReadWriteCacheEviction) {
ConcurentReadWriteCache(2, 100);
}

// Unit test for ensuring that os_utils::getDirectorySize is thread-safe.
TEST_P(PersistentDeviceCodeCache, ConcurentDirectorySizeCalculation) {
// Cleanup the cache directory.
std::string CacheRoot = detail::PersistentDeviceCodeCache::getRootDir();
ASSERT_NO_ERROR(llvm::sys::fs::remove_directories(CacheRoot));
ASSERT_NO_ERROR(llvm::sys::fs::create_directories(CacheRoot));

// Spawn multiple threads to calculate the size of the directory concurrently
// and adding/removing file simultaneously.
constexpr size_t ThreadCount = 50;
Barrier b(ThreadCount);
{
auto testLambda = [&](std::size_t threadId) {
b.wait();
// Create a file named: test_file_<thread_id>.txt
std::string FileName =
CacheRoot + "/test_file_" + std::to_string(threadId) + ".txt";
std::ofstream FileStream(FileName,
std::ofstream::out | std::ofstream::trunc);
FileStream << "Test file for thread: " << threadId;
FileStream.close();

// Calculate the size of the directory.
[[maybe_unused]] auto i = detail::getDirectorySize(CacheRoot, true);

// Remove the created file.
ASSERT_NO_ERROR(llvm::sys::fs::remove(FileName));
};

ThreadPool MPool(ThreadCount, testLambda);
}
}

INSTANTIATE_TEST_SUITE_P(PersistentDeviceCodeCacheImpl,
PersistentDeviceCodeCache,
::testing::Values(SYCL_DEVICE_BINARY_TYPE_SPIRV,
Expand Down
Loading