From 806342b8ef54ec07511d0ce5d3d1335451e952da Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Thu, 28 May 2020 14:24:30 +0200 Subject: [PATCH] [clangd] Resolve driver symlinks, and look up unknown relative drivers in PATH. Summary: This fixes a reported bug: if clang and libc++ are installed under /usr/lib/llvm-11/... but there'- a symlink /usr/bin/clang++-11, then a compile_commands.json with "/usr/bin/clang++-11 -stdlib=libc++" would previously look for libc++ under /usr/include instead of /usr/lib/llvm-11/include. The PATH change makes this work if the compiler is just "clang++-11" too. As this is now doing IO potentially on every getCompileCommand(), we cache the results for each distinct driver. While here: - Added a Memoize helper for this as multithreaded caching is a bit noisy. - Used this helper to simplify QueryDriverDatabase and reduce blocking there. (This makes use of the fact that llvm::Regex is now threadsafe) Reviewers: kadircet Subscribers: jyknight, ormris, ilya-biryukov, MaskRay, jkorous, arphaman, jfb, usaxena95, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D75414 --- clang-tools-extra/clangd/ClangdLSPServer.cpp | 2 +- clang-tools-extra/clangd/CompileCommands.cpp | 69 +++++++++++++---- clang-tools-extra/clangd/CompileCommands.h | 6 +- .../clangd/QueryDriverDatabase.cpp | 28 +++---- clang-tools-extra/clangd/support/Threading.h | 38 ++++++++++ .../clangd/unittests/CompileCommandsTests.cpp | 75 ++++++++++++++++++- .../unittests/support/ThreadingTests.cpp | 61 +++++++++++++++ 7 files changed, 242 insertions(+), 37 deletions(-) diff --git a/clang-tools-extra/clangd/ClangdLSPServer.cpp b/clang-tools-extra/clangd/ClangdLSPServer.cpp index b3b2bdd976bf1d..db9932a43e5a3a 100644 --- a/clang-tools-extra/clangd/ClangdLSPServer.cpp +++ b/clang-tools-extra/clangd/ClangdLSPServer.cpp @@ -514,7 +514,7 @@ void ClangdLSPServer::onInitialize(const InitializeParams &Params, if (ClangdServerOpts.ResourceDir) Mangler.ResourceDir = *ClangdServerOpts.ResourceDir; CDB.emplace(BaseCDB.get(), Params.initializationOptions.fallbackFlags, - tooling::ArgumentsAdjuster(Mangler)); + tooling::ArgumentsAdjuster(std::move(Mangler))); { // Switch caller's context with LSPServer's background context. Since we // rather want to propagate information from LSPServer's context into the diff --git a/clang-tools-extra/clangd/CompileCommands.cpp b/clang-tools-extra/clangd/CompileCommands.cpp index 84f72f5f58c72a..a73312a521e841 100644 --- a/clang-tools-extra/clangd/CompileCommands.cpp +++ b/clang-tools-extra/clangd/CompileCommands.cpp @@ -119,6 +119,48 @@ std::string detectStandardResourceDir() { return CompilerInvocation::GetResourcesPath("clangd", (void *)&Dummy); } +// The path passed to argv[0] is important: +// - its parent directory is Driver::Dir, used for library discovery +// - its basename affects CLI parsing (clang-cl) and other settings +// Where possible it should be an absolute path with sensible directory, but +// with the original basename. +static std::string resolveDriver(llvm::StringRef Driver, bool FollowSymlink, + llvm::Optional ClangPath) { + auto SiblingOf = [&](llvm::StringRef AbsPath) { + llvm::SmallString<128> Result = llvm::sys::path::parent_path(AbsPath); + llvm::sys::path::append(Result, llvm::sys::path::filename(Driver)); + return Result.str().str(); + }; + + // First, eliminate relative paths. + std::string Storage; + if (!llvm::sys::path::is_absolute(Driver)) { + // If the driver is a generic like "g++" with no path, add clang dir. + if (ClangPath && + (Driver == "clang" || Driver == "clang++" || Driver == "gcc" || + Driver == "g++" || Driver == "cc" || Driver == "c++")) { + return SiblingOf(*ClangPath); + } + // Otherwise try to look it up on PATH. This won't change basename. + auto Absolute = llvm::sys::findProgramByName(Driver); + if (Absolute && llvm::sys::path::is_absolute(*Absolute)) + Driver = Storage = std::move(*Absolute); + else if (ClangPath) // If we don't find it, use clang dir again. + return SiblingOf(*ClangPath); + else // Nothing to do: can't find the command and no detected dir. + return Driver.str(); + } + + // Now we have an absolute path, but it may be a symlink. + assert(llvm::sys::path::is_absolute(Driver)); + if (FollowSymlink) { + llvm::SmallString<256> Resolved; + if (!llvm::sys::fs::real_path(Driver, Resolved)) + return SiblingOf(Resolved); + } + return Driver.str(); +} + } // namespace CommandMangler CommandMangler::detect() { @@ -162,25 +204,22 @@ void CommandMangler::adjust(std::vector &Cmd) const { Cmd.push_back(*Sysroot); } - // If the driver is a generic name like "g++" with no path, add a clang path. - // This makes it easier for us to find the standard libraries on mac. - if (ClangPath && llvm::sys::path::is_absolute(*ClangPath) && !Cmd.empty()) { - std::string &Driver = Cmd.front(); - if (Driver == "clang" || Driver == "clang++" || Driver == "gcc" || - Driver == "g++" || Driver == "cc" || Driver == "c++") { - llvm::SmallString<128> QualifiedDriver = - llvm::sys::path::parent_path(*ClangPath); - llvm::sys::path::append(QualifiedDriver, Driver); - Driver = std::string(QualifiedDriver.str()); - } + if (!Cmd.empty()) { + bool FollowSymlink = !Has("-no-canonical-prefixes"); + Cmd.front() = + (FollowSymlink ? ResolvedDrivers : ResolvedDriversNoFollow) + .get(Cmd.front(), [&, this] { + return resolveDriver(Cmd.front(), FollowSymlink, ClangPath); + }); } } -CommandMangler::operator clang::tooling::ArgumentsAdjuster() { - return [Mangler{*this}](const std::vector &Args, - llvm::StringRef File) { +CommandMangler::operator clang::tooling::ArgumentsAdjuster() && { + // ArgumentsAdjuster is a std::function and so must be copyable. + return [Mangler = std::make_shared(std::move(*this))]( + const std::vector &Args, llvm::StringRef File) { auto Result = Args; - Mangler.adjust(Result); + Mangler->adjust(Result); return Result; }; } diff --git a/clang-tools-extra/clangd/CompileCommands.h b/clang-tools-extra/clangd/CompileCommands.h index 02b1a76c5bff9c..51a5574d13d3a8 100644 --- a/clang-tools-extra/clangd/CompileCommands.h +++ b/clang-tools-extra/clangd/CompileCommands.h @@ -8,8 +8,10 @@ #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_COMPILECOMMANDS_H #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_COMPILECOMMANDS_H +#include "support/Threading.h" #include "clang/Tooling/ArgumentsAdjusters.h" #include "clang/Tooling/CompilationDatabase.h" +#include "llvm/ADT/StringMap.h" #include #include @@ -40,10 +42,12 @@ struct CommandMangler { static CommandMangler detect(); void adjust(std::vector &Cmd) const; - explicit operator clang::tooling::ArgumentsAdjuster(); + explicit operator clang::tooling::ArgumentsAdjuster() &&; private: CommandMangler() = default; + Memoize> ResolvedDrivers; + Memoize> ResolvedDriversNoFollow; }; } // namespace clangd diff --git a/clang-tools-extra/clangd/QueryDriverDatabase.cpp b/clang-tools-extra/clangd/QueryDriverDatabase.cpp index 2ab217dac15590..11d74203ae944b 100644 --- a/clang-tools-extra/clangd/QueryDriverDatabase.cpp +++ b/clang-tools-extra/clangd/QueryDriverDatabase.cpp @@ -88,7 +88,7 @@ std::vector parseDriverOutput(llvm::StringRef Output) { std::vector extractSystemIncludes(PathRef Driver, llvm::StringRef Lang, llvm::ArrayRef CommandLine, - llvm::Regex &QueryDriverRegex) { + const llvm::Regex &QueryDriverRegex) { trace::Span Tracer("Extract system includes"); SPAN_ATTACH(Tracer, "driver", Driver); SPAN_ATTACH(Tracer, "lang", Lang); @@ -267,19 +267,12 @@ class QueryDriverDatabase : public GlobalCompilationDatabase { llvm::SmallString<128> Driver(Cmd->CommandLine.front()); llvm::sys::fs::make_absolute(Cmd->Directory, Driver); - auto Key = std::make_pair(Driver.str().str(), Lang.str()); - std::vector SystemIncludes; - { - std::lock_guard Lock(Mu); - - auto It = DriverToIncludesCache.find(Key); - if (It != DriverToIncludesCache.end()) - SystemIncludes = It->second; - else - DriverToIncludesCache[Key] = SystemIncludes = extractSystemIncludes( - Key.first, Key.second, Cmd->CommandLine, QueryDriverRegex); - } + std::vector SystemIncludes = + QueriedDrivers.get(/*Key=*/(Driver + ":" + Lang).str(), [&] { + return extractSystemIncludes(Driver, Lang, Cmd->CommandLine, + QueryDriverRegex); + }); return addSystemIncludes(*Cmd, SystemIncludes); } @@ -289,12 +282,9 @@ class QueryDriverDatabase : public GlobalCompilationDatabase { } private: - mutable std::mutex Mu; - // Caches includes extracted from a driver. - mutable std::map, - std::vector> - DriverToIncludesCache; - mutable llvm::Regex QueryDriverRegex; + // Caches includes extracted from a driver. Key is driver:lang. + Memoize>> QueriedDrivers; + llvm::Regex QueryDriverRegex; std::unique_ptr Base; CommandChanged::Subscription BaseChanged; diff --git a/clang-tools-extra/clangd/support/Threading.h b/clang-tools-extra/clangd/support/Threading.h index 310dd7bc5a2466..5155ac193fd18a 100644 --- a/clang-tools-extra/clangd/support/Threading.h +++ b/clang-tools-extra/clangd/support/Threading.h @@ -131,6 +131,44 @@ std::future runAsync(llvm::unique_function Action) { std::move(Action), Context::current().clone()); } +/// Memoize is a cache to store and reuse computation results based on a key. +/// +/// Memoize> PrimeCache; +/// for (int I : RepetitiveNumbers) +/// if (PrimeCache.get(I, [&] { return expensiveIsPrime(I); })) +/// llvm::errs() << "Prime: " << I << "\n"; +/// +/// The computation will only be run once for each key. +/// This class is threadsafe. Concurrent calls for the same key may run the +/// computation multiple times, but each call will return the same result. +template class Memoize { + mutable Container Cache; + std::unique_ptr Mu; + +public: + Memoize() : Mu(std::make_unique()) {} + + template + typename Container::mapped_type get(T &&Key, Func Compute) const { + { + std::lock_guard Lock(*Mu); + auto It = Cache.find(Key); + if (It != Cache.end()) + return It->second; + } + // Don't hold the mutex while computing. + auto V = Compute(); + { + std::lock_guard Lock(*Mu); + auto R = Cache.try_emplace(std::forward(Key), V); + // Insert into cache may fail if we raced with another thread. + if (!R.second) + return R.first->second; // Canonical value, from other thread. + } + return V; + } +}; + } // namespace clangd } // namespace clang #endif diff --git a/clang-tools-extra/clangd/unittests/CompileCommandsTests.cpp b/clang-tools-extra/clangd/unittests/CompileCommandsTests.cpp index cad9872916234b..ffef28b92d3dee 100644 --- a/clang-tools-extra/clangd/unittests/CompileCommandsTests.cpp +++ b/clang-tools-extra/clangd/unittests/CompileCommandsTests.cpp @@ -9,7 +9,11 @@ #include "CompileCommands.h" #include "TestFS.h" +#include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/Process.h" #include "gmock/gmock.h" #include "gtest/gtest.h" @@ -103,12 +107,81 @@ TEST(CommandMangler, ClangPath) { Cmd = {"unknown-binary", "foo.cc"}; Mangler.adjust(Cmd); - EXPECT_EQ("unknown-binary", Cmd.front()); + EXPECT_EQ(testPath("fake/unknown-binary"), Cmd.front()); Cmd = {testPath("path/clang++"), "foo.cc"}; Mangler.adjust(Cmd); EXPECT_EQ(testPath("path/clang++"), Cmd.front()); + + Cmd = {"foo/unknown-binary", "foo.cc"}; + Mangler.adjust(Cmd); + EXPECT_EQ(testPath("fake/unknown-binary"), Cmd.front()); +} + +// Only run the PATH/symlink resolving test on unix, we need to fiddle +// with permissions and environment variables... +#ifdef LLVM_ON_UNIX +MATCHER(Ok, "") { + if (arg) { + *result_listener << arg.message(); + return false; + } + return true; +} + +TEST(CommandMangler, ClangPathResolve) { + // Set up filesystem: + // /temp/ + // bin/ + // foo -> temp/lib/bar + // lib/ + // bar + llvm::SmallString<256> TempDir; + ASSERT_THAT(llvm::sys::fs::createUniqueDirectory("ClangPathResolve", TempDir), + Ok()); + auto CleanDir = llvm::make_scope_exit( + [&] { llvm::sys::fs::remove_directories(TempDir); }); + ASSERT_THAT(llvm::sys::fs::create_directory(TempDir + "/bin"), Ok()); + ASSERT_THAT(llvm::sys::fs::create_directory(TempDir + "/lib"), Ok()); + int FD; + ASSERT_THAT(llvm::sys::fs::openFileForWrite(TempDir + "/lib/bar", FD), Ok()); + ASSERT_THAT(llvm::sys::Process::SafelyCloseFileDescriptor(FD), Ok()); + ::chmod((TempDir + "/lib/bar").str().c_str(), 0755); // executable + ASSERT_THAT( + llvm::sys::fs::create_link(TempDir + "/lib/bar", TempDir + "/bin/foo"), + Ok()); + + // Test the case where the driver is an absolute path to a symlink. + auto Mangler = CommandMangler::forTests(); + Mangler.ClangPath = testPath("fake/clang"); + std::vector Cmd = {(TempDir + "/bin/foo").str(), "foo.cc"}; + Mangler.adjust(Cmd); + // Directory based on resolved symlink, basename preserved. + EXPECT_EQ((TempDir + "/lib/foo").str(), Cmd.front()); + + // Set PATH to point to temp/bin so we can find 'foo' on it. + ASSERT_TRUE(::getenv("PATH")); + auto RestorePath = + llvm::make_scope_exit([OldPath = std::string(::getenv("PATH"))] { + ::setenv("PATH", OldPath.c_str(), 1); + }); + ::setenv("PATH", (TempDir + "/bin").str().c_str(), /*overwrite=*/1); + + // Test the case where the driver is a $PATH-relative path to a symlink. + Mangler = CommandMangler::forTests(); + Mangler.ClangPath = testPath("fake/clang"); + // Driver found on PATH. + Cmd = {"foo", "foo.cc"}; + Mangler.adjust(Cmd); + // Found the symlink and resolved the path as above. + EXPECT_EQ((TempDir + "/lib/foo").str(), Cmd.front()); + + // Symlink not resolved with -no-canonical-prefixes. + Cmd = {"foo", "-no-canonical-prefixes", "foo.cc"}; + Mangler.adjust(Cmd); + EXPECT_EQ((TempDir + "/bin/foo").str(), Cmd.front()); } +#endif } // namespace } // namespace clangd diff --git a/clang-tools-extra/clangd/unittests/support/ThreadingTests.cpp b/clang-tools-extra/clangd/unittests/support/ThreadingTests.cpp index 015af092012a15..e265ad2eabeaea 100644 --- a/clang-tools-extra/clangd/unittests/support/ThreadingTests.cpp +++ b/clang-tools-extra/clangd/unittests/support/ThreadingTests.cpp @@ -7,6 +7,8 @@ //===----------------------------------------------------------------------===// #include "support/Threading.h" +#include "llvm/ADT/DenseMap.h" +#include "gmock/gmock.h" #include "gtest/gtest.h" #include @@ -60,5 +62,64 @@ TEST_F(ThreadingTest, TaskRunner) { std::lock_guard Lock(Mutex); ASSERT_EQ(Counter, TasksCnt * IncrementsPerTask); } + +TEST_F(ThreadingTest, Memoize) { + const unsigned NumThreads = 5; + const unsigned NumKeys = 100; + const unsigned NumIterations = 100; + + Memoize> Cache; + std::atomic ComputeCount(0); + std::atomic ComputeResult[NumKeys]; + std::fill(std::begin(ComputeResult), std::end(ComputeResult), -1); + + AsyncTaskRunner Tasks; + for (unsigned I = 0; I < NumThreads; ++I) + Tasks.runAsync("worker" + std::to_string(I), [&] { + for (unsigned J = 0; J < NumIterations; J++) + for (unsigned K = 0; K < NumKeys; K++) { + int Result = Cache.get(K, [&] { return ++ComputeCount; }); + EXPECT_THAT(ComputeResult[K].exchange(Result), + testing::AnyOf(-1, Result)) + << "Got inconsistent results from memoize"; + } + }); + Tasks.wait(); + EXPECT_GE(ComputeCount, NumKeys) << "Computed each key once"; + EXPECT_LE(ComputeCount, NumThreads * NumKeys) + << "Worst case, computed each key in every thread"; + for (int Result : ComputeResult) + EXPECT_GT(Result, 0) << "All results in expected domain"; +} + +TEST_F(ThreadingTest, MemoizeDeterministic) { + Memoize> Cache; + + // Spawn two parallel computations, A and B. + // Force concurrency: neither can finish until both have started. + // Verify that cache returns consistent results. + AsyncTaskRunner Tasks; + std::atomic ValueA(0), ValueB(0); + Notification ReleaseA, ReleaseB; + Tasks.runAsync("A", [&] { + ValueA = Cache.get(0, [&] { + ReleaseB.notify(); + ReleaseA.wait(); + return 'A'; + }); + }); + Tasks.runAsync("A", [&] { + ValueB = Cache.get(0, [&] { + ReleaseA.notify(); + ReleaseB.wait(); + return 'B'; + }); + }); + Tasks.wait(); + + ASSERT_EQ(ValueA, ValueB); + ASSERT_THAT(ValueA.load(), testing::AnyOf('A', 'B')); +} + } // namespace clangd } // namespace clang