From 85e2bd860f7fdfc23be4932a44ce69c8bd4dea92 Mon Sep 17 00:00:00 2001 From: Chris Cummins Date: Thu, 3 Jun 2021 02:07:33 +0100 Subject: [PATCH 1/7] [docs] Use doxygen-format comments for LLVM service. --- compiler_gym/envs/llvm/service/ActionSpace.h | 29 ++--- compiler_gym/envs/llvm/service/Benchmark.h | 102 +++++++++++++++--- .../envs/llvm/service/BenchmarkFactory.cc | 2 +- .../envs/llvm/service/BenchmarkFactory.h | 75 +++++++++---- compiler_gym/envs/llvm/service/Cost.h | 64 ++++++++--- compiler_gym/envs/llvm/service/LlvmSession.h | 44 ++++++-- .../envs/llvm/service/ObservationSpaces.h | 81 ++++++++------ compiler_gym/service/runtime/BenchmarkCache.h | 2 +- compiler_gym/spaces/named_discrete.py | 1 + .../cc/compiler_gym/envs/llvm/service.rst | 50 ++++----- 10 files changed, 321 insertions(+), 129 deletions(-) diff --git a/compiler_gym/envs/llvm/service/ActionSpace.h b/compiler_gym/envs/llvm/service/ActionSpace.h index 606c9e416..9a6abe50a 100644 --- a/compiler_gym/envs/llvm/service/ActionSpace.h +++ b/compiler_gym/envs/llvm/service/ActionSpace.h @@ -12,21 +12,26 @@ namespace compiler_gym::llvm_service { // LLVM transforms. Generated by //compiler_gym/envs/llvm/service/passes:action-genfiles. #include "compiler_gym/envs/llvm/service/passes/ActionEnum.h" // @donotremove -// The available action spaces for LLVM. -// -// NOTE(cummins): Housekeeping rules - to add a new action space: -// 1. Add a new entry to this LlvmActionSpace enum. -// 2. Add a new switch case to getLlvmActionSpaceList() to return the -// ActionSpace. -// 3. Add a new switch case to LlvmSession::step() to compute -// the actual action. -// 4. Run `bazel test //compiler_gym/...` and update the newly failing tests. +/** + * The available action spaces for LLVM. + * + * \note Implementation housekeeping rules - to add a new action space: + * 1. Add a new entry to this LlvmActionSpace enum. + * 2. Add a new switch case to getLlvmActionSpaceList() to return the + * ActionSpace. + * 3. 
Add a new switch case to LlvmSession::step() to compute + * the actual action. + * 4. Run `bazel test //compiler_gym/...` and update the newly failing tests. + */ enum class LlvmActionSpace { - // The full set of transform passes for LLVM. - PASSES_ALL, + PASSES_ALL, ///< The full set of transform passes for LLVM. }; -// Get the list of LLVM action spaces. +/** + * Get the list of LLVM action spaces. + * + * @return A list of ActionSpace instances. + */ std::vector getLlvmActionSpaceList(); } // namespace compiler_gym::llvm_service diff --git a/compiler_gym/envs/llvm/service/Benchmark.h b/compiler_gym/envs/llvm/service/Benchmark.h index 5f92857d7..4074cd788 100644 --- a/compiler_gym/envs/llvm/service/Benchmark.h +++ b/compiler_gym/envs/llvm/service/Benchmark.h @@ -17,62 +17,138 @@ namespace compiler_gym::llvm_service { -// A 160 bits SHA1 that identifies an LLVM module. +/** + * A 160 bits SHA1 that identifies an LLVM module. + */ using BenchmarkHash = llvm::ModuleHash; +/** + * A bitcode. + */ using Bitcode = llvm::SmallString<0>; +/** + * Read a bitcode file from disk. + * + * @param path The path of the bitcode file to read. + * @param bitcode The destination bitcode. + * @return `OK` on success, `NOT_FOUND` if the file is not found, or + * `INVALID_ARGUMENT` if the file is invalid. + */ grpc::Status readBitcodeFile(const boost::filesystem::path& path, Bitcode* bitcode); -// Parses the given bitcode into a module and strips the identifying ModuleID -// and source_filename attributes. Returns nullptr on error and sets status. +/** + * Construct an LLVM module from a bitcode. + * + * Parses the given bitcode into a module and strips the identifying `ModuleID` + * and `source_filename` attributes. + * + * @param context An LLVM context for the new module. + * @param bitcode The bitcode to parse. + * @param name The name of the module. + * @param status An error status that is set to `OK` on success or + * `INVALID_ARGUMENT` if the bitcode cannot be parsed. 
+ * @return A unique pointer to an LLVM module, or `nullptr` on error and sets + * `status`. + */ std::unique_ptr makeModule(llvm::LLVMContext& context, const Bitcode& bitcode, const std::string& name, grpc::Status* status); -// A benchmark is an LLVM module and the LLVM context that owns it. A benchmark -// is mutable and can be changed over the course of a session. +/** + * An LLVM module and the LLVM context that owns it. + * + * A benchmark is mutable and can be changed over the course of a session. + */ class Benchmark { public: + /** + * Construct a benchmark from a bitcode. + */ Benchmark(const std::string& name, const Bitcode& bitcode, const boost::filesystem::path& workingDirectory, const BaselineCosts& baselineCosts); + /** + * Construct a benchmark from an LLVM module. + */ Benchmark(const std::string& name, std::unique_ptr context, std::unique_ptr module, size_t bitcodeSize, const boost::filesystem::path& workingDirectory, const BaselineCosts& baselineCosts); - // Make a copy of the benchmark. + /** + * Make a copy of the benchmark. + * + * @param workingDirectory The working directory for the new benchmark. + * @return A copy of the benchmark. + */ std::unique_ptr clone(const boost::filesystem::path& workingDirectory) const; - // Compute and return a SHA1 hash of the module. + /** + * Compute and return a SHA1 hash of the module. + * + * @return A SHA1 hash of the module. + */ BenchmarkHash module_hash() const; - // Wrapper around llvm::verifyModule() which returns an error status on - // failure. + /** + * Wrapper around `llvm::verifyModule()` which returns an error status on + * failure. + * + * @return `OK` on success, else `DATA_LOSS` if verification fails. + */ grpc::Status verify_module(); + /** + * The name of the benchmark. + */ inline const std::string& name() const { return name_; } + /** + * The size of the bitcode that was parsed to produce the initial benchmark. 
+ */ inline const size_t bitcodeSize() const { return bitcodeSize_; } + /** + * The underlying LLVM module. + */ inline llvm::Module& module() { return *module_; } + /** + * The underlying LLVM module. + */ inline const llvm::Module& module() const { return *module_; } + /** + * The underlying LLVM context. + */ inline llvm::LLVMContext& context() { return *context_; } + /** + * The underlying LLVM context. + */ inline const llvm::LLVMContext& context() const { return *context_; } inline const BaselineCosts& baselineCosts() const { return baselineCosts_; } // Accessors for the underlying raw pointers. + + /** + * A pointer to the underlying LLVM context. + */ inline const llvm::LLVMContext* context_ptr() const { return context_.get(); } + /** + * A pointer to the underlying LLVM module. + */ inline const llvm::Module* module_ptr() const { return module_.get(); } - // Replace the benchmark module with a new one. This is to enable - // out-of-process modification of the IR by serializing the benchmark to a - // file, modifying the file, then loading the modified file and updating the - // module pointer here. + /** Replace the benchmark module with a new one. + * + * This is to enable out-of-process modification of the IR by serializing the + * benchmark to a file, modifying the file, then loading the modified file and + * updating the module pointer here. + * + * @param module A new module. + */ inline void replaceModule(std::unique_ptr module) { module_ = std::move(module); } private: diff --git a/compiler_gym/envs/llvm/service/BenchmarkFactory.cc b/compiler_gym/envs/llvm/service/BenchmarkFactory.cc index 83decb8ee..d63af6d32 100644 --- a/compiler_gym/envs/llvm/service/BenchmarkFactory.cc +++ b/compiler_gym/envs/llvm/service/BenchmarkFactory.cc @@ -56,7 +56,7 @@ Status BenchmarkFactory::getBenchmark(const BenchmarkProto& benchmarkMessage, break; } case compiler_gym::File::DataCase::kUri: { - // Check that protocol of the benmchmark URI. 
+ // Check the protocol of the benchmark URI. if (programFile.uri().find("file:///") != 0) { return Status(StatusCode::INVALID_ARGUMENT, fmt::format("Invalid benchmark data URI. " diff --git a/compiler_gym/envs/llvm/service/BenchmarkFactory.h b/compiler_gym/envs/llvm/service/BenchmarkFactory.h index 73ba7b26e..75c865bbc 100644 --- a/compiler_gym/envs/llvm/service/BenchmarkFactory.h +++ b/compiler_gym/envs/llvm/service/BenchmarkFactory.h @@ -21,21 +21,41 @@ namespace compiler_gym::llvm_service { -// Benchmarks are loaded from disk and cached in-memory so that future uses -// do not require a disk access. The number of benchmarks that may be -// simultaneously loaded is limited by the combined size of the bitcodes, in -// bytes. Once this size is reached, benchmarks are offloaded so that they must -// be re-read from disk. +/** + * Maximum number of bytes before benchmark cache eviction. + * + * Benchmarks are loaded from disk and cached in-memory so that future uses do + * not require a disk access. The number of benchmarks that may be + * simultaneously loaded is limited by the combined size of the bitcodes, in + * bytes. Once this size is reached, benchmarks are offloaded so that they must + * be re-read from disk. + */ constexpr size_t kMaxLoadedBenchmarkSize = 512 * 1024 * 1024; -// A factory object for instantiating LLVM modules for use in optimization -// sessions. Example usage: -// -// BenchmarkFactory factory; -// auto benchmark = factory.getBenchmark("file:////tmp/my_bitcode.bc"); -// // ... do fun stuff +/** + * A factory object for instantiating LLVM modules for use in optimization + * sessions. + * + * Example usage: + * + * \code{.cpp} + * BenchmarkFactory factory; + * auto benchmark = factory.getBenchmark("file:////tmp/my_bitcode.bc"); + * // ... do fun stuff + * \endcode + */ class BenchmarkFactory { public: + /** + * Return the global benchmark factory singleton. + * + * @param workingDirectory The working directory. 
+ * @param rand An optional random number generator. This is used for cache + * evictions. + * @param maxLoadedBenchmarkSize The maximum size in bytes of the benchmark + * cache before evictions. + * @return The benchmark factory singleton instance. + */ static BenchmarkFactory& getSingleton(const boost::filesystem::path& workingDirectory, std::optional rand = std::nullopt, size_t maxLoadedBenchmarkSize = kMaxLoadedBenchmarkSize) { @@ -43,7 +63,14 @@ class BenchmarkFactory { return instance; } - // Get the requested named benchmark. + /** + * Get the requested named benchmark. + * + * @param benchmarkMessage A Benchmark protocol message. + * @param benchmark A benchmark instance to assign this benchmark to. + * @return `OK` on success, or `INVALID_ARGUMENT` if the protocol message is + * invalid. + */ [[nodiscard]] grpc::Status getBenchmark(const compiler_gym::Benchmark& benchmarkMessage, std::unique_ptr* benchmark); @@ -53,11 +80,17 @@ class BenchmarkFactory { [[nodiscard]] grpc::Status addBitcode(const std::string& uri, const boost::filesystem::path& path); - // Construct a benchmark factory. rand is a random seed used to control the - // selection of random benchmarks. maxLoadedBenchmarkSize is the maximum - // combined size of the bitcodes that may be cached in memory. Once this - // size is reached, benchmarks are offloaded so that they must be re-read from - // disk. + /** + * Construct a benchmark factory. + * + * @param workingDirectory A filesystem directory to use for storing temporary + * files. + * @param rand is a random seed used to control the selection of random + * benchmarks. + * @param maxLoadedBenchmarkSize is the maximum combined size of the bitcodes + * that may be cached in memory. Once this size is reached, benchmarks are + * offloaded so that they must be re-read from disk. 
+ */ BenchmarkFactory(const boost::filesystem::path& workingDirectory, std::optional rand = std::nullopt, size_t maxLoadedBenchmarkSize = kMaxLoadedBenchmarkSize); @@ -65,12 +98,16 @@ class BenchmarkFactory { BenchmarkFactory(const BenchmarkFactory&) = delete; BenchmarkFactory& operator=(const BenchmarkFactory&) = delete; - // A mapping from URI to benchmarks which have been loaded into memory. + /** + * A mapping from URI to benchmarks which have been loaded into memory. + */ std::unordered_map benchmarks_; const boost::filesystem::path workingDirectory_; std::mt19937_64 rand_; - // The current and maximum allowed sizes of the loaded benchmarks. + /** + * The current and maximum allowed sizes of the loaded benchmarks. + */ size_t loadedBenchmarksSize_; const size_t maxLoadedBenchmarkSize_; }; diff --git a/compiler_gym/envs/llvm/service/Cost.h b/compiler_gym/envs/llvm/service/Cost.h index 4b520cc97..c5a467100 100644 --- a/compiler_gym/envs/llvm/service/Cost.h +++ b/compiler_gym/envs/llvm/service/Cost.h @@ -14,22 +14,35 @@ namespace compiler_gym::llvm_service { +/** + * A cost function for LLVM benchmarks. + */ enum class LlvmCostFunction { - // The number of instructions in the LLVM-IR module. This is fast to compute - // and deterministic. + /** + * The number of instructions in the LLVM-IR module. + * + * IR instruction count is fast to compute and deterministic. + */ IR_INSTRUCTION_COUNT, - // Returns the size (in bytes) of the .TEXT section of the compiled module. + /** + * Returns the size (in bytes) of the .TEXT section of the compiled module. + */ OBJECT_TEXT_SIZE_BYTES, #ifdef COMPILER_GYM_EXPERIMENTAL_TEXT_SIZE_COST - // Returns the size (in bytes) of the .TEXT section of the compiled binary. + /** + * Returns the size (in bytes) of the .TEXT section of the compiled binary. + */ TEXT_SIZE_BYTES, #endif }; +/** + * LLVM's builtin policies. + */ enum class LlvmBaselinePolicy { - O0, // No optimizations. - O3, // -O3 optimizations. - Oz, // -Oz optimizations. 
+ O0, ///< No optimizations. + O3, ///< `-O3` optimizations. + Oz, ///< `-Oz` optimizations. }; constexpr size_t numCosts = magic_enum::enum_count(); @@ -38,20 +51,41 @@ constexpr size_t numBaselineCosts = magic_enum::enum_count() using BaselineCosts = std::array; using PreviousCosts = std::array, numCosts>; -// TODO(cummins): Refactor cost calculation to allow graceful error handling -// by returning a grpc::Status. - -// Compute the cost using a given cost function. A lower cost is better. +/** + * Compute the cost using a given cost function. A lower cost is better. + * + * @param costFunction The cost function to use. + * @param module The module to compute the cost for. + * @param workingDirectory A directory that can be used for temporary file + * storage. + * @param cost The cost to write. + * @return `OK` on success. + */ [[nodiscard]] grpc::Status setCost(const LlvmCostFunction& costFunction, llvm::Module& module, const boost::filesystem::path& workingDirectory, double* cost); -// Return a baseline cost. +/** + * Return a baseline cost. + * + * @param baselineCosts The baseline costs list. + * @param policy The baseline policy to return the cost of. + * @param cost The cost function to use. + * @return A cost. + */ double getBaselineCost(const BaselineCosts& baselineCosts, LlvmBaselinePolicy policy, LlvmCostFunction cost); -// Compute the costs of baseline policies. The unoptimizedModule parameter is -// unmodified, but is not const because various LLVM API calls require a mutable -// reference. +/** + * Compute the costs of baseline policies. + * + * \note The `unoptimizedModule` parameter is unmodified, but is not const + * because various LLVM API calls require a mutable reference. + * + * @param unoptimizedModule The module to compute the baseline costs of. + * @param baselineCosts The costs to write. + * @param workingDirectory A directory that can be used for temporary file + * storage. 
+ */ [[nodiscard]] grpc::Status setBaselineCosts(llvm::Module& unoptimizedModule, BaselineCosts* baselineCosts, const boost::filesystem::path& workingDirectory); diff --git a/compiler_gym/envs/llvm/service/LlvmSession.h b/compiler_gym/envs/llvm/service/LlvmSession.h index a15db54c3..69e050792 100644 --- a/compiler_gym/envs/llvm/service/LlvmSession.h +++ b/compiler_gym/envs/llvm/service/LlvmSession.h @@ -28,11 +28,13 @@ namespace compiler_gym::llvm_service { -// This class exposes the LLVM optimization pipeline for an LLVM module as an -// interactive environment. -// -// It can be used directly as a C++ API, or it can be accessed through an RPC -// interface using the compiler_gym::service::LlvmService class. +/** + * An interactive LLVM compilation session. + * + * This class exposes the LLVM optimization pipeline for an LLVM module as an + * interactive environment. It can be used directly as a C++ API, or it can be + * accessed through an RPC interface using the CompilerGym RPC runtime. + */ class LlvmSession final : public CompilationSession { public: LlvmSession(const boost::filesystem::path& workingDirectory); @@ -76,21 +78,41 @@ class LlvmSession final : public CompilationSession { return *benchmark_; } - // Run the requested action. + /** + * Run the requested action. + * + * @param action An action to apply. + * @param actionHadNoEffect Set to true if LLVM reported that any passes that + * were run made no modifications to the module. + * @return `OK` on success. + */ [[nodiscard]] grpc::Status applyPassAction(LlvmAction action, bool& actionHadNoEffect); - // Run the given pass, possibly modifying the underlying LLVM module. Return - // whether the module was modified. + /** + * Run the given pass, possibly modifying the underlying LLVM module. + * + * @return Whether the module was modified. + */ bool runPass(llvm::Pass* pass); + + /** + * Run the given pass, possibly modifying the underlying LLVM module. + * + * @return Whether the module was modified. 
+ */ bool runPass(llvm::FunctionPass* pass); - // Run the commandline `opt` tool on the current LLVM module with the given - // arguments, replacing the environment state with the generated output. + /** + * Run the commandline `opt` tool on the current LLVM module with the given + * arguments, replacing the environment state with the generated output. + */ [[nodiscard]] grpc::Status runOptWithArgs(const std::vector& optArgs); inline const llvm::TargetLibraryInfoImpl& tlii() const { return tlii_; } - // Setup pass manager with depdendent passes and the specified pass. + /** + * Setup pass manager with dependent passes and the specified pass. + */ template inline void setupPassManager(PassManager* passManager, Pass* pass) { passManager->add(new llvm::ProfileSummaryInfoWrapperPass()); diff --git a/compiler_gym/envs/llvm/service/ObservationSpaces.h b/compiler_gym/envs/llvm/service/ObservationSpaces.h index 358ebaa73..e15371e3e 100644 --- a/compiler_gym/envs/llvm/service/ObservationSpaces.h +++ b/compiler_gym/envs/llvm/service/ObservationSpaces.h @@ -10,60 +10,81 @@ namespace compiler_gym::llvm_service { -// The available observation spaces for LLVM. -// -// NOTE(cummins): Housekeeping rules - to add a new observation space: -// 1. Add a new entry to this LlvmObservationSpace enum. -// 2. Add a new switch case to getLlvmObservationSpaceList() to return the -// ObserverationSpace. -// 3. Add a new switch case to LlvmSession::getObservation() to compute -// the actual observation. -// 4. Run `bazel test //compiler_gym/...` and update the newly failing tests. +/** + * The available observation spaces for LLVM. + * + * \note Housekeeping rules - to add a new observation space: + * 1. Add a new entry to this LlvmObservationSpace enum. + * 2. Add a new switch case to getLlvmObservationSpaceList() to return the + * ObservationSpace. + * 3. Add a new switch case to LlvmSession::getObservation() to compute + * the actual observation. + * 4. 
Run `bazel test //compiler_gym/...` and update the newly failing tests. + */ enum class LlvmObservationSpace { - // The entire LLVM module as an IR string. This allows the user to do its own - // feature extraction. + /** + * The entire LLVM module as an IR string. + * + * This allows the user to do their own feature extraction. + */ IR, - // The 40-digit hex SHA1 checksum of the LLVM module. + /** The 40-digit hex SHA1 checksum of the LLVM module. */ IR_SHA1, - // Write the bitcode to a file. Returns a string, which is the path of the - // written file. + /** Write the bitcode to a file and return its path as a string. */ BITCODE_FILE, - // The counts of all instructions in a program. + /** The counts of all instructions in a program. */ INST_COUNT, - // The Autophase feature vector from: - // - // Huang, Q., Haj-Ali, A., Moses, W., Xiang, J., Stoica, I., Asanovic, K., & - // Wawrzynek, J. (2019). Autophase: Compiler phase-ordering for HLS with - // deep reinforcement learning. FCCM. + /** + * The Autophase feature vector. + * + * From: + * + * Huang, Q., Haj-Ali, A., Moses, W., Xiang, J., Stoica, I., Asanovic, K., + * & Wawrzynek, J. (2019). Autophase: Compiler phase-ordering for HLS with + * deep reinforcement learning. FCCM. + */ AUTOPHASE, - // Returns the graph representation of a program from: - // - // Cummins, C., Fisches, Z. V., Ben-Nun, T., Hoefler, T., & Leather, H. - // (2020). ProGraML: Graph-based Deep Learning for Program Optimization - // and Analysis. ArXiv:2003.10536. https://arxiv.org/abs/2003.10536 + /** + * Returns the graph representation of a program. + * + * From: + * + * Cummins, C., Fisches, Z. V., Ben-Nun, T., Hoefler, T., & Leather, H. + * (2020). ProGraML: Graph-based Deep Learning for Program Optimization + * and Analysis. ArXiv:2003.10536. https://arxiv.org/abs/2003.10536 + */ PROGRAML, - // A JSON dictionary of properties describing the CPU. + /** A JSON dictionary of properties describing the CPU. 
*/ CPU_INFO, - // The number of LLVM-IR instructions in the current module. + /** The number of LLVM-IR instructions in the current module. */ IR_INSTRUCTION_COUNT, + /** The number of LLVM-IR instructions normalized to `-O0`. */ IR_INSTRUCTION_COUNT_O0, + /** The number of LLVM-IR instructions normalized to `-O3`. */ IR_INSTRUCTION_COUNT_O3, + /** The number of LLVM-IR instructions normalized to `-Oz`. */ IR_INSTRUCTION_COUNT_OZ, - // The size of the .text section of the lowered module. Platform dependent. + /** The platform-dependent size of the .text section of the lowered module. */ OBJECT_TEXT_SIZE_BYTES, + /** The platform-dependent size of the .text section of the lowered module. */ OBJECT_TEXT_SIZE_O0, + /** The platform-dependent size of the .text section of the lowered module. */ OBJECT_TEXT_SIZE_O3, + /** The platform-dependent size of the .text section of the lowered module. */ OBJECT_TEXT_SIZE_OZ, #ifdef COMPILER_GYM_EXPERIMENTAL_TEXT_SIZE_COST - // The size of the .text section of the compiled binary. Platform dependent. + /** The platform-dependent size of the .text section of the compiled binary. */ TEXT_SIZE_BYTES, + /** The platform-dependent size of the .text section of the compiled binary. */ TEXT_SIZE_O0, + /** The platform-dependent size of the .text section of the compiled binary. */ TEXT_SIZE_O3, + /** The platform-dependent size of the .text section of the compiled binary. */ TEXT_SIZE_OZ, #endif }; -// Return the list of available observation spaces. +/** Return the list of available observation spaces. */ std::vector getLlvmObservationSpaceList(); } // namespace compiler_gym::llvm_service diff --git a/compiler_gym/service/runtime/BenchmarkCache.h b/compiler_gym/service/runtime/BenchmarkCache.h index d54645ae7..7f48d87f3 100644 --- a/compiler_gym/service/runtime/BenchmarkCache.h +++ b/compiler_gym/service/runtime/BenchmarkCache.h @@ -42,7 +42,7 @@ class BenchmarkCache { * Lookup a benchmark. 
The pointer set by this method is valid only until the * next call to add(). * - * @param uri The URI of the benchmark + * @param uri The URI of the benchmark. * @return A Benchmark pointer. */ const Benchmark* get(const std::string& uri) const; diff --git a/compiler_gym/spaces/named_discrete.py b/compiler_gym/spaces/named_discrete.py index 604d4d36b..043bd192a 100644 --- a/compiler_gym/spaces/named_discrete.py +++ b/compiler_gym/spaces/named_discrete.py @@ -13,6 +13,7 @@ class NamedDiscrete(Discrete): :ivar name: The name of the space. :code:`None` if the space has no name. :vartype name: Optional[str] + :ivar names: A list of names for each element in the space. :vartype names: List[str] diff --git a/docs/source/cc/compiler_gym/envs/llvm/service.rst b/docs/source/cc/compiler_gym/envs/llvm/service.rst index eb9d886b2..95b888291 100644 --- a/docs/source/cc/compiler_gym/envs/llvm/service.rst +++ b/docs/source/cc/compiler_gym/envs/llvm/service.rst @@ -2,38 +2,41 @@ compiler_gym/envs/llvm/service ============================== This directory contains the core C++ implementation of the LLVM environment for -CompilerGym. +CompilerGym. The base session is implemented by a +:code:`compiler_gym::llvm_service::LlvmSession` class, defined in +:ref:`LlvmSession.h `. .. contents:: :local: -LlvmService.h +ActionSpace.h ------------- -:code:`#include "compiler_gym/envs/llvm/service/LlvmService.h"` +:code:`#include "compiler_gym/envs/llvm/service/ActionSpace.h"` -.. doxygenfile:: compiler_gym/envs/llvm/service/LlvmService.h +.. doxygenfile:: compiler_gym/envs/llvm/service/ActionSpace.h -Cost.h ------- +Benchmark.h +----------- -:code:`#include "compiler_gym/envs/llvm/service/Cost.h"` +:code:`#include "compiler_gym/envs/llvm/service/Benchmark.h"` -.. doxygenfile:: compiler_gym/envs/llvm/service/Cost.h +.. 
doxygenfile:: compiler_gym/envs/llvm/service/Benchmark.h -ActionSpace.h -------------- +BenchmarkFactory.h +------------------ -:code:`#include "compiler_gym/envs/llvm/service/ActionSpace.h"` +:code:`#include "compiler_gym/envs/llvm/service/BenchmarkFactory.h"` -.. doxygenfile:: compiler_gym/envs/llvm/service/ActionSpace.h +.. doxygenfile:: compiler_gym/envs/llvm/service/BenchmarkFactory.h -ObservationSpaces.h -------------------- +Cost.h +------ -:code:`#include "compiler_gym/envs/llvm/service/ObservationSpaces.h"` +:code:`#include "compiler_gym/envs/llvm/service/Cost.h"` + +.. doxygenfile:: compiler_gym/envs/llvm/service/Cost.h -.. doxygenfile:: compiler_gym/envs/llvm/service/ObservationSpaces.h LlvmSession.h ------------- @@ -42,16 +45,9 @@ LlvmSession.h .. doxygenfile:: compiler_gym/envs/llvm/service/LlvmSession.h -Benchmark.h ------------ - -:code:`#include "compiler_gym/envs/llvm/service/Benchmark.h"` - -.. doxygenfile:: compiler_gym/envs/llvm/service/Benchmark.h - -BenchmarkFactory.h ------------------- +ObservationSpaces.h +------------------- -:code:`#include "compiler_gym/envs/llvm/service/BenchmarkFactory.h"` +:code:`#include "compiler_gym/envs/llvm/service/ObservationSpaces.h"` -.. doxygenfile:: compiler_gym/envs/llvm/service/BenchmarkFactory.h +.. doxygenfile:: compiler_gym/envs/llvm/service/ObservationSpaces.h From 6ce79fd1d21a7188db3c7961db637bdc8c84ab5a Mon Sep 17 00:00:00 2001 From: Chris Cummins Date: Thu, 3 Jun 2021 02:07:57 +0100 Subject: [PATCH 2/7] [docs] Merge "Developer Manual" into "User Guide" --- docs/source/index.rst | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/docs/source/index.rst b/docs/source/index.rst index 48bfd14e0..6b928c78e 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -11,7 +11,9 @@ for applying reinforcement learning to compiler optimizations. llvm/index cli about + rpc changelog + contributing faq .. 
@@ -22,13 +24,6 @@ for applying reinforcement learning to compiler optimizations. tutorial/reinforcement_learning tutorial/example_service -.. toctree:: - :maxdepth: 3 - :caption: Developer Manual - - contributing - rpc.rst - .. toctree:: :maxdepth: 3 :caption: Python API Reference @@ -47,10 +42,10 @@ for applying reinforcement learning to compiler optimizations. :maxdepth: 3 :caption: C++ API Reference - cc/compiler_gym/envs/llvm/service.rst - cc/compiler_gym/service.rst - cc/compiler_gym/service/runtime.rst - cc/compiler_gym/util.rst + cc/compiler_gym/envs/llvm/service + cc/compiler_gym/service + cc/compiler_gym/service/runtime + cc/compiler_gym/util Indices and tables From bada607eef30196869e798b9a803cfdb6dd8bbc3 Mon Sep 17 00:00:00 2001 From: Chris Cummins Date: Thu, 3 Jun 2021 11:07:09 +0100 Subject: [PATCH 3/7] [service] Remove unused RunService implementation. --- compiler_gym/util/BUILD | 23 ---------- compiler_gym/util/RunService.cc | 12 ----- compiler_gym/util/RunService.h | 50 -------------------- compiler_gym/util/RunServiceImpl.h | 73 ------------------------------ 4 files changed, 158 deletions(-) delete mode 100644 compiler_gym/util/RunService.cc delete mode 100644 compiler_gym/util/RunService.h delete mode 100644 compiler_gym/util/RunServiceImpl.h diff --git a/compiler_gym/util/BUILD b/compiler_gym/util/BUILD index 9d1dc1f45..e7f5862d1 100644 --- a/compiler_gym/util/BUILD +++ b/compiler_gym/util/BUILD @@ -66,29 +66,6 @@ cc_library( ], ) -cc_library( - name = "RunService", - srcs = ["RunService.cc"], - hdrs = ["RunService.h"], - visibility = ["//visibility:public"], - deps = [ - ":RunServiceImpl", - "@boost//:filesystem", - "@gflags", - "@glog", - ], -) - -cc_library( - name = "RunServiceImpl", - hdrs = ["RunServiceImpl.h"], - deps = [ - "@boost//:filesystem", - "@com_github_grpc_grpc//:grpc++", - "@glog", - ], -) - cc_library( name = "StrLenConstexpr", hdrs = ["StrLenConstexpr.h"], diff --git a/compiler_gym/util/RunService.cc 
b/compiler_gym/util/RunService.cc deleted file mode 100644 index e484c7109..000000000 --- a/compiler_gym/util/RunService.cc +++ /dev/null @@ -1,12 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. -// -// This source code is licensed under the MIT license found in the -// LICENSE file in the root directory of this source tree. -#include "compiler_gym/util/RunService.h" - -DEFINE_string( - working_dir, "", - "The working directory to use. Must be an existing directory with write permissions."); -DEFINE_string(port, "0", - "The port to listen on. If 0, an unused port will be selected. The selected port is " - "written to /port.txt."); diff --git a/compiler_gym/util/RunService.h b/compiler_gym/util/RunService.h deleted file mode 100644 index 60893653c..000000000 --- a/compiler_gym/util/RunService.h +++ /dev/null @@ -1,50 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. -// -// This source code is licensed under the MIT license found in the -// LICENSE file in the root directory of this source tree. -#pragma once - -#include -#include -#include - -#include - -#include "compiler_gym/util/RunServiceImpl.h" - -DECLARE_string(port); -DECLARE_string(working_dir); - -namespace compiler_gym::util { - -// Create a service, configured using --port and --working_dir flags, and -// run it. This function never returns. 
-// -// Service must be a subclass of CompilerGymService::Service that implements all -// RPC endpoints and takes a single-argument working directory constructor: -// -// class MyService final : public CompilerGymService::Service { -// public: -// explicit MyService(const boost::filesystem::path& workingDirectory); -// } -// -// Usage: -// -// int main(int argc, char** argv) { -// return runService(&argc, &argv, "usage string"); -// } -template -int runService(int* argc, char*** argv, const char* usage) { - gflags::SetUsageMessage(std::string(usage)); - gflags::ParseCommandLineFlags(argc, argv, /*remove_flags=*/false); - - CHECK(!FLAGS_working_dir.empty()) << "--working_dir flag not set"; - CHECK(!FLAGS_port.empty()) << "--port flag not set"; - - FLAGS_log_dir = std::string(FLAGS_working_dir) + "/logs"; - google::InitGoogleLogging((*argv)[0]); - - return createAndRunService(FLAGS_working_dir, FLAGS_port); -} - -} // namespace compiler_gym::util diff --git a/compiler_gym/util/RunServiceImpl.h b/compiler_gym/util/RunServiceImpl.h deleted file mode 100644 index a5b6759e0..000000000 --- a/compiler_gym/util/RunServiceImpl.h +++ /dev/null @@ -1,73 +0,0 @@ -// Private implementation header for //compiler_gym/util:RunService. -// -// Copyright (c) Facebook, Inc. and its affiliates. -// -// This source code is licensed under the MIT license found in the -// LICENSE file in the root directory of this source tree. -#pragma once - -#include -#include -#include -#include - -#include -#include -#include - -#include "boost/filesystem.hpp" - -namespace compiler_gym::util { - -// Create a service and run it. This function never returns. 
-template -int createAndRunService(const boost::filesystem::path& workingDirectory, - const std::string& requestedPort) { - CHECK(boost::filesystem::is_directory(workingDirectory)) - << "Directory not found: " << workingDirectory.string(); - Service service{workingDirectory}; - - grpc::ServerBuilder builder; - builder.RegisterService(&service); - - // Increase maximum message size beyond the 4MB default as inbound message - // may be larger (e.g., in the case of IR strings). - builder.SetMaxMessageSize(512 * 1024 * 1024); - - // Start a channel on the port. - int port; - std::string serverAddress = "0.0.0.0:" + requestedPort; - builder.AddListeningPort(serverAddress, grpc::InsecureServerCredentials(), &port); - - // Start the server. - std::unique_ptr server(builder.BuildAndStart()); - CHECK(server) << "Failed to build RPC service"; - - { - // Write the port to a /port.txt file, which an external - // process can read to determine how to get in touch. First write the port - // to a temporary file and rename it, since renaming is atomic. - const boost::filesystem::path portPath = workingDirectory / "port.txt"; - std::ofstream out(portPath.string() + ".tmp"); - out << std::to_string(port) << std::endl; - out.close(); - boost::filesystem::rename(portPath.string() + ".tmp", portPath); - } - - { - // Write the process ID to a /pid.txt file, which can - // external process can later use to determine if this service is still - // alive. - const boost::filesystem::path pidPath = workingDirectory / "pid.txt"; - std::ofstream out(pidPath.string()); - out << std::to_string(getpid()) << std::endl; - out.close(); - } - - LOG(INFO) << "Service " << workingDirectory << " listening on " << port << ", PID = " << getpid(); - - server->Wait(); - return 0; -} - -} // namespace compiler_gym::util From 3f0e2172648e022bad8e66bfb16c8bb958ad6a4c Mon Sep 17 00:00:00 2001 From: Chris Cummins Date: Thu, 3 Jun 2021 11:07:51 +0100 Subject: [PATCH 4/7] [docs] Populate C++ service documentation. 
--- .../envs/llvm/service/BenchmarkFactory.h | 6 +- compiler_gym/service/CompilationSession.h | 129 +++++++++++++----- compiler_gym/service/compilation_session.py | 6 +- compiler_gym/service/runtime/BenchmarkCache.h | 4 +- .../service/runtime/CompilerGymService.h | 11 +- compiler_gym/service/runtime/Runtime.h | 22 +++ .../create_and_run_compiler_gym_service.py | 26 +++- compiler_gym/util/EnumUtil.h | 4 +- compiler_gym/util/GrpcStatusMacros.h | 27 ++-- compiler_gym/util/RunfilesPath.h | 20 +-- compiler_gym/util/StrLenConstexpr.h | 11 +- compiler_gym/util/Unreachable.h | 11 +- docs/source/cc/compiler_gym/service.rst | 2 + .../cc/compiler_gym/service/runtime.rst | 23 ++-- docs/source/cc/compiler_gym/util.rst | 21 +-- 15 files changed, 232 insertions(+), 91 deletions(-) diff --git a/compiler_gym/envs/llvm/service/BenchmarkFactory.h b/compiler_gym/envs/llvm/service/BenchmarkFactory.h index 75c865bbc..f274641af 100644 --- a/compiler_gym/envs/llvm/service/BenchmarkFactory.h +++ b/compiler_gym/envs/llvm/service/BenchmarkFactory.h @@ -39,9 +39,9 @@ constexpr size_t kMaxLoadedBenchmarkSize = 512 * 1024 * 1024; * Example usage: * * \code{.cpp} - * BenchmarkFactory factory; - * auto benchmark = factory.getBenchmark("file:////tmp/my_bitcode.bc"); - * // ... do fun stuff + * BenchmarkFactory factory; + * auto benchmark = factory.getBenchmark("file:////tmp/my_bitcode.bc"); + * // ... do fun stuff * \endcode */ class BenchmarkFactory { diff --git a/compiler_gym/service/CompilationSession.h b/compiler_gym/service/CompilationSession.h index a97b2eb7d..6bcd77ddc 100644 --- a/compiler_gym/service/CompilationSession.h +++ b/compiler_gym/service/CompilationSession.h @@ -14,57 +14,104 @@ namespace compiler_gym { -// Base class for encapsulating an incremental compilation session. 
-// -// To add support for a new compiler, subclass from this base and provide -// implementations of the abstract methods, then call -// createAndRunCompilerGymService() and parametrize it with your class type: -// -// #include "compiler_gym/service/CompilationSession.h" -// #include "compiler_gym/service/runtime/Runtime.h" -// -// using namespace compiler_gym; -// -// class MyCompilationSession final : public CompilationSession { ... } -// -// int main(int argc, char** argv) { -// runtime::createAndRunCompilerGymService(); -// } -// +/** + * Base class for encapsulating an incremental compilation session. + * + * To add support for a new compiler, subclass from this base and provide + * implementations of the abstract methods, then call + * createAndRunCompilerGymService() and parametrize it with your class type: + * + * \code{.cpp} + * #include "compiler_gym/service/CompilationSession.h" + * #include "compiler_gym/service/runtime/Runtime.h" + * + * using namespace compiler_gym; + * + * class MyCompilationSession final : public CompilationSession { ... } + * + * int main(int argc, char** argv) { + * runtime::createAndRunCompilerGymService(); + * } + * \endcode + */ class CompilationSession { public: - // Get the compiler version. + /** + * Get the compiler version. + * + * @return A string indicating the compiler version. + */ virtual std::string getCompilerVersion() const; - // A list of action spaces describing the capabilities of the compiler. + /** + * A list of action spaces describing the capabilities of the compiler. + * + * @return A list of ActionSpace instances. + */ virtual std::vector getActionSpaces() const = 0; - // A list of feature vectors that this compiler provides. + /** + * A list of feature vectors that this compiler provides. + * + * @return A list of ObservationSpace instances. + */ virtual std::vector getObservationSpaces() const = 0; - // Start a CompilationSession. 
This will be called after construction and - // before applyAction() or computeObservation(). This will only be called - // once. + /** + * Start a CompilationSession. + * + * This will be called after construction and before applyAction() or + * computeObservation(). This will only be called once. + * + * @param actionSpace The action space to use. + * @param benchmark The benchmark to use. + * @return `OK` on success, else an error code and message. + */ [[nodiscard]] virtual grpc::Status init(const ActionSpace& actionSpace, const Benchmark& benchmark) = 0; - // Initialize the state from another CompilerSession. This will be called - // after construction and before applyAction() or computeObservation(). This - // will only be called once. + /** + * Initialize a CompilationSession from another CompilationSession. + * + * Think of this like a copy constructor, except that this method is allowed + * to fail. + * + * This will be called after construction and before applyAction() or + * computeObservation(). This will only be called once. + * + * @param other The CompilationSession to initialize from. + * @return `OK` on success, else an error code and message. + */ [[nodiscard]] virtual grpc::Status init(CompilationSession* other); - // Apply an action. + /** + * Apply an action. + * + * @param action The action to apply. + * @param newActionSpace If applying the action mutated the action space, set + * this value to the new action space. + * @param actionHadNoEffect If the action had no effect, set this to true. + * @return `OK` on success, else an error code and message. + */ [[nodiscard]] virtual grpc::Status applyAction(const Action& action, bool& endOfEpisode, std::optional& newActionSpace, bool& actionHadNoEffect) = 0; - // Compute an observation. + /** + * Compute an observation. + * + * @return `OK` on success, else an error code and message.
+ */ [[nodiscard]] virtual grpc::Status computeObservation(const ObservationSpace& observationSpace, Observation& observation) = 0; - // Optional. This will be called after all applyAction() and - // computeObservation() in a step. Use this method if you would like to - // perform post-transform validation of compiler state. + /** + * Optional. This will be called after all applyAction() and + * computeObservation() in a step. Use this method if you would like to + * perform post-transform validation of compiler state. + * + * @return `OK` on success, else an error code and message. + */ [[nodiscard]] virtual grpc::Status endOfStep(bool actionHadNoEffect, bool& endOfEpisode, std::optional& newActionSpace); @@ -73,9 +120,23 @@ class CompilationSession { virtual ~CompilationSession() = default; protected: - // Get the working directory, which is a local filesystem directory that this - // CompilationSession can use to store temporary files such as build - // artifacts. + /** + * Get the working directory. + * + * The working directory is a local filesystem directory that this + * CompilationSession can use to store temporary files such as build + * artifacts. The directory exists. + * + * \note If you need to store very large files for a CompilationSession then + * consider using an alternate filesystem path as, when possible, an + * in-memory filesystem will be used for the working directory. + * + * \note A single working directory may be shared by multiple + * CompilationSession instances. Do not assume that you have exclusive + * access. + * + * @return A path.
+ */ inline const boost::filesystem::path& workingDirectory() { return workingDirectory_; } private: diff --git a/compiler_gym/service/compilation_session.py b/compiler_gym/service/compilation_session.py index 20c2f7632..278dbc396 100644 --- a/compiler_gym/service/compilation_session.py +++ b/compiler_gym/service/compilation_session.py @@ -82,9 +82,11 @@ def get_observation(self, observation_space: ObservationSpace) -> Observation: raise NotImplementedError def fork(self) -> "CompilationSession": - """Optional. Create a copy of current session state. + """Create a copy of current session state. - :return: A new CopmilationSession with the same state. + Implementing this method is optional. + + :return: A new CompilationSession with the same state. """ # No need to override this if you are not adding support to fork(). raise NotImplementedError("CompilationSession.fork() not supported") diff --git a/compiler_gym/service/runtime/BenchmarkCache.h b/compiler_gym/service/runtime/BenchmarkCache.h index 7f48d87f3..f285e9f1d 100644 --- a/compiler_gym/service/runtime/BenchmarkCache.h +++ b/compiler_gym/service/runtime/BenchmarkCache.h @@ -19,7 +19,7 @@ namespace compiler_gym::runtime { constexpr size_t kEvictionSizeInBytes = 512 * 1024 * 1024; /** - * @brief A cache of Benchmark protocol messages. + * A cache of Benchmark protocol messages. * * This object caches Benchmark messages by URI. Once the cache reaches a * predetermined size, benchmarks are evicted randomly until the capacity is @@ -28,7 +28,7 @@ constexpr size_t kEvictionSizeInBytes = 512 * 1024 * 1024; class BenchmarkCache { public: /** - * @brief Constructor. + * Constructor. * * @param maxSizeInBytes The maximum size of the benchmark buffer before an * automated eviction is run. 
diff --git a/compiler_gym/service/runtime/CompilerGymService.h b/compiler_gym/service/runtime/CompilerGymService.h index a4a19c833..16672caaa 100644 --- a/compiler_gym/service/runtime/CompilerGymService.h +++ b/compiler_gym/service/runtime/CompilerGymService.h @@ -17,9 +17,14 @@ namespace compiler_gym::runtime { -// A default implementation of the CompilerGymService. When parametrized by a -// CompilationSession subclass, this provides the RPC handling logic to run a -// gym service. +/** + * A default implementation of the CompilerGymService. + * + * When parametrized by a CompilationSession subclass, this provides the RPC + * handling logic to run a gym service. Users should call + * createAndRunCompilerGymService() rather than interacting with this class + * directly. + */ template class CompilerGymService final : public compiler_gym::CompilerGymService::Service { public: diff --git a/compiler_gym/service/runtime/Runtime.h b/compiler_gym/service/runtime/Runtime.h index f49d0caa4..ef154bb1c 100644 --- a/compiler_gym/service/runtime/Runtime.h +++ b/compiler_gym/service/runtime/Runtime.h @@ -9,6 +9,28 @@ namespace compiler_gym::runtime { +/** + * Create and run an RPC service for the given compilation session. + * + * This should be called on its own in a self contained script to implement a + * compilation service. Example: + * + * \code{.cpp} + * #include "compiler_gym/service/runtime/Runtime.h" + * #include "my_compiler_service/MyCompilationSession.h" + * + * int main(int argc, char** argv) { + * createAndRunCompilerGymService( + * argc, argv, "My compiler service" + * ); + * } + * \endcode + * + * This function never returns. + * + * @tparam CompilationSessionType A subclass of CompilationSession that provides + * implementations of the abstract methods.
+ */ template [[noreturn]] void createAndRunCompilerGymService(int argc, char** argv, const char* usage) { createAndRunCompilerGymServiceImpl(argc, argv, usage); diff --git a/compiler_gym/service/runtime/create_and_run_compiler_gym_service.py b/compiler_gym/service/runtime/create_and_run_compiler_gym_service.py index a9d93663c..19e9750b9 100644 --- a/compiler_gym/service/runtime/create_and_run_compiler_gym_service.py +++ b/compiler_gym/service/runtime/create_and_run_compiler_gym_service.py @@ -13,10 +13,12 @@ from signal import SIGTERM, signal from tempfile import mkdtemp from threading import Event, Thread +from typing import Type import grpc from absl import app, flags, logging +from compiler_gym.service.compilation_session import CompilationSession from compiler_gym.service.proto import compiler_gym_service_pb2_grpc from compiler_gym.service.runtime.compiler_gym_service import CompilerGymService from compiler_gym.util import debug_util as dbg @@ -46,7 +48,29 @@ def _shutdown_handler(signal_number, stack_frame): # pragma: no cover shutdown_signal.set() -def create_and_run_compiler_gym_service(compilation_session_type): # pragma: no cover +def create_and_run_compiler_gym_service( # pragma: no cover + compilation_session_type: Type[CompilationSession], +): + """Create and run an RPC service for the given compilation session. + + This should be called on its own in a self contained script to implement a + compilation service. Example: + + .. code-block:: python + + from compiler_gym.service import runtime + from my_compiler_service import MyCompilationSession + + if __name__ == "__main__": + runtime.create_and_run_compiler_gym_service(MyCompilationSession) + + This function never returns. + + :param compilation_session_type: A subclass of :class:`CompilationSession + ` that provides implementations + of the abstract methods. + """ + def main(argv): # Register a signal handler for SIGTERM that will set the shutdownSignal # future value.
diff --git a/compiler_gym/util/EnumUtil.h b/compiler_gym/util/EnumUtil.h index 0429a0bfe..08b201506 100644 --- a/compiler_gym/util/EnumUtil.h +++ b/compiler_gym/util/EnumUtil.h @@ -58,7 +58,7 @@ std::string enumNameToPascalCase(std::optional value) { } /** - * Enumearate all values of an optional Enum, including `std::nullopt`. + * Enumerate all values of an optional Enum, including `std::nullopt`. * * @return A vector of optional enum values. */ @@ -114,7 +114,7 @@ template } /** - * @brief Create a map from PascalCase enum value names to enum values. + * Create a map from PascalCase enum value names to enum values. * * @tparam Enum Enum type. * @return A `name -> value` lookup table. diff --git a/compiler_gym/util/GrpcStatusMacros.h b/compiler_gym/util/GrpcStatusMacros.h index f1ab01c73..c24233542 100644 --- a/compiler_gym/util/GrpcStatusMacros.h +++ b/compiler_gym/util/GrpcStatusMacros.h @@ -10,6 +10,11 @@ using grpc::Status; #undef ASSERT_OK +/** + * Fatal error if expression returns an error status. + * + * @param expr An expression that returns a `grpc::Status`. + */ #define ASSERT_OK(expr) \ do { \ const Status _status = (expr); \ @@ -17,17 +22,23 @@ using grpc::Status; } while (0) #undef RETURN_IF_ERROR +/** + * Return from the current function if the expression returns an error status. + * + * This is equivalent to: + * + * \code{.cpp} + * Status status = expr; + * if (!status.ok()) { + * return status; + * } + * \endcode + * + * @param expr An expression that returns a `grpc::Status`. + */ #define RETURN_IF_ERROR(expr) \ do { \ const Status _status = (expr); \ if (!_status.ok()) \ return _status; \ } while (0) - -// Like RETURN_IF_ERROR(), but when you really want to commit!
-#undef CRASH_IF_ERROR -#define CRASH_IF_ERROR(expr) \ - do { \ - const Status _status = (expr); \ - CHECK(_status.ok()) << _status.error_message(); \ - } while (0) diff --git a/compiler_gym/util/RunfilesPath.h b/compiler_gym/util/RunfilesPath.h index ef0dafdfc..f2308edd2 100644 --- a/compiler_gym/util/RunfilesPath.h +++ b/compiler_gym/util/RunfilesPath.h @@ -6,16 +6,20 @@ namespace compiler_gym::util { -// Resolve the path to a runfiles data path. -// -// Use environment variable COMPILER_GYM_RUNFILES=/path/to/runfiles if running -// outside of bazel. +/** + * Resolve the path to a runfiles data path. + * + * Use environment variable `COMPILER_GYM_RUNFILES=/path/to/runfiles` if running + * outside of bazel. + */ boost::filesystem::path getRunfilesPath(const std::string& relPath); -// Resolve the path to the site data path. -// -// The site data path is used for storing persistent data files, such as -// benchmark datasets. +/** + * Resolve the path to the site data path. + * + * The site data path is used for storing persistent data files, such as + * benchmark datasets. + */ boost::filesystem::path getSiteDataPath(const std::string& relPath); } // namespace compiler_gym::util diff --git a/compiler_gym/util/StrLenConstexpr.h b/compiler_gym/util/StrLenConstexpr.h index e19aa44a6..48ebc62e8 100644 --- a/compiler_gym/util/StrLenConstexpr.h +++ b/compiler_gym/util/StrLenConstexpr.h @@ -8,8 +8,15 @@ namespace compiler_gym::util { -// Calculate the length of a string literal at compile-time. -// E.g., strLen("abc") -> 3. +/** + * Calculate the length of a string literal at compile-time. + * + * E.g., `strLen("abc") -> 3`. + * + * @tparam T The character type. + * @param str A string. + * @return A nonnegative integer. + */ template size_t constexpr strLen(const T* str) { return *str ? 
1 + strLen(str + 1) : 0; diff --git a/compiler_gym/util/Unreachable.h b/compiler_gym/util/Unreachable.h index b960de113..43f1efc3f 100644 --- a/compiler_gym/util/Unreachable.h +++ b/compiler_gym/util/Unreachable.h @@ -6,9 +6,14 @@ #include -// Declare a program point as unreachable. For debug builds, this will trigger -// a fatal error if reached. For optimized builds (i.e. ones built using -// `bazel build -c opt`), this is totally undefined. +/** + * Declare a program point as unreachable. For debug builds, this will trigger a + * fatal error if reached. For optimized builds (i.e. ones built using `bazel + * build -c opt`), this is undefined. + * + * @param msg A message that will be printed if this program point is reached + * in a debug build. + */ #define UNREACHABLE(msg) \ DLOG(FATAL) << "Unreachable: " << (msg); \ __builtin_unreachable(); diff --git a/docs/source/cc/compiler_gym/service.rst b/docs/source/cc/compiler_gym/service.rst index e33f2f6ed..0174abba9 100644 --- a/docs/source/cc/compiler_gym/service.rst +++ b/docs/source/cc/compiler_gym/service.rst @@ -1,6 +1,8 @@ compiler_gym/service ==================== +This directory contains the base class for implementing compilation sessions. + .. contents:: :local: diff --git a/docs/source/cc/compiler_gym/service/runtime.rst b/docs/source/cc/compiler_gym/service/runtime.rst index e96d78abf..c032821e0 100644 --- a/docs/source/cc/compiler_gym/service/runtime.rst +++ b/docs/source/cc/compiler_gym/service/runtime.rst @@ -1,8 +1,12 @@ compiler_gym/service/runtime ============================ +This directory contains the CompilerGym runtime that takes a +:code:`compiler_gym::CompilationSession` subclass and provides an RPC service +that can be used by the Python frontend. + .. contents:: - :local: +:local: Runtime.h --------- @@ -11,16 +15,17 @@ Runtime.h .. doxygenfile:: compiler_gym/service/runtime/Runtime.h -CompilerGymService.h --------------------- +.. 
+ CompilerGymService.h + -------------------- -:code:`#include "compiler_gym/service/runtime/CompilerGymService.h"` + :code:`#include "compiler_gym/service/runtime/CompilerGymService.h"` -.. doxygenfile:: compiler_gym/service/runtime/CompilerGymService.h + .. doxygenfile:: compiler_gym/service/runtime/CompilerGymService.h -BenchmarkCache.h ----------------- + BenchmarkCache.h + ---------------- -:code:`#include "compiler_gym/service/runtime/BenchmarkCache.h"` + :code:`#include "compiler_gym/service/runtime/BenchmarkCache.h"` -.. doxygenfile:: compiler_gym/service/runtime/BenchmarkCache.h + .. doxygenfile:: compiler_gym/service/runtime/BenchmarkCache.h diff --git a/docs/source/cc/compiler_gym/util.rst b/docs/source/cc/compiler_gym/util.rst index f21ad3a4b..ac53808ee 100644 --- a/docs/source/cc/compiler_gym/util.rst +++ b/docs/source/cc/compiler_gym/util.rst @@ -4,13 +4,6 @@ compiler_gym/util .. contents:: :local: -RunfilesPath.h --------------- - -:code:`#include "compiler_gym/util/RunfilesPath.h"` - -.. doxygenfile:: compiler_gym/util/RunfilesPath.h - EnumUtil.h ---------- @@ -25,6 +18,13 @@ GrpcStatusMacros.h .. doxygenfile:: compiler_gym/util/GrpcStatusMacros.h +RunfilesPath.h +-------------- + +:code:`#include "compiler_gym/util/RunfilesPath.h"` + +.. doxygenfile:: compiler_gym/util/RunfilesPath.h + StrLenConstexpr.h ----------------- @@ -32,13 +32,6 @@ StrLenConstexpr.h .. doxygenfile:: compiler_gym/util/StrLenConstexpr.h -RunService.h ------------- - -:code:`#include "compiler_gym/util/RunService.h"` - -.. doxygenfile:: compiler_gym/util/RunService.h - Unreachable.h ------------- From 3d8535a85b4af35d70c61c14c1146ebdefe9cefd Mon Sep 17 00:00:00 2001 From: Chris Cummins Date: Thu, 3 Jun 2021 12:04:49 +0100 Subject: [PATCH 5/7] [wrappers] Add base observation and reward wrappers. 
--- compiler_gym/wrappers/__init__.py | 9 ++++++- compiler_gym/wrappers/core.py | 35 +++++++++++++++++++++++++++ docs/source/compiler_gym/wrappers.rst | 10 ++++++++ tests/wrappers/core_wrappers_test.py | 33 ++++++++++++++++++++++++- 4 files changed, 85 insertions(+), 2 deletions(-) diff --git a/compiler_gym/wrappers/__init__.py b/compiler_gym/wrappers/__init__.py index 142f4b0ba..5d01ae118 100644 --- a/compiler_gym/wrappers/__init__.py +++ b/compiler_gym/wrappers/__init__.py @@ -8,7 +8,12 @@ CommandlineWithTerminalAction, ConstrainedCommandline, ) -from compiler_gym.wrappers.core import ActionWrapper, CompilerEnvWrapper +from compiler_gym.wrappers.core import ( + ActionWrapper, + CompilerEnvWrapper, + ObservationWrapper, + RewardWrapper, +) from compiler_gym.wrappers.datasets import ( CycleOverBenchmarks, IterateOverBenchmarks, @@ -23,6 +28,8 @@ "ConstrainedCommandline", "CycleOverBenchmarks", "IterateOverBenchmarks", + "ObservationWrapper", "RandomOrderBenchmarks", + "RewardWrapper", "TimeLimit", ] diff --git a/compiler_gym/wrappers/core.py b/compiler_gym/wrappers/core.py index e886bf84d..fb9fddf99 100644 --- a/compiler_gym/wrappers/core.py +++ b/compiler_gym/wrappers/core.py @@ -51,3 +51,38 @@ def action(self, action): def reverse_action(self, action): """Translate an action from the new space to the wrapped space.""" raise NotImplementedError + + +class ObservationWrapper(CompilerEnvWrapper): + """Wraps a :class:`CompilerEnv ` environment + to allow an observation space transformation. 
+ """ + + def reset(self, *args, **kwargs): + observation = self.env.reset(*args, **kwargs) + return self.observation(observation) + + def step(self, *args, **kwargs): + observation, reward, done, info = self.env.step(*args, **kwargs) + return self.observation(observation), reward, done, info + + def observation(self, observation): + """Translate an observation to the new space.""" + raise NotImplementedError + + +class RewardWrapper(CompilerEnvWrapper): + """Wraps a :class:`CompilerEnv ` environment + to allow an reward space transformation. + """ + + def reset(self, *args, **kwargs): + return self.env.reset(*args, **kwargs) + + def step(self, *args, **kwargs): + observation, reward, done, info = self.env.step(*args, **kwargs) + return observation, self.reward(reward), done, info + + def reward(self, reward): + """Translate a reward to the new space.""" + raise NotImplementedError diff --git a/docs/source/compiler_gym/wrappers.rst b/docs/source/compiler_gym/wrappers.rst index f042ba014..207042293 100644 --- a/docs/source/compiler_gym/wrappers.rst +++ b/docs/source/compiler_gym/wrappers.rst @@ -24,6 +24,16 @@ Base wrappers .. automethod:: reverse_action +.. autoclass:: ObservationWrapper + + .. automethod:: observation + + +.. autoclass:: RewardWrapper + + .. 
automethod:: reward + + Action space wrappers --------------------- diff --git a/tests/wrappers/core_wrappers_test.py b/tests/wrappers/core_wrappers_test.py index 5e1738e42..a4c8f7a0b 100644 --- a/tests/wrappers/core_wrappers_test.py +++ b/tests/wrappers/core_wrappers_test.py @@ -5,7 +5,12 @@ """Unit tests for //compiler_gym/wrappers.""" from compiler_gym.datasets import Datasets from compiler_gym.envs.llvm import LlvmEnv -from compiler_gym.wrappers import ActionWrapper, CompilerEnvWrapper +from compiler_gym.wrappers import ( + ActionWrapper, + CompilerEnvWrapper, + ObservationWrapper, + RewardWrapper, +) from tests.test_main import main pytest_plugins = ["tests.pytest_plugins.llvm"] @@ -95,5 +100,31 @@ def reverse_action(self, action): assert env.actions == [0, 1] +def test_wrapped_observation(env: LlvmEnv): + class MyWrapper(ObservationWrapper): + def observation(self, observation): + return isinstance(observation, str) + return len(str) + + env.observation_space = "Ir" + env = MyWrapper(env) + assert env.reset() > 0 + observation, _, _, _ = env.step(0) + assert observation > 0 + + +def test_wrapped_reward(env: LlvmEnv): + class MyWrapper(RewardWrapper): + def reward(self, reward): + return -5 + + env.reward_space = "IrInstructionCount" + env = MyWrapper(env) + + env.reset() + _, reward, _, _ = env.step(0) + assert reward == -5 + + if __name__ == "__main__": main() From 39c1d3012d3ad9b77505083bd39a4341ed0cbbef Mon Sep 17 00:00:00 2001 From: Chris Cummins Date: Thu, 3 Jun 2021 12:05:02 +0100 Subject: [PATCH 6/7] [docs] Split the FAQ into sections. Issue #239. --- docs/source/faq.rst | 98 +++++++++++++++++++++++++++------------------ 1 file changed, 58 insertions(+), 40 deletions(-) diff --git a/docs/source/faq.rst b/docs/source/faq.rst index eb588dae9..6c13142d9 100644 --- a/docs/source/faq.rst +++ b/docs/source/faq.rst @@ -5,11 +5,15 @@ This page answers some of the commonly asked questions about CompilerGym. Have a question not answered here? 
File an issue on the `GitHub issue tracker `_. -.. contents:: Questions: +.. contents:: Topics: :local: +General +------- + + What can I do with this? ------------------------- +~~~~~~~~~~~~~~~~~~~~~~~~ This projects lets you control the decisions that a compiler makes when optimizing a program. Currently, it lets you control the selection and ordering @@ -27,16 +31,8 @@ Once you get the hang of things, try submitting your best algorithm to our `leaderboards `_! -I found a bug. How do I report it? ----------------------------------- - -Great! Please file an issue using the `GitHub issue tracker -`_. See -:doc:`contributing` for more details. - - Do I have to use reinforcement learning? ----------------------------------------- +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ No. We think that the the gym provides a useful abstraction for sequential decision making. You may use any technique you wish to explore the optimization @@ -44,13 +40,13 @@ space. What features are going to be added in the future? --------------------------------------------------- +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ See :ref:`roadmap `. Is compiler optimization really a sequential decision process? --------------------------------------------------------------- +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Compilers frequently package individual transformations as "optimization passes" which are then applied in a sequential order. Usually this order is fixed (e.g. @@ -60,8 +56,12 @@ CompilerGym replaces that fixed order with a sequential decision process where any pass may be applied at any stage. -When does a compiler enviornment consider an episode “done”? ------------------------------------------------------------- +LLVM Environment +---------------- + + +When does the environment consider an episode “done”? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The compiler itself doesn't have a signal for termination. 
Actions are like rewrite rules, it is up to the user to decide when no more improvement can be @@ -73,10 +73,10 @@ unexpected state - we have to abort. This happens. How do I run this on my own program? ------------------------------------- +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -For LLVM, you compile your program to an unoptimized LLVM bitcode file. This can -be done automatically for C/C++ programs using the :meth:`env.make_benchmark() +By compiling your program to an unoptimized LLVM bitcode file. This can be done +automatically for C/C++ programs using the :meth:`env.make_benchmark() ` API, or you can do this yourself using clang: @@ -89,31 +89,13 @@ tools using the `--benchmark` flag, e.g. :: - $ bazel run -c opt //compiler_gym/bin:random_search -- --env=llvm-ic-v0 \ + $ bazel run -c opt //compiler_gym/bin:random_search -- \ + --env=llvm-ic-v0 \ --benchmark=file:///$PWD/myapp.bc -I want to add a new program representation / reward signal. How do I do that? ------------------------------------------------------------------------------ - -If your program representation can be computed from existing observations, -consider using the :meth:`add_derived_space() -` API to add a derived -observation or :meth:`add_space() ` to -add a new reward space. - -If you require modifying the underlying compiler service implementation, fork -this project and build it from source (see `installation -`_). -Then modify the C++ service implementation for the compiler that you are -interested in. The service codebase is located at -:code:`compiler_gym/envs/$COMPILER/service`, where :code:`$COMPILER` is the name -of the compiler service you would wish to modify, e.g. llvm. Once done, send us -a pull request! - - Should I always try different actions? --------------------------------------- +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Some optimization actions may be called multiple times after other actions. 
An example of this is `dead code elimination @@ -122,11 +104,25 @@ example of this is `dead code elimination in different context can bring improvements. +Development +----------- + + +I found a bug. How do I report it? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Great! Please file an issue using the `GitHub issue tracker +`_. See +:doc:`contributing` for more details. + + I updated with "git pull" and now it doesn't work -------------------------------------------------- +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The first thing to is to re-run :code:`make init` to ensure that you have the correct development depencies installed, as those can change between releases. +Then run :code:`make distclean` to tidy up any build artifacts from the old +version. If that doesn't fix the problem, feel free to `file an issue `_, but @@ -137,3 +133,25 @@ stability. If you would like to build from source but do not require the latest feature set, use the `stable `_ branch which lags to the latest release with hotfixes. + + +I want to add a new program representation / reward signal. How do I do that? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If your reward or observation is a transformation of an existing space, consider +using the :mod:`compiler_gym.wrappers` module to define a wrapper that performs +the translation from the base space. + +If your reward or observation requires combining multiple existing spaces, +consider using :meth:`add_derived_space() +` or :meth:`add_space() +`. + +If you require modifying the underlying compiler service implementation, fork +this project and build it from source (see `installation +`_). +Then modify the service implementation for the compiler that you are interested +in. The service codebase is located at +:code:`compiler_gym/envs/$COMPILER/service`, where :code:`$COMPILER` is the name +of the compiler service you would wish to modify, e.g. llvm. Once done, send us +a pull request! 
From de14bfa20d2c183c7da0bf504999fc9383f77eb2 Mon Sep 17 00:00:00 2001 From: Chris Cummins Date: Thu, 3 Jun 2021 12:06:09 +0100 Subject: [PATCH 7/7] Rejig 'no cover' pragma. --- .../service/runtime/create_and_run_compiler_gym_service.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/compiler_gym/service/runtime/create_and_run_compiler_gym_service.py b/compiler_gym/service/runtime/create_and_run_compiler_gym_service.py index 19e9750b9..f02f35a03 100644 --- a/compiler_gym/service/runtime/create_and_run_compiler_gym_service.py +++ b/compiler_gym/service/runtime/create_and_run_compiler_gym_service.py @@ -48,9 +48,9 @@ def _shutdown_handler(signal_number, stack_frame): # pragma: no cover shutdown_signal.set() -def create_and_run_compiler_gym_service( # pragma: no cover +def create_and_run_compiler_gym_service( compilation_session_type: Type[CompilationSession], -): +): # pragma: no cover """Create and run an RPC service for the given compilation session. This should be called on its own in a self contained script to implement a