Merged main:ca003ee06d0e into amd-gfx:a86283a53947

Local branch amd-gfx a86283a Merged main:e00f2272d640 into amd-gfx:7a55dcafa40e Remote branch main ca003ee [clang-repl] Disable InterpreterExceptionTest on RISC-V (llvm#68216)
GPUOpen-Drivers · Oct 4, 2023 · a0a17ac · a0a17ac
2 parents a86283a + ca003ee
commit a0a17ac
Show file tree

Hide file tree

Showing 24 changed files with 749 additions and 65 deletions.
diff --git a/clang/docs/ClangOffloadBundler.rst b/clang/docs/ClangOffloadBundler.rst
@@ -309,3 +309,30 @@ target by comparing bundle ID's. Two bundle ID's are considered compatible if:
   * Their offload kind are the same
   * Their target triple are the same
   * Their GPUArch are the same
+
+Compression and Decompression
+=============================
+
+``clang-offload-bundler`` provides features to compress and decompress the full
+bundle, leveraging inherent redundancies within the bundle entries. Use the
+`-compress` command-line option to enable this compression capability.
+
+The compressed offload bundle begins with a header followed by the compressed binary data:
+
+- **Magic Number (4 bytes)**:
+    This is a unique identifier to distinguish compressed offload bundles. The value is the string 'CCOB' (Compressed Clang Offload Bundle).
+
+- **Version Number (16-bit unsigned int)**:
+    This denotes the version of the compressed offload bundle format. The current version is `1`.
+
+- **Compression Method (16-bit unsigned int)**:
+    This field indicates the compression method used. The value corresponds to either `zlib` or `zstd`, represented as a 16-bit unsigned integer cast from the LLVM compression enumeration.
+
+- **Uncompressed Binary Size (32-bit unsigned int)**:
+    This is the size (in bytes) of the binary data before it was compressed.
+
+- **Hash (64-bit unsigned int)**:
+    This is a 64-bit truncated MD5 hash of the uncompressed binary data. It serves for verification and caching purposes.
+
+- **Compressed Data**:
+    The actual compressed binary data follows the header. Its size can be inferred from the total size of the file minus the header size.
diff --git a/clang/include/clang/Driver/OffloadBundler.h b/clang/include/clang/Driver/OffloadBundler.h
@@ -19,18 +19,23 @@
 
 #include "llvm/Support/Error.h"
 #include "llvm/TargetParser/Triple.h"
+#include <llvm/Support/MemoryBuffer.h>
 #include <string>
 #include <vector>
 
 namespace clang {
 
 class OffloadBundlerConfig {
 public:
+  OffloadBundlerConfig();
+
   bool AllowNoHost = false;
   bool AllowMissingBundles = false;
   bool CheckInputArchive = false;
   bool PrintExternalCommands = false;
   bool HipOpenmpCompatible = false;
+  bool Compress = false;
+  bool Verbose = false;
 
   unsigned BundleAlignment = 1;
   unsigned HostInputIndex = ~0u;
@@ -84,6 +89,38 @@ struct OffloadTargetInfo {
   std::string str() const;
 };
 
+// CompressedOffloadBundle represents the format for the compressed offload
+// bundles.
+//
+// The format is as follows:
+// - Magic Number (4 bytes) - A constant "CCOB".
+// - Version (2 bytes)
+// - Compression Method (2 bytes) - Uses the values from
+// llvm::compression::Format.
+// - Uncompressed Size (4 bytes).
+// - Truncated MD5 Hash (8 bytes).
+// - Compressed Data (variable length).
+
+class CompressedOffloadBundle {
+private:
+  static inline const size_t MagicSize = 4;
+  static inline const size_t VersionFieldSize = sizeof(uint16_t);
+  static inline const size_t MethodFieldSize = sizeof(uint16_t);
+  static inline const size_t SizeFieldSize = sizeof(uint32_t);
+  static inline const size_t HashFieldSize = 8;
+  static inline const size_t HeaderSize = MagicSize + VersionFieldSize +
+                                          MethodFieldSize + SizeFieldSize +
+                                          HashFieldSize;
+  static inline const llvm::StringRef MagicNumber = "CCOB";
+  static inline const uint16_t Version = 1;
+
+public:
+  static llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
+  compress(const llvm::MemoryBuffer &Input, bool Verbose = false);
+  static llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
+  decompress(const llvm::MemoryBuffer &Input, bool Verbose = false);
+};
+
 } // namespace clang
 
 #endif // LLVM_CLANG_DRIVER_OFFLOADBUNDLER_H
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
@@ -1183,6 +1183,10 @@ def fgpu_inline_threshold_EQ : Joined<["-"], "fgpu-inline-threshold=">,
 def fgpu_sanitize : Flag<["-"], "fgpu-sanitize">, Group<f_Group>,
   HelpText<"Enable sanitizer for supported offloading devices">;
 def fno_gpu_sanitize : Flag<["-"], "fno-gpu-sanitize">, Group<f_Group>;
+
+def offload_compress : Flag<["--"], "offload-compress">,
+  HelpText<"Compress offload device binaries (HIP only)">;
+def no_offload_compress : Flag<["--"], "no-offload-compress">;
 }
 
 // CUDA options

diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -3723,10 +3723,12 @@ static Value *emitPointerArithmetic(CodeGenFunction &CGF,
   // Explicitly handle GNU void* and function pointer arithmetic extensions. The
   // GNU void* casts amount to no-ops since our void* type is i8*, but this is
   // future proof.
+  llvm::Type *elemTy;
   if (elementType->isVoidType() || elementType->isFunctionType())
-    return CGF.Builder.CreateGEP(CGF.Int8Ty, pointer, index, "add.ptr");
+    elemTy = CGF.Int8Ty;
+  else
+    elemTy = CGF.ConvertTypeForMem(elementType);
 
-  llvm::Type *elemTy = CGF.ConvertTypeForMem(elementType);
   if (CGF.getLangOpts().isSignedOverflowDefined())
     return CGF.Builder.CreateGEP(elemTy, pointer, index, "add.ptr");