Skip to content

Commit

Permalink
✨ Update examples (#52)
Browse files Browse the repository at this point in the history
Makes clang-tidy compatible with HIP, adds an example for using external
dependencies and fixes a few clang-tidy warnings.
  • Loading branch information
aaronmondal authored Mar 27, 2023
1 parent e19d31f commit 70da306
Show file tree
Hide file tree
Showing 14 changed files with 74 additions and 20 deletions.
1 change: 1 addition & 0 deletions examples/.envrc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
use flake
6 changes: 3 additions & 3 deletions examples/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@ ll_compilation_database(
"//format_example",
"//frontend_action_example",
"//external_dependency_example",
"//hip_example",
"//hip_rdc_example",
"//module_partition_example",
"//hip_example:amdgpu",
"//hip_rdc_example:amdgpu",
# "//module_partition_example", # Bugged. Need to report upstream.
"//modules_example",
# "//modules_draft_example", # Not clang-tidy conform.
"//openmp_example",
Expand Down
1 change: 1 addition & 0 deletions examples/cuda_example/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ ll_binary(
compile_flags = OFFLOAD_ALL_NVPTX + [
"--std=c++20",
],
visibility = ["@//:__pkg__"],
)

ll_test(
Expand Down
22 changes: 14 additions & 8 deletions examples/cuda_example/example.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
constexpr float kInputA = 1.0F;
constexpr float kInputB = 2.0F;
constexpr float kExpectedOutput = 3.0F;
int kDimension = 1 << 20;
constexpr int kDimension = 1 << 20;
constexpr auto kThreadsPerBlockX = 128;
constexpr auto kThreadsPerBlockY = 1;
constexpr auto kThreadsPerBlockZ = 1;
Expand All @@ -20,7 +20,7 @@ constexpr void cuda_assert(const T value) {

__global__ void add_vector(float *input_a, const float *input_b,
const int dimension) {
int index = blockIdx.x * blockDim.x + threadIdx.x;
const uint32_t index = blockIdx.x * blockDim.x + threadIdx.x;
if (index < dimension) {
// NOLINTNEXTLINE cppcoreguidelines-pro-bounds-pointer-arithmetic
input_a[index] += input_b[index];
Expand All @@ -29,13 +29,13 @@ __global__ void add_vector(float *input_a, const float *input_b,

void print_device_info() {
int count = 0;
cudaError_t err = cudaGetDeviceCount(&count);
const cudaError_t err = cudaGetDeviceCount(&count);
if (err == cudaErrorInvalidDevice) {
std::cout << "FAIL: invalid device" << std::endl;
}
std::cout << "Number of devices is " << count << std::endl;

cudaDeviceProp device_prop;
cudaDeviceProp device_prop{};
cuda_assert(cudaGetDeviceProperties(&device_prop, 0));
std::cout << "System major: " << device_prop.major << std::endl;
std::cout << "System minor: " << device_prop.minor << std::endl;
Expand Down Expand Up @@ -93,18 +93,24 @@ auto main() -> int {
cuda_assert(cudaMemcpy(device_input_b, host_input_b,
kDimension * sizeof(float), cudaMemcpyHostToDevice));

dim3 grid_dim = dim3(kDimension / kThreadsPerBlockX);
dim3 block_dim = dim3(kThreadsPerBlockX);
const dim3 grid_dim = dim3(kDimension / kThreadsPerBlockX);
const dim3 block_dim = dim3(kThreadsPerBlockX);

// This is not pretty, but it is close to the HIP implementation.
std::array<void *, 3> args = {&device_input_a, &device_input_b, &kDimension};
// NOLINTBEGIN cppcoreguidelines-pro-type-reinterpret-cast
// NOLINTBEGIN cppcoreguidelines-pro-type-const-cast
std::array<void *, 3> args = {
&device_input_a, &device_input_b,
reinterpret_cast<void *>(const_cast<int *>(&kDimension))};
cudaLaunchKernel(reinterpret_cast<void *>(add_vector), grid_dim, block_dim,
args.data(), 0, nullptr);
// NOLINTEND cppcoreguidelines-pro-type-reinterpret-cast
// NOLINTEND cppcoreguidelines-pro-type-const-cast

cuda_assert(cudaMemcpy(host_input_a, device_input_a,
kDimension * sizeof(float), cudaMemcpyDeviceToHost));

int errors = count_errors(host_input_a);
const int errors = count_errors(host_input_a);

cuda_assert(cudaFree(device_input_a));
cuda_assert(cudaFree(device_input_b));
Expand Down
15 changes: 15 additions & 0 deletions examples/external_dependency_example/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
load("@rules_ll//ll:defs.bzl", "ll_binary")

# Example binary demonstrating how to link against an external (non-Bazel)
# dependency. OpenSSL's libcrypto is resolved via the environment rather than
# a Bazel-tracked repository.
ll_binary(
name = "external_dependency_example",
srcs = ["main.cpp"],
compile_flags = ["-std=c++2b"],
# The include path, library search path and rpath are already set in
# flake.nix and added to all targets in the workspace. This way we still
# have a reproducible openssl despite it not being tracked by Bazel.
#
# If possible, try to avoid this pattern and write custom BUILD files for
# external dependencies that are not too complicated to port to Bazel.
link_flags = ["-lcrypto"],
# Visible to the workspace root so the compilation database target can
# aggregate this example.
visibility = ["@//:__pkg__"],
)
26 changes: 26 additions & 0 deletions examples/external_dependency_example/main.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#include <openssl/sha.h>

#include <array>
#include <cstring>  // std::memcpy — was missing; previously relied on a transitive include.
#include <format>
#include <iostream>

// Hashes the ASCII message "hello" with SHA-256 via OpenSSL and prints the
// digest as lowercase hex next to the known expected value so the output can
// be eyeballed (or grepped) for correctness.
auto main() -> int {
  constexpr auto kMessageSize = 5;
  std::array<unsigned char, kMessageSize> message = {'h', 'e', 'l', 'l', 'o'};

  // SHA256 with a null output pointer returns a pointer to a static internal
  // buffer holding the 32-byte digest.
  auto *hashed_message = SHA256(message.data(), message.size(), nullptr);

  // Copy the digest out of OpenSSL's static buffer into our own storage.
  std::array<std::byte, SHA256_DIGEST_LENGTH> output = {};
  std::memcpy(output.data(), hashed_message, SHA256_DIGEST_LENGTH);

  std::cout << "Calculated:\t";
  for (auto val : output) {
    // {:0>2x}: zero-pad each byte to two hex digits.
    std::cout << std::format("{:0>2x}", static_cast<int>(val));
  }
  std::cout << "\n";

  std::cout
      << "Expected:\t"
      << "2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824"
      << std::endl;
}
2 changes: 2 additions & 0 deletions examples/hip_example/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ ll_binary(
compile_flags = OFFLOAD_ALL_NVPTX + [
"--std=c++20",
],
visibility = ["@//:__pkg__"],
)

ll_binary(
Expand All @@ -22,6 +23,7 @@ ll_binary(
compile_flags = OFFLOAD_ALL_AMDGPU + [
"--std=c++20",
],
visibility = ["@//:__pkg__"],
)

# We usually prefer native_test over ll_test, but that won't work here as the
Expand Down
8 changes: 4 additions & 4 deletions examples/hip_example/example.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ constexpr void hip_assert(const T value) {

__global__ auto add_vector(float *input_a, const float *input_b,
const int dimension) -> void {
int index = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
const uint32_t index = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
if (index < dimension) {
// NOLINTNEXTLINE cppcoreguidelines-pro-bounds-pointer-arithmetic
input_a[index] += input_b[index];
Expand All @@ -28,7 +28,7 @@ __global__ auto add_vector(float *input_a, const float *input_b,

void print_device_info() {
int count = 0;
hipError_t err = hipGetDeviceCount(&count);
const hipError_t err = hipGetDeviceCount(&count);
if (err == hipErrorInvalidDevice) {
std::cout << "FAIL: invalid device" << std::endl;
}
Expand Down Expand Up @@ -93,8 +93,8 @@ auto main() -> int {
hip_assert(hipMemcpy(device_input_b, host_input_b, kDimension * sizeof(float),
hipMemcpyHostToDevice));

dim3 grid_dim = dim3(kDimension / kThreadsPerBlockX);
dim3 block_dim = dim3(kThreadsPerBlockX);
const dim3 grid_dim = dim3(kDimension / kThreadsPerBlockX);
const dim3 block_dim = dim3(kThreadsPerBlockX);

hipLaunchKernelGGL(add_vector, grid_dim, block_dim, 0, nullptr,
device_input_a, device_input_b, kDimension);
Expand Down
2 changes: 2 additions & 0 deletions examples/hip_rdc_example/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ ll_binary(
],
compilation_mode = "hip_amdgpu",
compile_flags = COMPILE_FLAGS_AMDGPU,
visibility = ["@//:__pkg__"],
deps = [
":add_amdgpu",
":multiply_amdgpu",
Expand Down Expand Up @@ -93,6 +94,7 @@ ll_binary(
],
compilation_mode = "hip_nvptx",
compile_flags = COMPILE_FLAGS_NVPTX,
visibility = ["@//:__pkg__"],
deps = [
":add_nvptx",
":multiply_nvptx",
Expand Down
2 changes: 1 addition & 1 deletion examples/hip_rdc_example/add.hip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

__device__ auto add_vector(float *input_a, const float *input_b,
const int dimension) -> void {
int index = blockIdx.x * blockDim.x + threadIdx.x;
const uint32_t index = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
if (index < dimension) {
// NOLINTNEXTLINE cppcoreguidelines-pro-bounds-pointer-arithmetic
input_a[index] += input_b[index];
Expand Down
4 changes: 2 additions & 2 deletions examples/hip_rdc_example/main.hip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,8 @@ auto main() -> int {
hip_assert(hipMemcpy(device_input_c, host_input_c, kDimension * sizeof(float),
hipMemcpyHostToDevice));

dim3 grid_dim = dim3(kDimension / kThreadsPerBlockX);
dim3 block_dim = dim3(kThreadsPerBlockX);
const dim3 grid_dim = dim3(kDimension / kThreadsPerBlockX);
const dim3 block_dim = dim3(kThreadsPerBlockX);

hipLaunchKernelGGL(multiply_add, grid_dim, block_dim, 0, nullptr,
device_input_a, device_input_b, device_input_c,
Expand Down
2 changes: 1 addition & 1 deletion examples/hip_rdc_example/multiply.hip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

__device__ auto multiply_vector(float *input_a, const float *input_b,
const int dimension) -> void {
int index = blockIdx.x * blockDim.x + threadIdx.x;
const uint32_t index = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
if (index < dimension) {
// NOLINTNEXTLINE cppcoreguidelines-pro-bounds-pointer-arithmetic
input_a[index] *= input_b[index];
Expand Down
2 changes: 1 addition & 1 deletion ll/compilation_database.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ with open(sys.argv[1], 'r') as in_file, open(sys.argv[2], 'w') as out_file:
# Workaround for https://github.com/llvm/llvm-project/issues/59291.
for arg in fragment['arguments']:
if arg == '-xcuda':
if arg in ['-xcuda', '-xhip']:
fragment['arguments'] += ['--offload-host-only']
if arg.startswith('--offload-arch'):
fragment['arguments'].remove(arg)
Expand Down
1 change: 1 addition & 0 deletions ll/init.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ def _initialize_rules_ll_impl(_):
"@rules_ll//patches:hipamd_inconsistent_overrides.diff",
"@rules_ll//patches:hipamd_fix_extraneous_parentheses.diff",
"@rules_ll//patches:hipamd_default_visibility.diff",
"@rules_ll//patches:hipamd_enforce_semicolon.diff",
],
patch_args = ["-p1"],
)
Expand Down

0 comments on commit 70da306

Please sign in to comment.