Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Backport #8259 to release/17.x #8270

Merged
merged 4 commits into from
Jun 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions src/CodeGen_C.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1159,8 +1159,9 @@ void CodeGen_C::compile(const Buffer<> &buffer) {
bool is_constant = buffer.dimensions() != 0;

// If it is an GPU source kernel, we would like to see the actual output, not the
// uint8 representation. We use a string literal for this.
if (ends_with(name, "gpu_source_kernels")) {
// uint8 representation. We use a string literal for this. Since the Vulkan backend
// actually generates a SPIR-V binary, keep it as raw data to avoid textual reformatting.
if (ends_with(name, "gpu_source_kernels") && !target.has_feature(Target::Vulkan)) {
stream << "static const char *" << name << "_string = R\"BUFCHARSOURCE(";
stream.write((char *)b.host, num_elems);
stream << ")BUFCHARSOURCE\";\n";
Expand Down
22 changes: 16 additions & 6 deletions src/CodeGen_Vulkan_Dev.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2514,12 +2514,20 @@ void CodeGen_Vulkan_Dev::SPIRV_Emitter::declare_workgroup_size(SpvId kernel_func
namespace {

// Locate all the unique GPU variables used as SIMT intrinsics
// This pass is used to identify if LocalInvocationID and/or WorkgroupID
// need to be declared as variables for the entrypoint to the Kernel. Since
// these can only be declared once and their type is vec3, we don't
// care about the specific dims that are mapped to loop variables.
class FindIntrinsicsUsed : public IRVisitor {
using IRVisitor::visit;
void visit(const For *op) override {
if (CodeGen_GPU_Dev::is_gpu_var(op->name)) {

// map the block or thread id name to the SIMT intrinsic definition
auto intrinsic = simt_intrinsic(op->name);
intrinsics_used.insert(intrinsic.first);

// mark the name of the intrinsic being used (without the dimension)
intrinsics_used.insert(intrinsic.first); // name only!
}
op->body.accept(this);
}
Expand Down Expand Up @@ -2555,20 +2563,22 @@ void CodeGen_Vulkan_Dev::SPIRV_Emitter::declare_entry_point(const Stmt &s, SpvId
s.accept(&find_intrinsics);

SpvFactory::Variables entry_point_variables;
for (const std::string &intrinsic_name : find_intrinsics.intrinsics_used) {
for (const std::string &used_intrinsic : find_intrinsics.intrinsics_used) {

// The builtins are pointers to vec3
// The builtins are pointers to vec3 and can only be declared once per kernel entrypoint
SpvStorageClass storage_class = SpvStorageClassInput;
SpvId intrinsic_type_id = builder.declare_type(Type(Type::UInt, 32, 3));
SpvId intrinsic_ptr_type_id = builder.declare_pointer_type(intrinsic_type_id, storage_class);
const std::string intrinsic_var_name = std::string("k") + std::to_string(kernel_index) + std::string("_") + intrinsic_name;
const std::string intrinsic_var_name = std::string("k") + std::to_string(kernel_index) + std::string("_") + used_intrinsic;
SpvId intrinsic_var_id = builder.declare_global_variable(intrinsic_var_name, intrinsic_ptr_type_id, storage_class);
SpvId intrinsic_loaded_id = builder.reserve_id();
builder.append(SpvFactory::load(intrinsic_type_id, intrinsic_loaded_id, intrinsic_var_id));
symbol_table.push(intrinsic_var_name, {intrinsic_loaded_id, storage_class});

// Annotate that this is the specific builtin
SpvBuiltIn built_in_kind = map_simt_builtin(intrinsic_name);
// Map the used intrinsic name to the specific builtin
SpvBuiltIn built_in_kind = map_simt_builtin(used_intrinsic);

// Add an annotation that indicates this variable is bound to the requested intrinsic
SpvBuilder::Literals annotation_literals = {(uint32_t)built_in_kind};
builder.add_annotation(intrinsic_var_id, SpvDecorationBuiltIn, annotation_literals);

Expand Down
210 changes: 122 additions & 88 deletions src/runtime/internal/block_allocator.h

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/runtime/internal/memory_arena.h
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,7 @@ void *MemoryArena::create_entry(void *user_context, Block *block, uint32_t index
void *entry_ptr = lookup_entry(user_context, block, index);
block->free_index = block->indices[index];
block->status[index] = AllocationStatus::InUse;
#if DEBUG_RUNTIME_INTERNAL
#ifdef DEBUG_RUNTIME_INTERNAL
memset(entry_ptr, 0, config.entry_size);
#endif
return entry_ptr;
Expand Down
6 changes: 5 additions & 1 deletion src/runtime/internal/memory_resources.h
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ ALWAYS_INLINE bool is_power_of_two_alignment(size_t x) {
// -- Alignment must be power of two!
ALWAYS_INLINE size_t aligned_offset(size_t offset, size_t alignment) {
halide_abort_if_false(nullptr, is_power_of_two_alignment(alignment));
return (offset + (alignment - 1)) & ~(alignment - 1);
return (alignment == 0) ? (offset) : (offset + (alignment - 1)) & ~(alignment - 1);
}

// Returns a suitable alignment such that requested alignment is a suitable
Expand Down Expand Up @@ -202,18 +202,22 @@ struct HalideSystemAllocatorFns {

typedef int (*AllocateBlockFn)(void *, MemoryBlock *);
typedef int (*DeallocateBlockFn)(void *, MemoryBlock *);
typedef int (*ConformBlockRequestFn)(void *, MemoryRequest *);

struct MemoryBlockAllocatorFns {
AllocateBlockFn allocate = nullptr;
DeallocateBlockFn deallocate = nullptr;
ConformBlockRequestFn conform = nullptr;
};

typedef int (*AllocateRegionFn)(void *, MemoryRegion *);
typedef int (*DeallocateRegionFn)(void *, MemoryRegion *);
typedef int (*ConformBlockRegionFn)(void *, MemoryRequest *);

struct MemoryRegionAllocatorFns {
AllocateRegionFn allocate = nullptr;
DeallocateRegionFn deallocate = nullptr;
ConformBlockRegionFn conform = nullptr;
};

// --
Expand Down
Loading
Loading