Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HexagonOffload needs to call inject_hvx_lock_unlock() before lower_parallel_tasks() #6457

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 19 additions & 11 deletions src/CodeGen_Hexagon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

#include "AlignLoads.h"
#include "CSE.h"
#include "CodeGen_Hexagon.h"
#include "CodeGen_Internal.h"
#include "CodeGen_Posix.h"
#include "Debug.h"
Expand Down Expand Up @@ -437,17 +438,6 @@ class InjectHVXLocks : public IRMutator {
Target target;
};

// Add HVX lock/unlock handling to `body` for the given target.
//
// Steps, in order:
//   1. Run the InjectHVXLocks mutator over the statement.
//   2. If the mutator reports that HVX is used, wrap the result with
//      acquire_hvx_context().
//   3. Substitute the mutator's uses_hvx flag for the name "uses_hvx"
//      and simplify the resulting statement.
Stmt inject_hvx_lock_unlock(Stmt body, const Target &target) {
    InjectHVXLocks injector(target);
    Stmt result = injector.mutate(body);
    if (injector.uses_hvx) {
        result = acquire_hvx_context(result, target);
    }
    return simplify(substitute("uses_hvx", injector.uses_hvx, result));
}

void CodeGen_Hexagon::compile_func(const LoweredFunc &f,
const string &simple_name,
const string &extern_name) {
Expand Down Expand Up @@ -495,6 +485,8 @@ void CodeGen_Hexagon::compile_func(const LoweredFunc &f,
debug(1) << "Optimizing Hexagon instructions...\n";
body = optimize_hexagon_instructions(body, target);

debug(1) << "Before qurt_hvx_lock:\n";
debug(1) << body << "\n";
debug(1) << "Adding calls to qurt_hvx_lock, if necessary...\n";
body = inject_hvx_lock_unlock(body, target);

Expand Down Expand Up @@ -2245,13 +2237,29 @@ std::unique_ptr<CodeGen_Posix> new_CodeGen_Hexagon(const Target &target) {
return std::make_unique<CodeGen_Hexagon>(target);
}

// Add HVX lock/unlock handling to `body` for the given target.
//
// Steps, in order:
//   1. Run the InjectHVXLocks mutator over the statement.
//   2. If the mutator reports that HVX is used, wrap the result with
//      acquire_hvx_context().
//   3. Substitute the mutator's uses_hvx flag for the name "uses_hvx"
//      and simplify the resulting statement.
Stmt inject_hvx_lock_unlock(Stmt body, const Target &target) {
    InjectHVXLocks injector(target);
    Stmt result = injector.mutate(body);
    if (injector.uses_hvx) {
        result = acquire_hvx_context(result, target);
    }
    return simplify(substitute("uses_hvx", injector.uses_hvx, result));
}

#else // WITH_HEXAGON

// Stand-in used on the #else side of WITH_HEXAGON: reports a user error
// explaining that Hexagon support is not part of this build, then yields
// an empty (null) pointer.
// NOTE(review): the return is presumably unreachable if user_error aborts
// or throws -- confirm.
std::unique_ptr<CodeGen_Posix> new_CodeGen_Hexagon(const Target &target) {
    user_error << "hexagon not enabled for this build of Halide.\n";
    return {};
}

// Stand-in used on the #else side of WITH_HEXAGON: instead of rewriting
// `body`, it reports a user error saying Hexagon is not enabled in this
// build. Neither `body` nor `target` is inspected.
Stmt inject_hvx_lock_unlock(Stmt body, const Target &target) {
user_error << "hexagon not enabled for this build of Halide.\n";
// Default-constructed Stmt as the return value.
// NOTE(review): presumably unreachable if user_error aborts or throws -- confirm.
return Stmt();
}

#endif // WITH_HEXAGON

} // namespace Internal
Expand Down
17 changes: 17 additions & 0 deletions src/CodeGen_Hexagon.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#ifndef HALIDE_CODEGEN_HEXAGON_H
#define HALIDE_CODEGEN_HEXAGON_H

#include "Expr.h"

namespace Halide {

struct Target;

namespace Internal {

/** Add calls to qurt_hvx_lock (and the matching unlock handling) to
 * `body` where necessary for the given target, and return the
 * simplified result. In builds without WITH_HEXAGON the definition
 * reports a user error instead. */
Stmt inject_hvx_lock_unlock(Stmt body, const Target &target);

} // namespace Internal
} // namespace Halide

#endif
9 changes: 8 additions & 1 deletion src/HexagonOffload.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#include <memory>

#include "Closure.h"
#include "CodeGen_Hexagon.h"
#include "Elf.h"
#include "HexagonOffload.h"
#include "IRMutator.h"
Expand Down Expand Up @@ -286,7 +287,7 @@ void do_reloc(char *addr, uint32_t mask, uintptr_t val, bool is_signed, bool ver
// Pull out the subinstructions. They're the low 13
// bits of each half-word.
uint32_t hi = (inst >> 16) & ((1 << 13) - 1);
//uint32_t lo = inst & ((1 << 13) - 1);
// uint32_t lo = inst & ((1 << 13) - 1);

// We only understand the ones where hi starts with 010
internal_assert((hi >> 10) == 2);
Expand Down Expand Up @@ -753,6 +754,12 @@ class InjectHexagonRpc : public IRMutator {
DeviceAPI::None, loop->body);
}

debug(2) << "Before inject_hvx_lock_unlock:\n"
<< body << "\n";
body = inject_hvx_lock_unlock(body, device_code.target());
debug(2) << "After inject_hvx_lock_unlock:\n"
<< body << "\n";

// Build a closure for the device code.
// Note that we must do this *before* calling lower_parallel_tasks();
// otherwise the Closure may fail to find buffers that are referenced
Expand Down