Skip to content
This repository has been archived by the owner on Nov 25, 2022. It is now read-only.

Commit

Permalink
[Codegen] Fix CUDA codegen for int64 Ramp (apache#13382)
Browse files: browse the repository at this point in the history
  • Loading branch information
vinx13 authored and xinetzone committed Nov 25, 2022
1 parent 1d4125b commit d92ee95
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 1 deletion.
4 changes: 3 additions & 1 deletion src/target/source/codegen_cuda.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1005,7 +1005,9 @@ void CodeGenCUDA::VisitStmt_(const EvaluateNode* op) {

void CodeGenCUDA::VisitExpr_(const RampNode* op, std::ostream& os) {
CHECK_LE(op->lanes, 4) << "ValueError: Ramp of more than 4 lanes is not allowed.";
- os << "(make_int" << op->lanes << "(";
+ os << "(make_";
+ PrintType(op->dtype, os);
+ os << "(";
for (int i = 0; i < op->lanes; i++) {
os << "(" << PrintExpr(op->base) << ")"
<< "+(" << PrintExpr(op->stride) << "*" << i << ")";
Expand Down
1 change: 1 addition & 0 deletions tests/python/topi/python/test_topi_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -1040,6 +1040,7 @@ def test_gather():
verify_gather(np.random.randn(4, 7, 5), 1, np.random.randint(low=0, high=7, size=(4, 10, 5)))
verify_gather(np.random.randn(4, 7, 5), 2, np.random.randint(low=0, high=5, size=(4, 7, 2)))
verify_gather(np.random.randn(4, 7, 5), 2, np.random.randint(low=0, high=5, size=(4, 7, 10)))
+ verify_gather(np.random.randn(4, 7, 2), 0, np.random.randint(low=0, high=4, size=(4, 7, 2)))


@tvm.testing.uses_gpu
Expand Down

0 comments on commit d92ee95

Please sign in to comment.