[CODEGEN][CUDA][OPENCL] Handle INF and NAN #3194

Merged 1 commit on May 17, 2019
19 changes: 17 additions & 2 deletions src/codegen/codegen_cuda.cc
@@ -57,6 +57,10 @@ std::string CodeGenCUDA::Finish() {
     decl_stream << "#include <sm_61_intrinsics.h>\n";
   }
 
+  if (need_math_constants_h_) {
+    decl_stream << "#include <math_constants.h>\n";
+  }
+
   return CodeGenC::Finish();
 }

@@ -318,8 +322,19 @@ inline void PrintConst(const FloatImm* op, std::ostream& os, CodeGenCUDA* p) { // NOLINT(*)
   switch (op->type.bits()) {
     case 64: case 32: {
       std::ostringstream temp;
-      temp << std::scientific << op->value;
-      if (op->type.bits() == 32) temp << 'f';
+      if (std::isinf(op->value)) {
+        if (op->value < 0) {
+          temp << "-";
+        }
+        temp << ((op->type.bits() == 32) ? "CUDART_INF_F" : "CUDART_INF");
+        p->need_math_constants_h_ = true;
+      } else if (std::isnan(op->value)) {
+        temp << ((op->type.bits() == 32) ? "CUDART_NAN_F" : "CUDART_NAN");
+        p->need_math_constants_h_ = true;
+      } else {
+        temp << std::scientific << op->value;
+        if (op->type.bits() == 32) temp << 'f';
+      }
       p->MarkConst(temp.str());
       os << temp.str();
       break;
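For illustration (not part of this diff), a minimal Python sketch of how the new CUDA behavior can be exercised, mirroring the unit test below. It assumes get_source() on the imported module returns the generated CUDA C, and that a CUDA toolchain is installed:

```python
import tvm

# Sketch: build a kernel that stores +inf and inspect the generated CUDA
# source. After this PR, the float32 literal should appear as CUDART_INF_F
# and the preamble should gain "#include <math_constants.h>".
n = 4
A = tvm.placeholder((n,), name='A', dtype='float32')
C = tvm.compute((n,), lambda i: tvm.const(float('inf'), dtype='float32'), name='C')
s = tvm.create_schedule(C.op)
s[C].bind(s[C].op.axis[0], tvm.thread_axis("threadIdx.x"))
fun = tvm.build(s, [A, C], 'cuda')
cuda_src = fun.imported_modules[0].get_source()
assert "CUDART_INF_F" in cuda_src
assert "#include <math_constants.h>" in cuda_src
```
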
7 changes: 6 additions & 1 deletion src/codegen/codegen_cuda.h
@@ -39,7 +39,9 @@ class CodeGenCUDA final : public CodeGenC {
   void Init(bool output_ssa);
   void AddFunction(LoweredFunc f);
   std::string Finish();
-  bool need_include_path() { return (enable_fp16_ || enable_int8_); }
+  bool need_include_path() {
+    return (enable_fp16_ || enable_int8_ || need_math_constants_h_);
+  }
   // override behavior
   void VisitStmt_(const ir::For* op) final;
   void PrintStorageSync(const Call* op) final;
@@ -70,6 +72,9 @@
   bool enable_fp16_{false};
   // whether enable int8
   bool enable_int8_{false};
+  // whether need math_constants.h
+  bool need_math_constants_h_{false};
+  friend void PrintConst(const FloatImm* op, std::ostream& os, CodeGenCUDA* p);
 };
 
 }  // namespace codegen
13 changes: 13 additions & 0 deletions src/codegen/codegen_opencl.cc
@@ -247,6 +247,19 @@ void CodeGenOpenCL::VisitExpr_(const Select* op, std::ostream& os) { // NOLINT(*)
   CodeGenC::VisitExpr_(op, os);
 }
 
+void CodeGenOpenCL::VisitExpr_(const FloatImm *op, std::ostream& os) { // NOLINT(*)
+  if (std::isinf(op->value)) {
+    if (op->value < 0) {
+      os << "-";
+    }
+    os << "INFINITY";
+  } else if (std::isnan(op->value)) {
+    os << "NAN";
+  } else {
+    CodeGenC::VisitExpr_(op, os);
+  }
+}
+
 runtime::Module BuildOpenCL(Array<LoweredFunc> funcs) {
   using tvm::runtime::Registry;
   bool output_ssa = false;
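A matching sketch (again, not part of this diff) for the OpenCL path. INFINITY and NAN are standard OpenCL C macros, so unlike CUDA no extra header needs to be emitted; this assumes TVM was built with OpenCL support:

```python
import tvm

# Sketch: the same pattern against the OpenCL backend. The NaN literal
# should be printed as the standard OpenCL C macro NAN.
n = 4
A = tvm.placeholder((n,), name='A', dtype='float32')
C = tvm.compute((n,), lambda i: tvm.const(float('nan'), dtype='float32'), name='C')
s = tvm.create_schedule(C.op)
s[C].bind(s[C].op.axis[0], tvm.thread_axis("threadIdx.x"))
fun = tvm.build(s, [A, C], 'opencl')
assert "NAN" in fun.imported_modules[0].get_source()
```
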
1 change: 1 addition & 0 deletions src/codegen/codegen_opencl.h
@@ -59,6 +59,7 @@ class CodeGenOpenCL final : public CodeGenC {
   void VisitExpr_(const Broadcast* op, std::ostream& os) final; // NOLINT(*)
   void VisitExpr_(const Call* op, std::ostream& os) final; // NOLINT(*)
   void VisitExpr_(const Select* op, std::ostream& os) final; // NOLINT(*)
+  void VisitExpr_(const FloatImm *op, std::ostream& os) final; // NOLINT(*)
 
  private:
   // whether enable fp16 and fp64 extension
30 changes: 30 additions & 0 deletions tests/python/unittest/test_codegen_cuda.py
@@ -125,8 +125,38 @@ def check_cuda(n, value):
     check_cuda(64, 0)
     check_cuda(64, -3)
 
+
+def test_cuda_inf_nan():
+    target = 'cuda'
+    def check_inf_nan(ctx, n, value, dtype):
+        A = tvm.placeholder((n,), name='A', dtype=dtype)
+        inf_value = tvm.const(value, dtype=dtype)
+        C = tvm.compute((n,), lambda i: inf_value, name='C')
+        s = tvm.create_schedule(C.op)
+        s[C].bind(s[C].op.axis[0], tvm.thread_axis("threadIdx.x"))
+        fun = tvm.build(s, [A, C], target)
+        a = tvm.nd.empty((n,), A.dtype, ctx)
+        c = tvm.nd.empty((n,), A.dtype, ctx)
+        # Only need to test compiling here
+        fun(a, c)
+
+    if not tvm.gpu(0).exist or not tvm.module.enabled("cuda"):
+        print("skip because cuda is not enabled..")
+        return
+
+    ctx = tvm.context(target, 0)
+
+    check_inf_nan(ctx, 1, -float('inf'), 'float32')
+    check_inf_nan(ctx, 1, -float('inf'), 'float64')
+    check_inf_nan(ctx, 1, float('inf'), 'float32')
+    check_inf_nan(ctx, 1, float('inf'), 'float64')
+    check_inf_nan(ctx, 1, float('nan'), 'float32')
+    check_inf_nan(ctx, 1, float('nan'), 'float64')
+
+
 if __name__ == "__main__":
     test_cuda_vectorize_add()
     test_cuda_multiply_add()
     test_cuda_vectorize_load()
     test_cuda_make_int8x4()
+    test_cuda_inf_nan()
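
The test deliberately stops at compilation. A hypothetical value check that could be layered on top, assuming an actual CUDA device is present (the numpy comparison is illustrative, not from this PR):

```python
import numpy as np
import tvm

# Hypothetical follow-up to check_inf_nan: run the kernel and verify the
# output really is infinite, not just that the source compiles.
ctx = tvm.gpu(0)
n = 1
A = tvm.placeholder((n,), name='A', dtype='float32')
C = tvm.compute((n,), lambda i: tvm.const(float('inf'), dtype='float32'), name='C')
s = tvm.create_schedule(C.op)
s[C].bind(s[C].op.axis[0], tvm.thread_axis("threadIdx.x"))
fun = tvm.build(s, [A, C], 'cuda')
a = tvm.nd.empty((n,), 'float32', ctx)
c = tvm.nd.empty((n,), 'float32', ctx)
fun(a, c)
assert np.isinf(c.asnumpy()).all()
```
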
27 changes: 27 additions & 0 deletions tests/python/unittest/test_codegen_opencl.py
@@ -66,6 +66,33 @@ def check_select(ctx, n, dtype):
     check_select(ctx, 1, 'int16')
     check_select(ctx, 1, 'uint16')
 
+def test_opencl_inf_nan():
+    def check_inf_nan(ctx, n, value, dtype):
+        A = tvm.placeholder((n,), name='A', dtype=dtype)
+        inf_value = tvm.const(value, dtype=dtype)
+        C = tvm.compute((n,), lambda i: inf_value, name='C')
+        s = tvm.create_schedule(C.op)
+        s[C].bind(s[C].op.axis[0], tvm.thread_axis("threadIdx.x"))
+        fun = tvm.build(s, [A, C], target)
+        a = tvm.nd.empty((n,), A.dtype, ctx)
+        c = tvm.nd.empty((n,), A.dtype, ctx)
+        # Only need to test compiling here
+        fun(a, c)
+
+    if not tvm.module.enabled(target):
+        print("skip because opencl is not enabled..")
+        return
+
+    ctx = tvm.context(target, 0)
+
+    check_inf_nan(ctx, 1, -float('inf'), 'float32')
+    check_inf_nan(ctx, 1, -float('inf'), 'float64')
+    check_inf_nan(ctx, 1, float('inf'), 'float32')
+    check_inf_nan(ctx, 1, float('inf'), 'float64')
+    check_inf_nan(ctx, 1, float('nan'), 'float32')
+    check_inf_nan(ctx, 1, float('nan'), 'float64')
+
 
 if __name__ == "__main__":
     test_opencl_ternary_expression()
+    test_opencl_inf_nan()