We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 68af215, commit 471cc7f (Copy full SHA for 471cc7f)
src/tl_templates/cuda/common.h
@@ -330,3 +330,8 @@ TL_DEVICE void __sync_thread_partial() {
330
asm volatile("bar.sync %0, %1;" : : "r"(barrier_id), "r"(thread_count));
331
}
332
} // namespace tl
333
+
334
+namespace cutlass {  // Reopens namespace cutlass to add a fast_exp overload taking bfloat16_t.
335
+TL_DEVICE  // Function-qualifier macro; its definition is outside this hunk.
336
+bfloat16_t fast_exp(bfloat16_t x) { return ::hexp(x); }  // Returns the result of the global-scope hexp applied to x. NOTE(review): presumably the CUDA bf16 exp intrinsic, relying on conversions between bfloat16_t and its argument/return type -- confirm.
337
+} // namespace cutlass
0 commit comments