Merge branch 'taichi-dev:master' into amdgpu_backend
galeselee authored Oct 31, 2022
2 parents 7b01a65 + 8afb2dd commit fddddcd
Showing 17 changed files with 102 additions and 32 deletions.
9 changes: 2 additions & 7 deletions .github/workflows/scripts/aot-demo.sh
@@ -16,14 +16,9 @@ function build-and-smoke-test-android-aot-demo {
export TAICHI_REPO_DIR=$(pwd)/taichi

rm -rf taichi-aot-demo
# IF YOU PIN THIS TO A COMMIT/BRANCH, YOU'RE RESPONSIBLE TO REVERT IT BACK TO MASTER ONCE MERGED.
git clone https://github.com/taichi-dev/taichi-aot-demo

# Normally we checkout the master's commit Id: https://github.com/taichi-dev/taichi-aot-demo/commit/master
# As for why we need this explicit commit Id here, refer to: https://docs.taichi-lang.org/docs/master/contributor_guide#handle-special-ci-failures
pushd taichi-aot-demo
git checkout 6b8d22f2c38318cf7a7333dc17cff4ae7ee5e607
popd

APP_ROOT=taichi-aot-demo/implicit_fem
ANDROID_APP_ROOT=$APP_ROOT/android
JNI_PATH=$ANDROID_APP_ROOT/app/src/main/jniLibs/arm64-v8a/
@@ -103,7 +98,7 @@ function build-and-test-headless-demo {
popd

rm -rf taichi-aot-demo
git clone --recursive --depth=1 https://github.com/taichi-dev/taichi-aot-demo -b update-aot-module1
git clone --recursive --depth=1 https://github.com/taichi-dev/taichi-aot-demo
cd taichi-aot-demo
mkdir build
pushd build
1 change: 1 addition & 0 deletions .github/workflows/testing.yml
@@ -383,6 +383,7 @@ jobs:
-DTI_WITH_VULKAN:BOOL=ON
-DTI_BUILD_TESTS:BOOL=ON
-DTI_WITH_C_API=ON
LLVM_VERSION: 15

- name: Test
id: test
2 changes: 2 additions & 0 deletions c_api/docs/taichi/taichi_vulkan.h.md
@@ -31,6 +31,8 @@ Necessary detail to share the same piece of Vulkan buffer between Taichi and ext
- `structure.vulkan_memory_interop_info.buffer`: Vulkan buffer.
- `structure.vulkan_memory_interop_info.size`: Size of the piece of memory in bytes.
- `structure.vulkan_memory_interop_info.usage`: Vulkan buffer usage. In most cases, Taichi requires the `VK_BUFFER_USAGE_STORAGE_BUFFER_BIT`.
- `structure.vulkan_memory_interop_info.memory`: Device memory bound to the Vulkan buffer.
- `structure.vulkan_memory_interop_info.offset`: Offset into the `VkDeviceMemory` object to the beginning of this allocation, in bytes.

`structure.vulkan_image_interop_info`

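To make the two new fields concrete, here is a minimal sketch of how an external application might map the exported allocation. It assumes a host-visible allocation; `map_taichi_allocation` is a hypothetical helper, not part of the Taichi C API, and error handling is omitted.

```c
#include <vulkan/vulkan.h>
#include <taichi/taichi_vulkan.h>

// Hypothetical helper: map the slice of VkDeviceMemory that backs a
// Taichi-owned buffer. Assumes the underlying allocation is host-visible.
static void *map_taichi_allocation(TiRuntime runtime,
                                   TiMemory memory,
                                   VkDevice device) {
  TiVulkanMemoryInteropInfo info = {0};
  ti_export_vulkan_memory(runtime, memory, &info);
  void *mapped = NULL;
  // `memory` and `offset` locate this allocation inside a possibly larger
  // memory pool, so we map exactly `size` bytes starting at `offset`.
  vkMapMemory(device, info.memory, info.offset, info.size, 0, &mapped);
  return mapped;
}
```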
5 changes: 5 additions & 0 deletions c_api/include/taichi/taichi_vulkan.h
@@ -60,6 +60,11 @@ typedef struct TiVulkanMemoryInteropInfo {
// Vulkan buffer usage. In most cases, Taichi requires the
// `VK_BUFFER_USAGE_STORAGE_BUFFER_BIT`.
VkBufferUsageFlags usage;
// Device memory bound to the buffer.
VkDeviceMemory memory;
// Offset into the `VkDeviceMemory` object to the beginning of this
// allocation, in bytes.
uint64_t offset;
} TiVulkanMemoryInteropInfo;

// Structure `TiVulkanImageInteropInfo`
6 changes: 6 additions & 0 deletions c_api/src/taichi_vulkan_impl.cpp
@@ -236,9 +236,15 @@ void ti_export_vulkan_memory(TiRuntime runtime,
VulkanRuntime *runtime2 = ((Runtime *)runtime)->as_vk();
taichi::lang::DeviceAllocation devalloc = devmem2devalloc(*runtime2, memory);
vkapi::IVkBuffer buffer = runtime2->get_vk().get_vkbuffer(devalloc);

auto [vk_mem, offset, __] =
runtime2->get_vk().get_vkmemory_offset_size(devalloc);

interop_info->buffer = buffer.get()->buffer;
interop_info->size = buffer.get()->size;
interop_info->usage = buffer.get()->usage;
interop_info->memory = vk_mem;
interop_info->offset = (uint64_t)offset;
}
TiImage ti_import_vulkan_image(TiRuntime runtime,
const TiVulkanImageInteropInfo *interop_info,
8 changes: 8 additions & 0 deletions c_api/taichi.json
@@ -1064,6 +1064,14 @@
{
"name": "usage",
"type": "VkBufferUsageFlags"
},
{
"name": "memory",
"type": "VkDeviceMemory"
},
{
"name": "offset",
"type": "uint64_t"
}
]
},
3 changes: 2 additions & 1 deletion misc/prtags.json
@@ -48,5 +48,6 @@
"simt" : "SIMT programming",
"release" : "Release",
"build" : "Build system",
"rfc" : "RFC"
"rfc" : "RFC",
"amdgpu" : "AMDGPU backend"
}
34 changes: 24 additions & 10 deletions python/taichi/lang/ast/ast_transformer.py
@@ -13,7 +13,7 @@
from taichi.lang.ast.ast_transformer_utils import (Builder, LoopStatus,
ReturnStatus)
from taichi.lang.ast.symbol_resolver import ASTResolver
from taichi.lang.exception import TaichiSyntaxError
from taichi.lang.exception import TaichiSyntaxError, TaichiTypeError
from taichi.lang.expr import Expr
from taichi.lang.field import Field
from taichi.lang.impl import current_cfg
@@ -426,8 +426,11 @@ def build_call_if_is_builtin(ctx, node, args, keywords):
name = "min" if func is min else "max"
warnings.warn_explicit(
f'Calling builtin function "{name}" in Taichi scope is deprecated. '
f'Please use "ti.{name}" instead.', DeprecationWarning,
ctx.file, node.lineno + ctx.lineno_offset)
f'Please use "ti.{name}" instead.',
DeprecationWarning,
ctx.file,
node.lineno + ctx.lineno_offset,
module="taichi")
return True
return False

@@ -471,8 +474,11 @@ def warn_if_is_external_func(ctx, node):
f'Calling non-taichi function "{name}". '
f'Scope inside the function is not processed by the Taichi AST transformer. '
f'The function may not work as expected. Proceed with caution! '
f'Maybe you can consider turning it into a @ti.func?', UserWarning,
ctx.file, node.lineno + ctx.lineno_offset)
f'Maybe you can consider turning it into a @ti.func?',
UserWarning,
ctx.file,
node.lineno + ctx.lineno_offset,
module="taichi")

@staticmethod
def build_Call(ctx, node):
@@ -797,7 +803,10 @@ def build_BinOp(ctx, node):
ast.BitAnd: lambda l, r: l & r,
ast.MatMult: lambda l, r: l @ r,
}.get(type(node.op))
node.ptr = op(node.left.ptr, node.right.ptr)
try:
node.ptr = op(node.left.ptr, node.right.ptr)
except TypeError as e:
raise TaichiTypeError(str(e))
return node.ptr

@staticmethod
@@ -902,8 +911,10 @@ def build_Compare(ctx, node):
name = "is" if isinstance(node_op, ast.Is) else "is not"
warnings.warn_explicit(
f'Operator "{name}" in Taichi scope is deprecated. Please avoid using it.',
DeprecationWarning, ctx.file,
node.lineno + ctx.lineno_offset)
DeprecationWarning,
ctx.file,
node.lineno + ctx.lineno_offset,
module="taichi")
if op is None:
if type(node_op) in ops_static:
raise TaichiSyntaxError(
@@ -1324,8 +1335,11 @@ def build_IfExp(ctx, node):
warnings.warn_explicit(
'Using conditional expression for element-wise select operation on '
'Taichi vectors/matrices is deprecated. '
'Please use "ti.select" instead.', DeprecationWarning,
ctx.file, node.lineno + ctx.lineno_offset)
'Please use "ti.select" instead.',
DeprecationWarning,
ctx.file,
node.lineno + ctx.lineno_offset,
module="taichi")
return node.ptr

is_static_if = (ASTTransformer.get_decorator(ctx,
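These frontend changes do two things. First, the deprecation warnings now pass `module="taichi"` to `warnings.warn_explicit`, so users can target them with a module filter instead of silencing `DeprecationWarning` globally; second, `build_BinOp` converts the `TypeError` raised by an unsupported operand pair into a `TaichiTypeError`. A sketch of the user-facing effect of the first change (hypothetical script, assuming a CPU backend is available):

```python
import warnings

import taichi as ti

ti.init(arch=ti.cpu)

@ti.kernel
def clamp(x: ti.f32) -> ti.f32:
    # Builtin min in Taichi scope is deprecated; ti.min is preferred.
    return min(x, 1.0)

# The warning is attributed to the "taichi" module, so it can be silenced
# selectively rather than disabling DeprecationWarning everywhere.
warnings.filterwarnings("ignore", category=DeprecationWarning, module="taichi")
print(clamp(2.5))  # 1.0, compiled without the deprecation warning
```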
6 changes: 0 additions & 6 deletions taichi/codegen/cuda/codegen_cuda.cpp
@@ -780,12 +780,6 @@ FunctionType CUDAModuleToFunctionConverter::convert(
auto &mod = data.module;
auto &tasks = data.tasks;
#ifdef TI_WITH_CUDA
for (const auto &task : tasks) {
llvm::Function *func = mod->getFunction(task.name);
TI_ASSERT(func);
tlctx_->mark_function_as_cuda_kernel(func, task.block_dim);
}

auto jit = tlctx_->jit.get();
auto cuda_module =
jit->add_module(std::move(mod), executor_->get_config()->gpu_max_reg);
9 changes: 9 additions & 0 deletions taichi/codegen/llvm/codegen_llvm.cpp
@@ -2749,6 +2749,15 @@ LLVMCompiledTask TaskCodeGenLLVM::run_compilation() {
emit_to_module();
eliminate_unused_functions();

if (config.arch == Arch::cuda) {
// CUDA-specific metadata
for (const auto &task : offloaded_tasks) {
llvm::Function *func = module->getFunction(task.name);
TI_ASSERT(func);
tlctx->mark_function_as_cuda_kernel(func, task.block_dim);
}
}

return {std::move(offloaded_tasks), std::move(module),
std::move(used_tree_ids), std::move(struct_for_tls_sizes)};
}
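This is the loop deleted from codegen_cuda.cpp above: marking now happens in run_compilation, during codegen itself, so the metadata is already attached when the compiled module is cached or handed to the JIT. For orientation, a hedged sketch of what marking a function as a CUDA kernel typically means at the LLVM level; Taichi's mark_function_as_cuda_kernel may differ in detail:

```cpp
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"

// Append {func, "kernel", 1} to !nvvm.annotations so the NVPTX backend
// treats `func` as a kernel entry point. A real implementation would also
// encode the block size, e.g. via a "maxntidx" annotation.
void mark_as_cuda_kernel(llvm::Module &module, llvm::Function *func) {
  llvm::LLVMContext &ctx = module.getContext();
  llvm::Metadata *ops[] = {
      llvm::ValueAsMetadata::get(func),
      llvm::MDString::get(ctx, "kernel"),
      llvm::ConstantAsMetadata::get(
          llvm::ConstantInt::get(llvm::Type::getInt32Ty(ctx), 1)),
  };
  module.getOrInsertNamedMetadata("nvvm.annotations")
      ->addOperand(llvm::MDNode::get(ctx, ops));
}
```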
4 changes: 2 additions & 2 deletions taichi/ir/snode.cpp
@@ -91,8 +91,8 @@ SNode &SNode::create_node(std::vector<Axis> axes,
}
if (acc_shape > std::numeric_limits<int>::max()) {
TI_WARN(
"Snode index might be out of int32 boundary but int64 indexing is not "
"supported yet.");
"SNode index might be out of int32 boundary but int64 indexing is not "
"supported yet. Struct fors might not work either.");
}
new_node.num_cells_per_container = acc_shape;
// infer extractors (only for POT)
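A hypothetical field that would now trip the reworded warning; its cell count is exactly 2**31, one past what int32 can index (and, at one byte per cell, about 2 GB, so this is illustration rather than something to run casually):

```python
import taichi as ti

ti.init(arch=ti.cpu)

# 2**16 * 2**15 = 2**31 cells: exceeds the int32 maximum of 2**31 - 1,
# so SNode creation warns that indexing may overflow.
huge = ti.field(ti.u8, shape=(2**16, 2**15))
```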
3 changes: 2 additions & 1 deletion taichi/rhi/vulkan/vulkan_device_creator.cpp
@@ -738,7 +738,8 @@ void VulkanDeviceCreator::create_logical_device(bool manual_create) {
if (device_supported_features.shaderInt64) {
// Temporarily disable it on macOS:
// https://github.com/taichi-dev/taichi/issues/6295
#if !defined(__APPLE__)
// (penguinliong) Temporarily disabled (until device capability is ready).
#if !defined(__APPLE__) && false
caps.set(DeviceCapability::spirv_has_physical_storage_buffer, true);
#endif
}
2 changes: 1 addition & 1 deletion taichi/runtime/cuda/jit_cuda.cpp
@@ -77,7 +77,7 @@ std::string JITSessionCUDA::compile_module_to_ptx(
// Part of this function is borrowed from Halide::CodeGen_PTX_Dev.cpp
if (llvm::verifyModule(*module, &llvm::errs())) {
module->print(llvm::errs(), nullptr);
TI_WARN("Module broken");
TI_ERROR("LLVM Module broken");
}

using namespace llvm;
5 changes: 2 additions & 3 deletions taichi/transforms/cache_loop_invariant_global_vars.cpp
@@ -122,9 +122,8 @@ class CacheLoopInvariantGlobalVars : public LoopInvariantDetector {
std::make_unique<AllocaStmt>(dest->ret_type.ptr_removed());
auto alloca_stmt = alloca_unique.get();
modifier.insert_before(current_loop_stmt(), std::move(alloca_unique));
if (status == CacheStatus::Read) {
set_init_value(alloca_stmt, dest);
} else if (status == CacheStatus::Write) {
set_init_value(alloca_stmt, dest);
if (status == CacheStatus::Write) {
add_writeback(alloca_stmt, dest);
}
cached_maps.top()[dest] = {status, alloca_stmt};
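With this change, set_init_value runs for write-only caches too: the local cache starts from the global's current value, so the write-back after the loop cannot clobber the destination when the cached write never executes (test_write_after_break below exercises exactly this). A plain-Python sketch of the transformed semantics, with hypothetical names:

```python
def cached_loop(x, i, do_write):
    cache = x[i]          # init value: now loaded even for write-only caches
    for _ in range(1):
        if do_write:
            cache = 42    # the cached write
        break             # the write may be skipped entirely
    x[i] = cache          # write-back always runs
    return x

print(cached_loop([7], 0, do_write=False))  # [7]: original value preserved
```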
2 changes: 1 addition & 1 deletion taichi/transforms/demote_dense_struct_fors.cpp
@@ -26,7 +26,6 @@ void convert_to_range_for(OffloadedStmt *offloaded, bool packed) {
snode = snode->parent;
}
std::reverse(snodes.begin(), snodes.end());
TI_ASSERT(total_bits <= 30);

// general shape calculation - no dependence on POT
int64 total_n = 1;
@@ -38,6 +37,7 @@ void convert_to_range_for(OffloadedStmt *offloaded, bool packed) {
}
total_n *= s->num_cells_per_container;
}
TI_ASSERT(total_n <= std::numeric_limits<int>::max());

offloaded->const_begin = true;
offloaded->const_end = true;
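The old `TI_ASSERT(total_bits <= 30)` bounded the power-of-two padded shape, which over-rejects packed layouts; the new assert bounds the actual cell count. The arithmetic behind test_2d_overflow_if_not_packed (later in this commit) shows the difference:

```python
n = m = 2**9 + 1                   # 513
p = 2**10 + 1                      # 1025
packed_cells = n * m * p           # 269,748,225: fits in int32 when packed
pot_cells = 2**10 * 2**10 * 2**11  # 2,147,483,648: POT padding overflows int32
assert packed_cells <= 2**31 - 1 < pot_cells
```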
20 changes: 20 additions & 0 deletions tests/python/test_for_break.py
@@ -110,3 +110,23 @@ def test_kernel() -> ti.i32:
return sum

assert test_kernel() == 25


@test_utils.test()
def test_write_after_break():
a = ti.field(ti.i32, shape=5)
a.fill(-1)

@ti.kernel
def foo():
ti.loop_config(serialize=True)
for i in range(5):
while True:
if i > 3:
break
a[i] = i
break

foo()

assert a[4] == -1
15 changes: 15 additions & 0 deletions tests/python/test_struct_for_non_pot.py
@@ -66,3 +66,18 @@ def test_2d():
@test_utils.test(require=ti.extension.packed, packed=True)
def test_2d_packed():
_test_2d()


@test_utils.test(require=ti.extension.packed, packed=True)
def test_2d_overflow_if_not_packed():
n, m, p = 2**9 + 1, 2**9 + 1, 2**10 + 1
arr = ti.field(ti.u8, (n, m, p))

@ti.kernel
def count() -> ti.i32:
res = 0
for _ in ti.grouped(arr):
res += 1
return res

assert count() == n * m * p
