Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Runtime] Extend Graph Runtime To Support Cuda Graph Launch #7616

Merged
merged 19 commits into from
Mar 17, 2021
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ tvm_option(USE_LLVM "Build with LLVM, can be set to specific llvm-config path" O
tvm_option(USE_STACKVM_RUNTIME "Include stackvm into the runtime" OFF)
tvm_option(USE_GRAPH_RUNTIME "Build with tiny graph runtime" ON)
tvm_option(USE_GRAPH_RUNTIME_DEBUG "Build with tiny graph runtime debug mode" OFF)
tvm_option(USE_GRAPH_RUNTIME_CUGRAPH "Build with tiny graph runtime cuGraph launch mode" OFF)
tvm_option(USE_OPENMP "Build with OpenMP thread pool implementation" OFF)
tvm_option(USE_RELAY_DEBUG "Building Relay in debug mode..." OFF)
tvm_option(USE_RTTI "Build with RTTI" ON)
Expand Down Expand Up @@ -321,6 +322,16 @@ if(USE_GRAPH_RUNTIME)
set_source_files_properties(${RUNTIME_GRAPH_SRCS}
PROPERTIES COMPILE_DEFINITIONS "TVM_GRAPH_RUNTIME_DEBUG")
endif(USE_GRAPH_RUNTIME_DEBUG)

if(USE_CUDA)
  if(USE_GRAPH_RUNTIME_CUGRAPH)
    message(STATUS "Build with Graph runtime cuGraph support...")
    file(GLOB RUNTIME_CUGRAPH_SRCS src/runtime/graph/cugraph/*.cc)
    list(APPEND RUNTIME_SRCS ${RUNTIME_CUGRAPH_SRCS})
    # Tag the cuGraph sources themselves with the feature macro. The original
    # applied this to RUNTIME_GRAPH_SRCS, which marked the wrong files and
    # overwrote the TVM_GRAPH_RUNTIME_DEBUG definition set just above
    # (set_source_files_properties replaces COMPILE_DEFINITIONS, it does not append).
    set_source_files_properties(${RUNTIME_CUGRAPH_SRCS}
      PROPERTIES COMPILE_DEFINITIONS "TVM_GRAPH_RUNTIME_CUGRAPH")
  endif()
elseif(USE_GRAPH_RUNTIME_CUGRAPH)
  # Fail loudly instead of silently ignoring the option when CUDA is disabled
  # (raised in review: a silent no-op here confuses users).
  message(FATAL_ERROR
    "USE_GRAPH_RUNTIME_CUGRAPH requires CUDA; please configure with USE_CUDA=ON.")
endif()
endif(USE_GRAPH_RUNTIME)

if(USE_VM_PROFILER)
Expand Down
3 changes: 3 additions & 0 deletions cmake/config.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,9 @@ set(USE_GRAPH_RUNTIME ON)
# Whether enable additional graph debug functions
set(USE_GRAPH_RUNTIME_DEBUG OFF)

# Whether to enable the tiny graph runtime for cudaGraph launch (experimental)
set(USE_GRAPH_RUNTIME_CUGRAPH OFF)

# Whether enable additional vm profiler functions
set(USE_VM_PROFILER OFF)

Expand Down
62 changes: 62 additions & 0 deletions python/tvm/contrib/cu_graph/cugraph_runtime.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Graph runtime test cuGraph"""
import tvm._ffi

from tvm._ffi.base import string_types
from tvm.contrib import graph_runtime


def create(graph_json_str, libmod, ctx):
    """Create a cuGraph-enabled graph runtime module.

    Parameters
    ----------
    graph_json_str : str
        The serialized graph JSON produced by the graph compiler.
    libmod : tvm.runtime.Module
        The compiled module containing the operator kernels.
    ctx : TVMContext or list of TVMContext
        The context(s) to deploy the module on.

    Returns
    -------
    GraphModuleCuGraph
        A graph module exposing CUDA-graph capture/launch hooks.

    Raises
    ------
    ValueError
        If TVM was built without USE_GRAPH_RUNTIME_CUGRAPH.
    RuntimeError
        If every supplied context is a remote (RPC) context, which the
        cuGraph runtime does not support.
    """
    assert isinstance(graph_json_str, string_types)
    try:
        ctx, num_rpc_ctx, device_type_id = graph_runtime.get_device_ctx(libmod, ctx)
        if num_rpc_ctx == len(ctx):
            # Original code fell through with `pass` here, leaving `fcreate`
            # unbound and crashing with NameError below. Fail explicitly.
            raise RuntimeError("cuGraph runtime does not support pure RPC contexts")
        fcreate = tvm._ffi.get_global_func("tvm.graph_runtime_cugraph.create")
    except ValueError:
        raise ValueError(
            "To enable CuGraph (experimental), please set "
            "'(USE_GRAPH_RUNTIME_CUGRAPH ON)' in config.cmake and rebuild TVM"
        )

    func_obj = fcreate(graph_json_str, libmod, *device_type_id)
    return GraphModuleCuGraph(func_obj, ctx, graph_json_str)


class GraphModuleCuGraph(graph_runtime.GraphModule):
    """Graph runtime wrapper exposing CUDA-graph capture and launch hooks.

    Wraps the C++ ``GraphRuntimeCuGraph`` module and caches its packed
    functions so they can be invoked as plain Python methods.
    """

    def __init__(self, module, ctx, graph_json_str):
        # Cache the cuGraph-specific packed functions exported by the
        # underlying runtime module before delegating to the base class.
        self._start_capture = module["start_capture"]
        self._end_capture = module["end_capture"]
        self._run_cuda_graph = module["run_cuda_graph"]
        graph_runtime.GraphModule.__init__(self, module)

    def capture_cuda_graph(self):
        """Warm up once, then replay the graph under CUDA stream capture."""
        self._run()  # call cuModuleLoadData before cudaStream API
        print("====== Start Stream Capture ======")
        self._start_capture()
        print("====== Start Run Ops On Stream ======")
        self._run()
        print("====== End Stream Capture ======")
        self._end_capture()

    def run_cuda_graph(self):
        """Launch the CUDA graph captured by :meth:`capture_cuda_graph`."""
        self._run_cuda_graph()
116 changes: 116 additions & 0 deletions src/runtime/graph/cugraph/graph_runtime_cugraph.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

/*!
* \file graph_runtime_cugraph.cc
*/

#include <tvm/runtime/registry.h>

#include "../../cuda/cuda_common.h"
#include "../graph_runtime.h"

namespace tvm {
namespace runtime {

class GraphRuntimeCuGraph : public GraphRuntime {
public:
int StartCapture() {
zhuochenKIDD marked this conversation as resolved.
Show resolved Hide resolved
const TVMContext& ctx = data_entry_[entry_id(0, 0)]->ctx;

TVMStreamCreate(ctx.device_type, ctx.device_id, &capture_stream_);
TVMSetStream(ctx.device_type, ctx.device_id, capture_stream_);

CUDA_CALL(cudaStreamBeginCapture(static_cast<cudaStream_t>(capture_stream_),
cudaStreamCaptureModeGlobal));
return 0;
zhuochenKIDD marked this conversation as resolved.
Show resolved Hide resolved
}

int RunCudaGraph() {
cudaStream_t cuStream = static_cast<cudaStream_t>(capture_stream_);
CUDA_CALL(cudaGraphLaunch(cu_graph_exec_, cuStream));
CUDA_CALL(cudaStreamSynchronize(cuStream));
return 0;
}

int EndCapture() {
cudaGraph_t graph;
CUDA_CALL(cudaStreamEndCapture(static_cast<cudaStream_t>(capture_stream_), &graph));

cudaGraphNode_t* nodes = NULL;
size_t numNodes = 0;
CUDA_CALL(cudaGraphGetNodes(graph, nodes, &numNodes));
LOG(INFO) << "Num of nodes in the cuda graph created using stream capture API = " << numNodes;

CUDA_CALL(cudaGraphInstantiate(&cu_graph_exec_, graph, NULL, NULL, 0));
return 0;
}

/*!
* \brief GetFunction Get the function based on input.
* \param name The function which needs to be invoked.
* \param sptr_to_self Packed function pointer.
*/
PackedFunc GetFunction(const std::string& name, const ObjectPtr<Object>& sptr_to_self);

private:
TVMStreamHandle capture_stream_;
zhuochenKIDD marked this conversation as resolved.
Show resolved Hide resolved
cudaGraphExec_t cu_graph_exec_;
};

PackedFunc GraphRuntimeCuGraph::GetFunction(const std::string& name,
                                            const ObjectPtr<Object>& sptr_to_self) {
  // Dispatch the cuGraph-specific packed functions; anything else is
  // delegated to the base GraphRuntime implementation.
  if (name == "start_capture") {
    return PackedFunc(
        [sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { *rv = this->StartCapture(); });
  }
  if (name == "end_capture") {
    return PackedFunc(
        [sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { *rv = this->EndCapture(); });
  }
  if (name == "run_cuda_graph") {
    return PackedFunc(
        [sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { *rv = this->RunCudaGraph(); });
  }
  return GraphRuntime::GetFunction(name, sptr_to_self);
}

/*!
 * \brief Construct a cuGraph-enabled graph runtime module.
 * \param sym_json Serialized graph JSON.
 * \param m Compiled module holding the operator kernels.
 * \param ctxs Contexts the runtime executes on.
 * \param lookup_linked_param_func Optional linked-parameter lookup function.
 * \return The wrapped runtime Module.
 */
Module GraphRuntimeCuGraphCreate(const std::string& sym_json, const tvm::runtime::Module& m,
                                 const std::vector<TVMContext>& ctxs,
                                 PackedFunc lookup_linked_param_func) {
  ObjectPtr<GraphRuntimeCuGraph> exec = make_object<GraphRuntimeCuGraph>();
  exec->Init(sym_json, m, ctxs, lookup_linked_param_func);
  return Module(exec);
}

TVM_REGISTER_GLOBAL("tvm.graph_runtime_cugraph.create").set_body([](TVMArgs args, TVMRetValue* rv) {
  // Expected args: (graph_json, module, [lookup_linked_param_func,] device pairs...).
  // Fixed: the original error message referred to "graph_runtime.create",
  // which is a different global function.
  ICHECK_GE(args.num_args, 4) << "The expected number of arguments for "
                                 "graph_runtime_cugraph.create is at least 4, but it has "
                              << args.num_args;
  PackedFunc lookup_linked_param_func;
  int ctx_start_arg = 2;
  // A PackedFunc at position 2 is the optional linked-parameter lookup; the
  // device (type, id) pairs start after it.
  if (args[2].type_code() == kTVMPackedFuncHandle) {
    lookup_linked_param_func = args[2];
    ctx_start_arg++;
  }

  *rv = GraphRuntimeCuGraphCreate(args[0], args[1], GetAllContext(args, ctx_start_arg),
                                  lookup_linked_param_func);
});
} // namespace runtime
} // namespace tvm
92 changes: 92 additions & 0 deletions tests/python/unittest/test_runtime_graph_cugraph.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import json
import os
import re
import sys
import time

import pytest

import tvm
import tvm.testing
from tvm import te
import numpy as np

from tvm.contrib import utils, graph_runtime
from tvm.contrib.cu_graph import cugraph_runtime


bx = te.thread_axis("blockIdx.x")
tx = te.thread_axis("threadIdx.x")


@tvm.testing.requires_cuda
def test_graph_simple():
    """Build a one-op ("myadd") graph JSON by hand and verify that the
    cuGraph runtime captures and replays it, producing x + 1."""
    n = 32
    A = te.placeholder((n,), name="A")
    B = te.compute(A.shape, lambda *i: A(*i) + 1.0, name="B")
    s = te.create_schedule(B.op)
    xo, xi = s[B].split(B.op.axis[0], factor=8)
    s[B].bind(xo, bx)
    s[B].bind(xi, tx)

    # Hand-written graph: one null input node feeding a single tvm_op node.
    shape = (n,)
    graph = json.dumps(
        {
            "nodes": [
                {"op": "null", "name": "x", "inputs": []},
                {
                    "op": "tvm_op",
                    "name": "add",
                    "inputs": [[0, 0, 0]],
                    "attrs": {
                        "func_name": "myadd",
                        "flatten_data": "1",
                        "num_inputs": "1",
                        "num_outputs": "1",
                    },
                },
            ],
            "arg_nodes": [0],
            "node_row_ptr": [0, 1, 2],
            "heads": [[1, 0, 0]],
            "attrs": {
                "shape": ["list_shape", [shape, shape]],
                "dltype": ["list_str", ["float32", "float32"]],
                "storage_id": ["list_int", [0, 1]],
            },
        }
    )

    def check_verify():
        mlib = tvm.build(s, [A, B], "cuda", name="myadd")
        ctx = tvm.gpu(0)
        try:
            mod = cugraph_runtime.create(graph, mlib, ctx)
        except ValueError:
            # TVM was built without USE_GRAPH_RUNTIME_CUGRAPH; skip silently.
            return
        mod.capture_cuda_graph()
        a = np.random.uniform(size=(n,)).astype(A.dtype)
        mod.set_input(x=a)
        mod.run_cuda_graph()
        out = mod.get_output(0, tvm.nd.empty((n,)))
        np.testing.assert_equal(out.asnumpy(), a + 1)

    check_verify()


# Allow running this test directly (outside of pytest).
if __name__ == "__main__":
    test_graph_simple()