From e1611d6726982708bfe1a3d9120bd226265ce7d3 Mon Sep 17 00:00:00 2001 From: mei-ye Date: Fri, 22 May 2020 21:45:39 +0000 Subject: [PATCH] enable amd_apu device on vulkan target --- apps/benchmark/gpu_imagenet_bench.py | 10 +++++++--- python/tvm/autotvm/measure/measure_methods.py | 2 +- python/tvm/autotvm/tophub.py | 2 ++ src/runtime/vulkan/vulkan.cc | 14 ++++++++++++-- 4 files changed, 22 insertions(+), 6 deletions(-) diff --git a/apps/benchmark/gpu_imagenet_bench.py b/apps/benchmark/gpu_imagenet_bench.py index dfb0445bf214..00237006a2d3 100644 --- a/apps/benchmark/gpu_imagenet_bench.py +++ b/apps/benchmark/gpu_imagenet_bench.py @@ -56,13 +56,17 @@ def benchmark(network, target): 'vgg-16', 'vgg-19', 'densenet-121', 'inception_v3', 'mobilenet', 'squeezenet_v1.0', 'squeezenet_v1.1'], help='The name of neural network') + parser.add_argument("--device", type=str, + choices=['amd_apu'], default='amd_apu', + help="The name of the test device. If your device is not listed in " + "the choices list, pick the most similar one as argument.") parser.add_argument("--model", type=str, - choices=['1080ti', 'titanx', 'tx2', 'gfx900'], default='1080ti', + choices=['1080ti', 'titanx', 'tx2', 'gfx900', 'v1000'], default='1080ti', help="The model of the test device. 
If your device is not listed in " "the choices list, pick the most similar one as argument.") parser.add_argument("--repeat", type=int, default=600) parser.add_argument("--target", type=str, - choices=['cuda', 'opencl', 'rocm', 'nvptx', 'metal'], default='cuda', + choices=['cuda', 'opencl', 'rocm', 'nvptx', 'metal', 'vulkan'], default='cuda', help="The tvm compilation target") parser.add_argument("--thread", type=int, default=1, help="The number of threads to be run.") args = parser.parse_args() @@ -74,7 +78,7 @@ def benchmark(network, target): else: networks = [args.network] - target = tvm.target.create('%s -model=%s' % (args.target, args.model)) + target = tvm.target.create('%s -device=%s -model=%s' % (args.target, args.device, args.model)) print("--------------------------------------------------") print("%-20s %-20s" % ("Network Name", "Mean Inference Time (std dev)")) diff --git a/python/tvm/autotvm/measure/measure_methods.py b/python/tvm/autotvm/measure/measure_methods.py index 8f11a17920b4..45c35e5913eb 100644 --- a/python/tvm/autotvm/measure/measure_methods.py +++ b/python/tvm/autotvm/measure/measure_methods.py @@ -231,7 +231,7 @@ def set_task(self, task): def get_build_kwargs(self): kwargs = {} if 'cuda' in self.task.target.keys or 'opencl' in self.task.target.keys or \ - 'rocm' in self.task.target.keys: + 'rocm' in self.task.target.keys or 'vulkan' in self.task.target.keys: remote = request_remote(self.key, self.host, self.port) ctx = remote.context(str(self.task.target), 0) max_dims = ctx.max_thread_dimensions diff --git a/python/tvm/autotvm/tophub.py b/python/tvm/autotvm/tophub.py index 3fbccfe80ded..a00133709bea 100644 --- a/python/tvm/autotvm/tophub.py +++ b/python/tvm/autotvm/tophub.py @@ -56,6 +56,7 @@ 'intel_graphics': "v0.02", 'vta': "v0.08", + 'amd_apu': "v0.01", } logger = logging.getLogger('autotvm') @@ -69,6 +70,7 @@ def _alias(name): 'webgpu': 'opencl', 'vulkan': 'opencl', 'nvptx': 'cuda', + 'amd_apu': 'amd_apu' } return table.get(name, name) 
diff --git a/src/runtime/vulkan/vulkan.cc b/src/runtime/vulkan/vulkan.cc index ef4b9b020f95..44810116c3c2 100644 --- a/src/runtime/vulkan/vulkan.cc +++ b/src/runtime/vulkan/vulkan.cc @@ -368,7 +368,7 @@ void VulkanDeviceAPI::GetAttr(TVMContext ctx, DeviceAttrKind kind, TVMRetValue* case kMaxThreadsPerBlock: { VkPhysicalDeviceProperties phy_prop; vkGetPhysicalDeviceProperties(vctx.phy_device, &phy_prop); - int64_t value = phy_prop.limits.maxComputeWorkGroupSize[0]; + int64_t value = phy_prop.limits.maxComputeWorkGroupInvocations; *rv = value; break; } @@ -401,8 +401,18 @@ void VulkanDeviceAPI::GetAttr(TVMContext ctx, DeviceAttrKind kind, TVMRetValue* return; case kExist: break; - case kMaxThreadDimensions: + case kMaxThreadDimensions: { + VkPhysicalDeviceProperties phy_prop; + vkGetPhysicalDeviceProperties(vctx.phy_device, &phy_prop); + int64_t dims[3]; + dims[0] = phy_prop.limits.maxComputeWorkGroupSize[0]; + dims[1] = phy_prop.limits.maxComputeWorkGroupSize[1]; + dims[2] = phy_prop.limits.maxComputeWorkGroupSize[2]; + std::stringstream ss; // Pack the three per-dimension work-group size limits into one JSON array string so a single return value can carry all of them. + ss << "[" << dims[0] << ", " << dims[1] << ", " << dims[2] << "]"; + *rv = ss.str(); break; + } case kGcnArch: return; }