Skip to content

Commit bcf38cf

Browse files
Artem Gindinsonromanovvlad
authored andcommitted
[SYCL] Add initial support of AOT compilation for multiple devices
This patch adds handling of "target" device image field both in SYCL runtime. This enables basic support of multiple device images. The natural scenario goes as follows: - a kernel needs to be run on a particular device; - if it was AOT-compiled as part of a binary image for this device, load the image; - if the appropriate AOT-compiled image was not found, a SPIR-V image is chosen and JIT-compiled instead. This initial mechanism provides limited functionality, simply picking the first image that has appropriate binary target. Signed-off-by: Sergey Semenov <sergey.semenov@intel.com> Signed-off-by: Artem Gindinson <artem.gindinson@intel.com>
1 parent 6954406 commit bcf38cf

File tree

9 files changed

+479
-16
lines changed

9 files changed

+479
-16
lines changed

sycl/include/CL/sycl/detail/pi.h

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ typedef enum {
5656
PI_INVALID_CONTEXT = CL_INVALID_CONTEXT,
5757
PI_INVALID_PLATFORM = CL_INVALID_PLATFORM,
5858
PI_INVALID_DEVICE = CL_INVALID_DEVICE,
59+
PI_INVALID_BINARY = CL_INVALID_BINARY,
5960
PI_MISALIGNED_SUB_BUFFER_OFFSET = CL_MISALIGNED_SUB_BUFFER_OFFSET,
6061
PI_OUT_OF_HOST_MEMORY = CL_OUT_OF_HOST_MEMORY
6162
} _pi_result;
@@ -245,9 +246,20 @@ static const uint8_t PI_DEVICE_BINARY_OFFLOAD_KIND_SYCL = 4;
245246
/// Target identification strings for
246247
/// pi_device_binary_struct.DeviceTargetSpec
247248
///
249+
/// A device type represented by a particular target
250+
/// triple requires specific binary images. We need
251+
/// to map the image type onto the device target triple
252+
///
248253
#define PI_DEVICE_BINARY_TARGET_UNKNOWN "<unknown>"
254+
/// SPIR-V 32-bit image <-> 32-bit OpenCL device
249255
#define PI_DEVICE_BINARY_TARGET_SPIRV32 "spir"
250-
#define PI_DEVICE_BINARY_TARGET_SPIRV64 "spir64";
256+
/// SPIR-V 64-bit image <-> 64-bit OpenCL device
257+
#define PI_DEVICE_BINARY_TARGET_SPIRV64 "spir64"
258+
/// Device-specific binary images produced from SPIR-V 64-bit <->
259+
/// various triples for specific 64-bit OpenCL devices
260+
#define PI_DEVICE_BINARY_TARGET_SPIRV64_X86_64 "spir64_x86_64"
261+
#define PI_DEVICE_BINARY_TARGET_SPIRV64_GEN "spir64_gen"
262+
#define PI_DEVICE_BINARY_TARGET_SPIRV64_FPGA "spir64_fpga"
251263

252264
/// This struct is a record of the device binary information. If the Kind field
253265
/// denotes a portable binary type (SPIRV or LLVMIR), the DeviceTargetSpec field
@@ -264,6 +276,13 @@ struct pi_device_binary_struct {
264276
/// format of the binary data - SPIRV, LLVMIR bitcode,...
265277
uint8_t Format;
266278
/// null-terminated string representation of the device's target architecture
279+
/// which holds one of:
280+
/// PI_DEVICE_BINARY_TARGET_UNKNOWN - unknown
281+
/// PI_DEVICE_BINARY_TARGET_SPIRV32 - general value for 32-bit OpenCL devices
282+
/// PI_DEVICE_BINARY_TARGET_SPIRV64 - general value for 64-bit OpenCL devices
283+
/// PI_DEVICE_BINARY_TARGET_SPIRV64_X86_64 - 64-bit OpenCL CPU device
284+
/// PI_DEVICE_BINARY_TARGET_SPIRV64_GEN - GEN GPU device (64-bit OpenCL)
285+
/// PI_DEVICE_BINARY_TARGET_SPIRV64_FPGA - 64-bit OpenCL FPGA device
267286
const char *DeviceTargetSpec;
268287
/// a null-terminated string; target- and compiler-specific options
269288
/// which are suggested to use to "build" program at runtime
@@ -405,7 +424,7 @@ pi_result piDevicePartition(
405424
/// and the IR characteristics.
406425
///
407426
pi_result piextDeviceSelectBinary(
408-
pi_device device, // TODO: does this need to be context?
427+
pi_device device,
409428
pi_device_binary * binaries,
410429
pi_uint32 num_binaries,
411430
pi_device_binary * selected_binary);

sycl/source/detail/pi_opencl.cpp

Lines changed: 61 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -59,13 +59,16 @@ pi_result OCL(piDevicesGet)(pi_platform platform,
5959
return cast<pi_result>(result);
6060
}
6161

62-
pi_result OCL(piextDeviceSelectBinary)(
63-
pi_device device, // TODO: does this need to be context?
64-
pi_device_binary * images,
65-
pi_uint32 num_images,
66-
pi_device_binary * selected_image) {
67-
68-
// TODO dummy implementation.
62+
pi_result OCL(piextDeviceSelectBinary)(pi_device device,
63+
pi_device_binary *images,
64+
pi_uint32 num_images,
65+
pi_device_binary *selected_image) {
66+
67+
// TODO: this is a bare-bones implementation for choosing a device image
68+
// that would be compatible with the targeted device. An AOT-compiled
69+
// image is preferred over SPIRV for known devices (i.e. Intel devices)
70+
// The implementation makes no effort to differentiate between multiple images
71+
// for the given device, and simply picks the first one compatible
6972
// Real implementaion will use the same mechanism OpenCL ICD dispatcher
7073
// uses. Somthing like:
7174
// PI_VALIDATE_HANDLE_RETURN_HANDLE(ctx, PI_INVALID_CONTEXT);
@@ -74,8 +77,56 @@ pi_result OCL(piextDeviceSelectBinary)(
7477
// where context->dispatch is set to the dispatch table provided by PI
7578
// plugin for platform/device the ctx was created for.
7679

77-
*selected_image = num_images > 0 ? images[0] : nullptr;
78-
return PI_SUCCESS;
80+
// Choose the binary target for the provided device
81+
const char *image_target = nullptr;
82+
// Get the type of the device
83+
cl_device_type device_type;
84+
cl_int ret_err = clGetDeviceInfo(cast<cl_device_id>(device), CL_DEVICE_TYPE,
85+
sizeof(cl_device_type), &device_type, nullptr);
86+
if (ret_err != CL_SUCCESS) {
87+
*selected_image = nullptr;
88+
return cast<pi_result>(ret_err);
89+
}
90+
91+
switch (device_type) {
92+
// TODO: Factor out vendor specifics into a separate source
93+
// E.g. sycl/source/detail/vendor/intel/detail/pi_opencl.cpp?
94+
95+
// We'll attempt to find an image that was AOT-compiled
96+
// from a SPIR-V image into an image specific for:
97+
98+
case CL_DEVICE_TYPE_CPU: // OpenCL 64-bit CPU
99+
image_target = PI_DEVICE_BINARY_TARGET_SPIRV64_X86_64;
100+
break;
101+
case CL_DEVICE_TYPE_GPU: // OpenCL 64-bit GEN GPU
102+
image_target = PI_DEVICE_BINARY_TARGET_SPIRV64_GEN;
103+
break;
104+
case CL_DEVICE_TYPE_ACCELERATOR: // OpenCL 64-bit FPGA
105+
image_target = PI_DEVICE_BINARY_TARGET_SPIRV64_FPGA;
106+
break;
107+
default:
108+
// Otherwise, we'll attempt to find and JIT-compile
109+
// a device-independent SPIR-V image
110+
image_target = PI_DEVICE_BINARY_TARGET_SPIRV64;
111+
break;
112+
}
113+
114+
// Find the appropriate device image, fallback to spirv if not found
115+
pi_device_binary fallback = nullptr;
116+
for (size_t i = 0; i < num_images; ++i) {
117+
if (strcmp(images[i]->DeviceTargetSpec, image_target) == 0) {
118+
*selected_image = images[i];
119+
return PI_SUCCESS;
120+
}
121+
if (strcmp(images[i]->DeviceTargetSpec, PI_DEVICE_BINARY_TARGET_SPIRV64) ==
122+
0)
123+
fallback = images[i];
124+
}
125+
// Points to a spirv image, if such indeed was found
126+
if ((*selected_image = fallback))
127+
return PI_SUCCESS;
128+
// No image can be loaded for the given device
129+
return PI_INVALID_BINARY;
79130
}
80131

81132
pi_result OCL(piQueueCreate)(pi_context context, pi_device device,
@@ -290,7 +341,7 @@ _PI_CL(piDeviceRetain, clRetainDevice)
290341
_PI_CL(piDeviceRelease, clReleaseDevice)
291342
_PI_CL(piextDeviceSelectBinary, OCL(piextDeviceSelectBinary))
292343
_PI_CL(piextGetDeviceFunctionPointer, OCL(piextGetDeviceFunctionPointer))
293-
// Context
344+
// Context
294345
_PI_CL(piContextCreate, clCreateContext)
295346
_PI_CL(piContextGetInfo, clGetContextInfo)
296347
_PI_CL(piContextRetain, clRetainContext)

sycl/source/detail/program_manager/program_manager.cpp

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,7 @@ RT::PiProgram ProgramManager::loadProgram(OSModuleHandle M,
263263
<< getRawSyclObjImpl(Context) << ")\n";
264264
}
265265

266+
const RT::PiContext &Ctx = getRawSyclObjImpl(Context)->getHandleRef();
266267
DeviceImage *Img = nullptr;
267268
bool UseKernelSpv = false;
268269
const std::string UseSpvEnv("SYCL_USE_KERNEL_SPV");
@@ -309,6 +310,10 @@ RT::PiProgram ProgramManager::loadProgram(OSModuleHandle M,
309310
std::cerr << "loaded device image from " << Fname << "\n";
310311
}
311312
} else {
313+
// TODO: There may be cases with cl::sycl::program class usage in source code
314+
// that will result in a multi-device context. This case needs to be handled
315+
// here or at the program_impl class level
316+
312317
// Take all device images in module M and ask the native runtime under the
313318
// given context to choose one it prefers.
314319
auto ImgIt = m_DeviceImages.find(M);
@@ -318,8 +323,8 @@ RT::PiProgram ProgramManager::loadProgram(OSModuleHandle M,
318323
}
319324
std::vector<DeviceImage *> *Imgs = (ImgIt->second).get();
320325

321-
PI_CALL(RT::piextDeviceSelectBinary(
322-
0, Imgs->data(), (cl_uint)Imgs->size(), &Img));
326+
PI_CALL(RT::piextDeviceSelectBinary(getFirstDevice(Ctx), Imgs->data(),
327+
(cl_uint)Imgs->size(), &Img));
323328

324329
if (DbgProgMgr > 0) {
325330
std::cerr << "available device images:\n";
@@ -400,7 +405,6 @@ RT::PiProgram ProgramManager::loadProgram(OSModuleHandle M,
400405
// Load the selected image
401406
if (!is_device_binary_type_supported(Context, Format))
402407
throw feature_not_supported("Online compilation is not supported in this context");
403-
const RT::PiContext &Ctx = getRawSyclObjImpl(Context)->getHandleRef();
404408
RT::PiProgram Res = nullptr;
405409
Res = Format == PI_DEVICE_BINARY_TYPE_SPIRV
406410
? createSpirvProgram(Ctx, Img->BinaryStart, ImgSize)

sycl/test/aot/accelerator.cpp

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
// REQUIRES: aoc
2+
3+
// RUN: %clangxx -fsycl -fsycl-targets=spir64_fpga-unknown-linux-sycldevice %s -o %t.out
4+
// RUN: env SYCL_DEVICE_TYPE=HOST %t.out
5+
// RUN: %ACC_RUN_PLACEHOLDER %t.out
6+
7+
//==----- accelerator.cpp - AOT compilation for fpga devices using aoc ------==//
8+
//
9+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
10+
// See https://llvm.org/LICENSE.txt for license information.
11+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
12+
//
13+
//===------------------------------------------------------------------------===//
14+
15+
#include <CL/sycl.hpp>
16+
17+
#include <array>
18+
#include <iostream>
19+
20+
constexpr cl::sycl::access::mode sycl_read = cl::sycl::access::mode::read;
21+
constexpr cl::sycl::access::mode sycl_write = cl::sycl::access::mode::write;
22+
23+
template <typename T>
24+
class SimpleVadd;
25+
26+
template <typename T, size_t N>
27+
void simple_vadd(const std::array<T, N>& VA, const std::array<T, N>& VB,
28+
std::array<T, N>& VC) {
29+
cl::sycl::queue deviceQueue([](cl::sycl::exception_list ExceptionList) {
30+
for (cl::sycl::exception_ptr_class ExceptionPtr : ExceptionList) {
31+
try {
32+
std::rethrow_exception(ExceptionPtr);
33+
} catch (cl::sycl::exception &E) {
34+
std::cerr << E.what();
35+
} catch (...) {
36+
std::cerr << "Unknown async exception was caught." << std::endl;
37+
}
38+
}
39+
});
40+
41+
cl::sycl::range<1> numOfItems{N};
42+
cl::sycl::buffer<T, 1> bufferA(VA.data(), numOfItems);
43+
cl::sycl::buffer<T, 1> bufferB(VB.data(), numOfItems);
44+
cl::sycl::buffer<T, 1> bufferC(VC.data(), numOfItems);
45+
46+
deviceQueue.submit([&](cl::sycl::handler& cgh) {
47+
auto accessorA = bufferA.template get_access<sycl_read>(cgh);
48+
auto accessorB = bufferB.template get_access<sycl_read>(cgh);
49+
auto accessorC = bufferC.template get_access<sycl_write>(cgh);
50+
51+
cgh.parallel_for<class SimpleVadd<T>>(numOfItems,
52+
[=](cl::sycl::id<1> wiID) {
53+
accessorC[wiID] = accessorA[wiID] + accessorB[wiID];
54+
});
55+
});
56+
57+
deviceQueue.wait_and_throw();
58+
}
59+
60+
int main() {
61+
const size_t array_size = 4;
62+
std::array<cl::sycl::cl_int, array_size> A = {{1, 2, 3, 4}},
63+
B = {{1, 2, 3, 4}}, C;
64+
std::array<cl::sycl::cl_float, array_size> D = {{1.f, 2.f, 3.f, 4.f}},
65+
E = {{1.f, 2.f, 3.f, 4.f}}, F;
66+
simple_vadd(A, B, C);
67+
simple_vadd(D, E, F);
68+
for (unsigned int i = 0; i < array_size; i++) {
69+
if (C[i] != A[i] + B[i]) {
70+
std::cout << "The results are incorrect (element " << i << " is " << C[i]
71+
<< "!\n";
72+
return 1;
73+
}
74+
if (F[i] != D[i] + E[i]) {
75+
std::cout << "The results are incorrect (element " << i << " is " << F[i]
76+
<< "!\n";
77+
return 1;
78+
}
79+
}
80+
std::cout << "The results are correct!\n";
81+
return 0;
82+
}

sycl/test/aot/cpu.cpp

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
// REQUIRES: ioc64
2+
3+
// RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64-unknown-linux-sycldevice %s -o %t.out
4+
// RUN: env SYCL_DEVICE_TYPE=HOST %t.out
5+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
6+
7+
//==----- cpu.cpp - AOT compilation for cpu devices using ioc64 -------------==//
8+
//
9+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
10+
// See https://llvm.org/LICENSE.txt for license information.
11+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
12+
//
13+
//===------------------------------------------------------------------------===//
14+
15+
#include <CL/sycl.hpp>
16+
17+
#include <array>
18+
#include <iostream>
19+
20+
constexpr cl::sycl::access::mode sycl_read = cl::sycl::access::mode::read;
21+
constexpr cl::sycl::access::mode sycl_write = cl::sycl::access::mode::write;
22+
23+
template <typename T>
24+
class SimpleVadd;
25+
26+
template <typename T, size_t N>
27+
void simple_vadd(const std::array<T, N>& VA, const std::array<T, N>& VB,
28+
std::array<T, N>& VC) {
29+
cl::sycl::queue deviceQueue([](cl::sycl::exception_list ExceptionList) {
30+
for (cl::sycl::exception_ptr_class ExceptionPtr : ExceptionList) {
31+
try {
32+
std::rethrow_exception(ExceptionPtr);
33+
} catch (cl::sycl::exception &E) {
34+
std::cerr << E.what();
35+
} catch (...) {
36+
std::cerr << "Unknown async exception was caught." << std::endl;
37+
}
38+
}
39+
});
40+
41+
cl::sycl::range<1> numOfItems{N};
42+
cl::sycl::buffer<T, 1> bufferA(VA.data(), numOfItems);
43+
cl::sycl::buffer<T, 1> bufferB(VB.data(), numOfItems);
44+
cl::sycl::buffer<T, 1> bufferC(VC.data(), numOfItems);
45+
46+
deviceQueue.submit([&](cl::sycl::handler& cgh) {
47+
auto accessorA = bufferA.template get_access<sycl_read>(cgh);
48+
auto accessorB = bufferB.template get_access<sycl_read>(cgh);
49+
auto accessorC = bufferC.template get_access<sycl_write>(cgh);
50+
51+
cgh.parallel_for<class SimpleVadd<T>>(numOfItems,
52+
[=](cl::sycl::id<1> wiID) {
53+
accessorC[wiID] = accessorA[wiID] + accessorB[wiID];
54+
});
55+
});
56+
57+
deviceQueue.wait_and_throw();
58+
}
59+
60+
int main() {
61+
const size_t array_size = 4;
62+
std::array<cl::sycl::cl_int, array_size> A = {{1, 2, 3, 4}},
63+
B = {{1, 2, 3, 4}}, C;
64+
std::array<cl::sycl::cl_float, array_size> D = {{1.f, 2.f, 3.f, 4.f}},
65+
E = {{1.f, 2.f, 3.f, 4.f}}, F;
66+
simple_vadd(A, B, C);
67+
simple_vadd(D, E, F);
68+
for (unsigned int i = 0; i < array_size; i++) {
69+
if (C[i] != A[i] + B[i]) {
70+
std::cout << "The results are incorrect (element " << i << " is " << C[i]
71+
<< "!\n";
72+
return 1;
73+
}
74+
if (F[i] != D[i] + E[i]) {
75+
std::cout << "The results are incorrect (element " << i << " is " << F[i]
76+
<< "!\n";
77+
return 1;
78+
}
79+
}
80+
std::cout << "The results are correct!\n";
81+
return 0;
82+
}

0 commit comments

Comments
 (0)