-
Notifications
You must be signed in to change notification settings - Fork 766
/
Copy pathprogram_manager.hpp
517 lines (434 loc) · 22.5 KB
/
program_manager.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
//==------ program_manager.hpp --- SYCL program manager---------------------==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#pragma once
#include <detail/cg.hpp>
#include <detail/device_binary_image.hpp>
#include <detail/device_global_map_entry.hpp>
#include <detail/host_pipe_map_entry.hpp>
#include <detail/kernel_arg_mask.hpp>
#include <detail/spec_constant_impl.hpp>
#include <sycl/detail/cg_types.hpp>
#include <sycl/detail/common.hpp>
#include <sycl/detail/device_global_map.hpp>
#include <sycl/detail/export.hpp>
#include <sycl/detail/host_pipe_map.hpp>
#include <sycl/detail/os_util.hpp>
#include <sycl/detail/ur.hpp>
#include <sycl/detail/util.hpp>
#include <sycl/device.hpp>
#include <sycl/kernel_bundle.hpp>
#include <array>
#include <cstdint>
#include <map>
#include <memory>
#include <set>
#include <type_traits>
#include <unordered_map>
#include <unordered_set>
#include <vector>
// +++ Entry points referenced by the offload wrapper object {
/// Executed as a part of current module's (.exe, .dll) static initialization.
/// Registers device executable images with the runtime.
extern "C" __SYCL_EXPORT void __sycl_register_lib(sycl_device_binaries desc);
/// Executed as a part of current module's (.exe, .dll) static
/// de-initialization.
/// Unregisters device executable images with the runtime.
extern "C" __SYCL_EXPORT void __sycl_unregister_lib(sycl_device_binaries desc);
// +++ }
// For testing purposes
class ProgramManagerTest;
namespace sycl {
inline namespace _V1 {
class context;
namespace detail {
bool doesDevSupportDeviceRequirements(const device &Dev,
const RTDeviceBinaryImage &BinImages);
std::optional<sycl::exception>
checkDevSupportDeviceRequirements(const device &Dev,
const RTDeviceBinaryImage &BinImages,
const NDRDescT &NDRDesc = {});
bool doesImageTargetMatchDevice(const RTDeviceBinaryImage &Img,
const device &Dev);
// This value must be the same as in libdevice/device_itt.h.
// See sycl/doc/design/ITTAnnotations.md for more info.
static constexpr uint32_t inline ITTSpecConstId = 0xFF747469;
class context_impl;
using ContextImplPtr = std::shared_ptr<context_impl>;
class device_impl;
using DeviceImplPtr = std::shared_ptr<device_impl>;
class queue_impl;
class event_impl;
// DeviceLibExt is shared between sycl runtime and sycl-post-link tool.
// If any update is made here, need to sync with DeviceLibExt definition
// in llvm/tools/sycl-post-link/sycl-post-link.cpp
enum class DeviceLibExt : std::uint32_t {
cl_intel_devicelib_assert,
cl_intel_devicelib_math,
cl_intel_devicelib_math_fp64,
cl_intel_devicelib_complex,
cl_intel_devicelib_complex_fp64,
cl_intel_devicelib_cstring,
cl_intel_devicelib_imf,
cl_intel_devicelib_imf_fp64,
cl_intel_devicelib_imf_bf16,
cl_intel_devicelib_bfloat16,
};
enum class SanitizerType {
None,
AddressSanitizer,
MemorySanitizer,
ThreadSanitizer
};
// A helper class for storing image/program objects and their dependencies
// and making their handling a bit more readable.
template <typename T> class ObjectWithDeps {
public:
ObjectWithDeps(T Main) : Objs({std::move(Main)}) {}
// Assumes 0th element is the main one.
ObjectWithDeps(std::vector<T> AllObjs) : Objs{std::move(AllObjs)} {}
T &getMain() { return *Objs.begin(); }
const T &getMain() const { return *Objs.begin(); }
const std::vector<T> &getAll() const { return Objs; }
std::size_t size() const { return Objs.size(); }
bool hasDeps() const { return Objs.size() > 1; }
auto begin() { return Objs.begin(); }
auto begin() const { return Objs.begin(); }
auto end() { return Objs.end(); }
auto end() const { return Objs.end(); }
// TODO use a subrange once C++20 is available
auto depsBegin() const { return Objs.begin() + 1; }
auto depsEnd() const { return Objs.end(); }
private:
std::vector<T> Objs;
};
using DevImgPlainWithDeps = ObjectWithDeps<device_image_plain>;
using BinImgWithDeps = ObjectWithDeps<const RTDeviceBinaryImage *>;
// Provides single loading and building OpenCL programs with unique contexts
// that is necessary for no interoperability cases with lambda.
class ProgramManager {
public:
// Returns the single instance of the program manager for the entire
// process. Can only be called after staticInit is done.
static ProgramManager &getInstance();
RTDeviceBinaryImage &getDeviceImage(const std::string &KernelName,
const ContextImplPtr &ContextImpl,
const device &Device);
RTDeviceBinaryImage &getDeviceImage(
const std::unordered_set<RTDeviceBinaryImage *> &ImagesToVerify,
const ContextImplPtr &ContextImpl, const device &Device);
ur_program_handle_t createURProgram(const RTDeviceBinaryImage &Img,
const ContextImplPtr &ContextImpl,
const std::vector<device> &Devices);
/// Creates a UR program using either a cached device code binary if present
/// in the persistent cache or from the supplied device image otherwise.
/// \param Img The device image used to create the program.
/// \param AllImages All images needed to build the program, used for cache
/// lookup.
/// \param Context The context to find or create the UR program with.
/// \param Device The device to find or create the UR program for.
/// \param CompileAndLinkOptions The compile and linking options to be used
/// for building the UR program. These options must appear in the
/// mentioned order. This parameter is used as a partial key in the
/// cache and has no effect if no cached device code binary is found in
/// the persistent cache.
/// \param SpecConsts Specialization constants associated with the device
/// image. This parameter is used as a partial key in the cache and
/// has no effect if no cached device code binary is found in the
/// persistent cache.
/// \return A pair consisting of the UR program created with the corresponding
/// device code binary and a boolean that is true if the device code
/// binary was found in the persistent cache and false otherwise.
std::pair<ur_program_handle_t, bool> getOrCreateURProgram(
const RTDeviceBinaryImage &Img,
const std::vector<const RTDeviceBinaryImage *> &AllImages,
const ContextImplPtr &ContextImpl, const std::vector<device> &Devices,
const std::string &CompileAndLinkOptions, SerializedObj SpecConsts);
/// Builds or retrieves from cache a program defining the kernel with given
/// name.
/// \param M identifies the OS module the kernel comes from (multiple OS
/// modules may have kernels with the same name)
/// \param Context the context to build the program with
/// \param Device the device for which the program is built
/// \param KernelName the kernel's name
ur_program_handle_t getBuiltURProgram(const ContextImplPtr &ContextImpl,
const DeviceImplPtr &DeviceImpl,
const std::string &KernelName,
const NDRDescT &NDRDesc = {});
/// Builds a program from a given set of images or retrieves that program from
/// cache.
/// \param ImgWithDeps is the main image the program is built with and its
/// dependencies.
/// \param Context is the context the program is built for.
/// \param Devs is a vector of devices the program is built for.
/// \param DevImgWithDeps is an optional DevImgPlainWithDeps pointer that
/// represents the images.
/// \param SpecConsts is an optional parameter containing spec constant values
/// the program should be built with.
ur_program_handle_t
getBuiltURProgram(const BinImgWithDeps &ImgWithDeps,
const ContextImplPtr &ContextImpl,
const std::vector<device> &Devs,
const DevImgPlainWithDeps *DevImgWithDeps = nullptr,
const SerializedObj &SpecConsts = {});
std::tuple<ur_kernel_handle_t, std::mutex *, const KernelArgMask *,
ur_program_handle_t>
getOrCreateKernel(const ContextImplPtr &ContextImpl,
const DeviceImplPtr &DeviceImpl,
const std::string &KernelName,
const NDRDescT &NDRDesc = {});
ur_kernel_handle_t getCachedMaterializedKernel(
const std::string &KernelName,
const std::vector<unsigned char> &SpecializationConsts);
ur_kernel_handle_t getOrCreateMaterializedKernel(
const RTDeviceBinaryImage &Img, const context &Context,
const device &Device, const std::string &KernelName,
const std::vector<unsigned char> &SpecializationConsts);
ur_program_handle_t getUrProgramFromUrKernel(ur_kernel_handle_t Kernel,
const ContextImplPtr &Context);
void addImages(sycl_device_binaries DeviceImages);
void removeImages(sycl_device_binaries DeviceImages);
void debugPrintBinaryImages() const;
static std::string getProgramBuildLog(const ur_program_handle_t &Program,
const ContextImplPtr &Context);
uint32_t getDeviceLibReqMask(const RTDeviceBinaryImage &Img);
/// Returns the mask for eliminated kernel arguments for the requested kernel
/// within the native program.
/// \param NativePrg the UR program associated with the kernel.
/// \param KernelName the name of the kernel.
const KernelArgMask *
getEliminatedKernelArgMask(ur_program_handle_t NativePrg,
const std::string &KernelName);
// The function returns the unique SYCL kernel identifier associated with a
// kernel name.
kernel_id getSYCLKernelID(const std::string &KernelName);
// The function returns a vector containing all unique SYCL kernel identifiers
// in SYCL device images.
std::vector<kernel_id> getAllSYCLKernelIDs();
// The function returns the unique SYCL kernel identifier associated with a
// built-in kernel name.
kernel_id getBuiltInKernelID(const std::string &KernelName);
// The function inserts or initializes a device_global entry into the
// device_global map.
void addOrInitDeviceGlobalEntry(const void *DeviceGlobalPtr,
const char *UniqueId);
// Returns true if any available image is compatible with the device Dev.
bool hasCompatibleImage(const device &Dev);
// The function gets a device_global entry identified by the pointer to the
// device_global object from the device_global map.
DeviceGlobalMapEntry *getDeviceGlobalEntry(const void *DeviceGlobalPtr);
// The function gets multiple device_global entries identified by their unique
// IDs from the device_global map.
std::vector<DeviceGlobalMapEntry *>
getDeviceGlobalEntries(const std::vector<std::string> &UniqueIds,
bool ExcludeDeviceImageScopeDecorated = false);
// The function inserts or initializes a host_pipe entry into the
// host_pipe map.
void addOrInitHostPipeEntry(const void *HostPipePtr, const char *UniqueId);
// The function gets a host_pipe entry identified by the unique ID from
// the host_pipe map.
HostPipeMapEntry *getHostPipeEntry(const std::string &UniqueId);
// The function gets a host_pipe entry identified by the pointer to the
// host_pipe object from the host_pipe map.
HostPipeMapEntry *getHostPipeEntry(const void *HostPipePtr);
device_image_plain
getDeviceImageFromBinaryImage(RTDeviceBinaryImage *BinImage,
const context &Ctx, const device &Dev);
// The function returns a vector of SYCL device images that are compiled with
// the required state and at least one device from the passed list of devices.
std::vector<DevImgPlainWithDeps> getSYCLDeviceImagesWithCompatibleState(
const context &Ctx, const std::vector<device> &Devs,
bundle_state TargetState, const std::vector<kernel_id> &KernelIDs = {});
// Brind images in the passed vector to the required state. Does it inplace
void
bringSYCLDeviceImagesToState(std::vector<DevImgPlainWithDeps> &DeviceImages,
bundle_state TargetState);
// The function returns a vector of SYCL device images in required state,
// which are compatible with at least one of the device from Devs.
std::vector<DevImgPlainWithDeps>
getSYCLDeviceImages(const context &Ctx, const std::vector<device> &Devs,
bundle_state State);
// The function returns a vector of SYCL device images, for which Selector
// callable returns true, in required state, which are compatible with at
// least one of the device from Devs.
std::vector<DevImgPlainWithDeps>
getSYCLDeviceImages(const context &Ctx, const std::vector<device> &Devs,
const DevImgSelectorImpl &Selector,
bundle_state TargetState);
// The function returns a vector of SYCL device images which represent at
// least one kernel from kernel ids vector in required state, which are
// compatible with at least one of the device from Devs.
std::vector<DevImgPlainWithDeps>
getSYCLDeviceImages(const context &Ctx, const std::vector<device> &Devs,
const std::vector<kernel_id> &KernelIDs,
bundle_state TargetState);
// Produces new device image by convering input device image to the object
// state
DevImgPlainWithDeps compile(const DevImgPlainWithDeps &ImgWithDeps,
const std::vector<device> &Devs,
const property_list &PropList);
// Produces set of device images by convering input device images to object
// the executable state
std::vector<device_image_plain> link(const DevImgPlainWithDeps &ImgWithDeps,
const std::vector<device> &Devs,
const property_list &PropList);
// Produces new device image by converting input device image to the
// executable state
device_image_plain build(const DevImgPlainWithDeps &ImgWithDeps,
const std::vector<device> &Devs,
const property_list &PropList);
std::tuple<ur_kernel_handle_t, std::mutex *, const KernelArgMask *>
getOrCreateKernel(const context &Context, const std::string &KernelName,
const property_list &PropList, ur_program_handle_t Program);
ProgramManager();
~ProgramManager() = default;
bool kernelUsesAssert(const std::string &KernelName) const;
SanitizerType kernelUsesSanitizer() const { return m_SanitizerFoundInImage; }
std::optional<int>
kernelImplicitLocalArgPos(const std::string &KernelName) const;
std::set<RTDeviceBinaryImage *>
getRawDeviceImages(const std::vector<kernel_id> &KernelIDs);
private:
ProgramManager(ProgramManager const &) = delete;
ProgramManager &operator=(ProgramManager const &) = delete;
using ProgramPtr = std::unique_ptr<std::remove_pointer_t<ur_program_handle_t>,
decltype(&::urProgramRelease)>;
ProgramPtr build(ProgramPtr Program, const ContextImplPtr &Context,
const std::string &CompileOptions,
const std::string &LinkOptions,
std::vector<ur_device_handle_t> &Devices,
uint32_t DeviceLibReqMask,
const std::vector<ur_program_handle_t> &ProgramsToLink,
bool CreatedFromBinary = false);
/// Dumps image to current directory
void dumpImage(const RTDeviceBinaryImage &Img, uint32_t SequenceID = 0) const;
/// Add info on kernels using assert into cache
void cacheKernelUsesAssertInfo(RTDeviceBinaryImage &Img);
/// Add info on kernels using local arg into cache
void cacheKernelImplicitLocalArg(RTDeviceBinaryImage &Img);
std::set<RTDeviceBinaryImage *>
collectDeviceImageDeps(const RTDeviceBinaryImage &Img, const device &Dev);
std::set<RTDeviceBinaryImage *>
collectDeviceImageDepsForImportedSymbols(const RTDeviceBinaryImage &Img,
const device &Dev);
std::set<RTDeviceBinaryImage *>
collectDependentDeviceImagesForVirtualFunctions(
const RTDeviceBinaryImage &Img, const device &Dev);
bool isSpecialDeviceImage(RTDeviceBinaryImage *BinImage);
bool isSpecialDeviceImageShouldBeUsed(RTDeviceBinaryImage *BinImage,
const device &Dev);
protected:
/// The three maps below are used during kernel resolution. Any kernel is
/// identified by its name.
using RTDeviceBinaryImageUPtr = std::unique_ptr<RTDeviceBinaryImage>;
using DynRTDeviceBinaryImageUPtr = std::unique_ptr<DynRTDeviceBinaryImage>;
/// Maps names of kernels to their unique kernel IDs.
/// TODO: Use std::unordered_set with transparent hash and equality functions
/// when C++20 is enabled for the runtime library.
/// Access must be guarded by the m_KernelIDsMutex mutex.
//
std::unordered_map<std::string, kernel_id> m_KernelName2KernelIDs;
// Maps KernelIDs to device binary images. There can be more than one image
// in case of SPIRV + AOT.
// Using shared_ptr to avoid expensive copy of the vector.
/// Access must be guarded by the m_KernelIDsMutex mutex.
std::unordered_multimap<kernel_id, RTDeviceBinaryImage *>
m_KernelIDs2BinImage;
// Maps device binary image to a vector of kernel ids in this image.
// Using shared_ptr to avoid expensive copy of the vector.
// The vector is initialized in addImages function and is supposed to be
// immutable afterwards.
/// Access must be guarded by the m_KernelIDsMutex mutex.
std::unordered_map<RTDeviceBinaryImage *,
std::shared_ptr<std::vector<kernel_id>>>
m_BinImg2KernelIDs;
/// Protects kernel ID cache.
/// NOTE: This may be acquired while \ref Sync::getGlobalLock() is held so to
/// avoid deadlocks care must be taken not to acquire
/// \ref Sync::getGlobalLock() while holding this mutex.
std::mutex m_KernelIDsMutex;
/// Caches all found service kernels to expedite future checks. A SYCL service
/// kernel is a kernel that has not been defined by the user but is instead
/// generated by the SYCL runtime. Service kernel name types must be declared
/// in the sycl::detail::__sycl_service_kernel__ namespace which is
/// exclusively used for this purpose.
/// Access must be guarded by the m_KernelIDsMutex mutex.
std::unordered_multimap<std::string, RTDeviceBinaryImage *> m_ServiceKernels;
/// Caches all exported symbols to allow faster lookup when excluding these
// from kernel bundles.
/// Access must be guarded by the m_KernelIDsMutex mutex.
std::unordered_multimap<std::string, RTDeviceBinaryImage *>
m_ExportedSymbolImages;
/// Keeps all device images we are refering to during program lifetime. Used
/// for proper cleanup.
std::unordered_map<sycl_device_binary, RTDeviceBinaryImageUPtr>
m_DeviceImages;
/// Maps names of built-in kernels to their unique kernel IDs.
/// Access must be guarded by the m_BuiltInKernelIDsMutex mutex.
std::unordered_map<std::string, kernel_id> m_BuiltInKernelIDs;
/// Caches list of device images that use or provide virtual functions from
/// the same set. Used to simplify access.
std::unordered_map<std::string, std::set<RTDeviceBinaryImage *>>
m_VFSet2BinImage;
/// Protects built-in kernel ID cache.
std::mutex m_BuiltInKernelIDsMutex;
// Keeps track of ur_program to image correspondence. Needed for:
// - knowing which specialization constants are used in the program and
// injecting their current values before compiling the SPIR-V; the binary
// image object has info about all spec constants used in the module
// - finding kernel argument masks for kernels associated with each
// ur_program
// NOTE: using RTDeviceBinaryImage raw pointers is OK, since they are not
// referenced from outside SYCL runtime and RTDeviceBinaryImage object
// lifetime matches program manager's one.
// NOTE: keys in the map can be invalid (reference count went to zero and
// the underlying program disposed of), so the map can't be used in any way
// other than binary image lookup with known live UrProgram as the key.
// NOTE: access is synchronized via the MNativeProgramsMutex
std::unordered_multimap<
ur_program_handle_t,
std::pair<std::weak_ptr<context_impl>, const RTDeviceBinaryImage *>>
NativePrograms;
/// Protects NativePrograms that can be changed by class' methods.
std::mutex MNativeProgramsMutex;
using KernelNameToArgMaskMap = std::unordered_map<std::string, KernelArgMask>;
/// Maps binary image and kernel name pairs to kernel argument masks which
/// specify which arguments were eliminated during device code optimization.
std::unordered_map<const RTDeviceBinaryImage *, KernelNameToArgMaskMap>
m_EliminatedKernelArgMasks;
/// True iff a SPIR-V file has been specified with an environment variable
bool m_UseSpvFile = false;
RTDeviceBinaryImageUPtr m_SpvFileImage;
std::set<std::string> m_KernelUsesAssert;
std::unordered_map<std::string, int> m_KernelImplicitLocalArgPos;
// Sanitizer type used in device image
SanitizerType m_SanitizerFoundInImage;
// Maps between device_global identifiers and associated information.
std::unordered_map<std::string, std::unique_ptr<DeviceGlobalMapEntry>>
m_DeviceGlobals;
std::unordered_map<const void *, DeviceGlobalMapEntry *> m_Ptr2DeviceGlobal;
/// Protects m_DeviceGlobals and m_Ptr2DeviceGlobal.
std::mutex m_DeviceGlobalsMutex;
// Maps between host_pipe identifiers and associated information.
std::unordered_map<std::string, std::unique_ptr<HostPipeMapEntry>>
m_HostPipes;
std::unordered_map<const void *, HostPipeMapEntry *> m_Ptr2HostPipe;
/// Protects m_HostPipes and m_Ptr2HostPipe.
std::mutex m_HostPipesMutex;
using MaterializedEntries =
std::map<std::vector<unsigned char>, ur_kernel_handle_t>;
std::unordered_map<std::string, MaterializedEntries> m_MaterializedKernels;
// Holds bfloat16 device library images, the 1st element is for fallback
// version and 2nd is for native version. These bfloat16 device library
// images are provided by compiler long time ago, we expect no further
// update, so keeping 1 copy should be OK.
std::array<DynRTDeviceBinaryImageUPtr, 2> m_Bfloat16DeviceLibImages;
friend class ::ProgramManagerTest;
};
} // namespace detail
} // namespace _V1
} // namespace sycl