@@ -481,36 +481,25 @@ RT::PiProgram ProgramManager::getBuiltPIProgram(
481481 if (Prg)
482482 Prg->stableSerializeSpecConstRegistry (SpecConsts);
483483
484- // Check if root device architecture is homogeneous and we can optimize builds
485- // for sub-devices
484+ // Check if we can optimize program builds for sub-devices by using a program
485+ // built for the root device
486486 DeviceImplPtr RootDevImpl = DeviceImpl;
487487 while (!RootDevImpl->isRootDevice ()) {
488488 auto ParentDev = detail::getSyclObjImpl (
489489 RootDevImpl->get_info <info::device::parent_device>());
490+ // Sharing is allowed within a single context only
490491 if (!ContextImpl->hasDevice (ParentDev))
491492 break ;
492493 RootDevImpl = ParentDev;
493494 }
494495
495- pi_bool IsRootDeviceArchHomogeneous = PI_FALSE ;
496+ pi_bool MustBuildOnSubdevice = PI_TRUE ;
496497 ContextImpl->getPlugin ().call <PiApiKind::piDeviceGetInfo>(
497- RootDevImpl->getHandleRef (), PI_DEVICE_INFO_HOMOGENEOUS_ARCH,
498- sizeof (pi_bool), &IsRootDeviceArchHomogeneous, nullptr );
499-
500- // FIXME: the logic is modified to work around unintuitive Intel OpenCL CPU
501- // implementation behavior. Kernels created with the program built for root
502- // device can be re-used on sub-devices, but other combinations doesn't work
503- // (e.g. clGetKernelWorkGroupInfo returns CL_INVALID_KERNEL if kernel was
504- // created from the program built for sub-device and re-used either on root or
505- // other sub-device).
506- // To work around this case we optimize only one case: root device shares the
507- // same context with its sub-device(s). We built for the root device and
508- // cache the results.
509- // The expected solution is to build for any sub-device and use root device
510- // handle as cache key to share build results for any other sub-device or even
511- // a root device.
498+ RootDevImpl->getHandleRef (), PI_DEVICE_INFO_BUILD_ON_SUBDEVICE,
499+ sizeof (pi_bool), &MustBuildOnSubdevice, nullptr );
500+
512501 DeviceImplPtr Dev =
513- (IsRootDeviceArchHomogeneous == PI_TRUE) ? RootDevImpl : DeviceImpl ;
502+ (MustBuildOnSubdevice == PI_TRUE) ? DeviceImpl : RootDevImpl ;
514503 auto BuildF = [this , &M, &KSId, &ContextImpl, &Dev, Prg, &CompileOpts,
515504 &LinkOpts, &JITCompilationIsRequired, SpecConsts] {
516505 auto Context = createSyclObjFromImpl<context>(ContextImpl);
0 commit comments