@@ -278,13 +278,15 @@ struct xe_pt_stage_bind_walk {
 	struct xe_vm *vm;
 	/** @tile: The tile we're building for. */
 	struct xe_tile *tile;
-	/** @default_pte: PTE flag only template. No address is associated */
-	u64 default_pte;
+	/** @default_vram_pte: PTE flag only template for VRAM. No address is associated */
+	u64 default_vram_pte;
+	/** @default_system_pte: PTE flag only template for system memory. No address is associated */
+	u64 default_system_pte;
 	/** @dma_offset: DMA offset to add to the PTE. */
 	u64 dma_offset;
 	/**
 	 * @needs_64k: This address range enforces 64K alignment and
-	 * granularity.
+	 * granularity on VRAM.
 	 */
 	bool needs_64K;
 	/**
@@ -515,13 +517,16 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset,
 	if (level == 0 || xe_pt_hugepte_possible(addr, next, level, xe_walk)) {
 		struct xe_res_cursor *curs = xe_walk->curs;
 		bool is_null = xe_vma_is_null(xe_walk->vma);
+		bool is_vram = is_null ? false : xe_res_is_vram(curs);
 
 		XE_WARN_ON(xe_walk->va_curs_start != addr);
 
 		pte = vm->pt_ops->pte_encode_vma(is_null ? 0 :
 						 xe_res_dma(curs) + xe_walk->dma_offset,
 						 xe_walk->vma, pat_index, level);
-		pte |= xe_walk->default_pte;
+		if (!is_null)
+			pte |= is_vram ? xe_walk->default_vram_pte :
+				xe_walk->default_system_pte;
 
 		/*
 		 * Set the XE_PTE_PS64 hint if possible, otherwise if
@@ -531,7 +536,7 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset,
 		if (xe_pt_is_pte_ps64K(addr, next, xe_walk)) {
 			xe_walk->vma->gpuva.flags |= XE_VMA_PTE_64K;
 			pte |= XE_PTE_PS64;
-		} else if (XE_WARN_ON(xe_walk->needs_64K)) {
+		} else if (XE_WARN_ON(xe_walk->needs_64K && is_vram)) {
 			return -EINVAL;
 		}
 	}
@@ -603,6 +608,44 @@ static const struct xe_pt_walk_ops xe_pt_stage_bind_ops = {
 	.pt_entry = xe_pt_stage_bind_entry,
 };
 
+/*
+ * Default atomic expectations for different allocation scenarios are as follows:
+ *
+ * 1. Traditional API: When the VM is not in LR mode:
+ *    - Device atomics are expected to function with all allocations.
+ *
+ * 2. Compute/SVM API: When the VM is in LR mode:
+ *    - Device atomics are the default behavior when the bo is placed in a single region.
+ *    - In all other cases device atomics will be disabled with AE=0 until an application
+ *      requests differently using an ioctl like madvise.
+ */
+static bool xe_atomic_for_vram(struct xe_vm *vm)
+{
+	return true;
+}
+
+static bool xe_atomic_for_system(struct xe_vm *vm, struct xe_bo *bo)
+{
+	struct xe_device *xe = vm->xe;
+
+	if (!xe->info.has_device_atomics_on_smem)
+		return false;
+
+	/*
+	 * If a SMEM+LMEM allocation is backed by SMEM, a device
+	 * atomic will cause a GPU page fault and the allocation
+	 * will then be migrated to LMEM; bind such allocations
+	 * with device atomics enabled.
+	 *
+	 * TODO: Revisit this. Perhaps add something like a
+	 * fault_on_atomics_in_system UAPI flag.
+	 * Note that this also prohibits GPU atomics in LR mode for
+	 * userptr and system memory on DGFX.
+	 */
+	return (!IS_DGFX(xe) || (!xe_vm_in_lr_mode(vm) ||
+				 (bo && xe_bo_has_single_placement(bo))));
+}
+
 /**
  * xe_pt_stage_bind() - Build a disconnected page-table tree for a given address
  * range.
@@ -629,99 +672,58 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma,
 {
 	struct xe_device *xe = tile_to_xe(tile);
 	struct xe_bo *bo = xe_vma_bo(vma);
-	bool is_devmem = !xe_vma_is_userptr(vma) && bo &&
-		(xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo));
 	struct xe_res_cursor curs;
+	struct xe_vm *vm = xe_vma_vm(vma);
 	struct xe_pt_stage_bind_walk xe_walk = {
 		.base = {
 			.ops = &xe_pt_stage_bind_ops,
 			.shifts = xe_normal_pt_shifts,
 			.max_level = XE_PT_HIGHEST_LEVEL,
 			.staging = true,
 		},
-		.vm = xe_vma_vm(vma),
+		.vm = vm,
 		.tile = tile,
 		.curs = &curs,
 		.va_curs_start = range ? range->base.itree.start :
 			xe_vma_start(vma),
 		.vma = vma,
 		.wupd.entries = entries,
 	};
-	struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id];
+	struct xe_pt *pt = vm->pt_root[tile->id];
 	int ret;
 
 	if (range) {
 		/* Move this entire thing to xe_svm.c? */
-		xe_svm_notifier_lock(xe_vma_vm(vma));
+		xe_svm_notifier_lock(vm);
 		if (!xe_svm_range_pages_valid(range)) {
 			xe_svm_range_debug(range, "BIND PREPARE - RETRY");
-			xe_svm_notifier_unlock(xe_vma_vm(vma));
+			xe_svm_notifier_unlock(vm);
 			return -EAGAIN;
 		}
 		if (xe_svm_range_has_dma_mapping(range)) {
 			xe_res_first_dma(range->base.dma_addr, 0,
 					 range->base.itree.last + 1 - range->base.itree.start,
 					 &curs);
-			is_devmem = xe_res_is_vram(&curs);
-			if (is_devmem)
-				xe_svm_range_debug(range, "BIND PREPARE - DMA VRAM");
-			else
-				xe_svm_range_debug(range, "BIND PREPARE - DMA");
+			xe_svm_range_debug(range, "BIND PREPARE - MIXED");
 		} else {
 			xe_assert(xe, false);
 		}
 		/*
 		 * Note, when unlocking the resource cursor dma addresses may become
 		 * stale, but the bind will be aborted anyway at commit time.
 		 */
-		xe_svm_notifier_unlock(xe_vma_vm(vma));
+		xe_svm_notifier_unlock(vm);
 	}
 
-	xe_walk.needs_64K = (xe_vma_vm(vma)->flags & XE_VM_FLAG_64K) && is_devmem;
-
-	/**
-	 * Default atomic expectations for different allocation scenarios are as follows:
-	 *
-	 * 1. Traditional API: When the VM is not in LR mode:
-	 *    - Device atomics are expected to function with all allocations.
-	 *
-	 * 2. Compute/SVM API: When the VM is in LR mode:
-	 *    - Device atomics are the default behavior when the bo is placed in a single region.
-	 *    - In all other cases device atomics will be disabled with AE=0 until an application
-	 *      request differently using a ioctl like madvise.
-	 */
+	xe_walk.needs_64K = (vm->flags & XE_VM_FLAG_64K);
 	if (vma->gpuva.flags & XE_VMA_ATOMIC_PTE_BIT) {
-		if (xe_vm_in_lr_mode(xe_vma_vm(vma))) {
-			if (bo && xe_bo_has_single_placement(bo))
-				xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE;
-			/**
-			 * If a SMEM+LMEM allocation is backed by SMEM, a device
-			 * atomics will cause a gpu page fault and which then
-			 * gets migrated to LMEM, bind such allocations with
-			 * device atomics enabled.
-			 */
-			else if (is_devmem)
-				xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE;
-		} else {
-			xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE;
-		}
-
-		/**
-		 * Unset AE if the platform(PVC) doesn't support it on an
-		 * allocation
-		 */
-		if (!xe->info.has_device_atomics_on_smem && !is_devmem)
-			xe_walk.default_pte &= ~XE_USM_PPGTT_PTE_AE;
-	}
-
-	if (is_devmem) {
-		xe_walk.default_pte |= XE_PPGTT_PTE_DM;
-		xe_walk.dma_offset = bo ? vram_region_gpu_offset(bo->ttm.resource) : 0;
+		xe_walk.default_vram_pte = xe_atomic_for_vram(vm) ? XE_USM_PPGTT_PTE_AE : 0;
+		xe_walk.default_system_pte = xe_atomic_for_system(vm, bo) ?
+			XE_USM_PPGTT_PTE_AE : 0;
 	}
 
-	if (!xe_vma_has_no_bo(vma) && xe_bo_is_stolen(bo))
-		xe_walk.dma_offset = xe_ttm_stolen_gpu_offset(xe_bo_device(bo));
-
+	xe_walk.default_vram_pte |= XE_PPGTT_PTE_DM;
+	xe_walk.dma_offset = bo ? vram_region_gpu_offset(bo->ttm.resource) : 0;
 	if (!range)
 		xe_bo_assert_held(bo);
 
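
For readers following the change, here is a minimal, self-contained sketch (plain C, not driver code) of the flow the patch introduces: xe_pt_stage_bind() now prepares two PTE flag templates, one for VRAM and one for system memory, and xe_pt_stage_bind_entry() picks between them per PTE depending on whether the resource cursor currently points at VRAM. The MOCK_* constants and the trimmed-down struct below are stand-ins for the driver's XE_USM_PPGTT_PTE_AE / XE_PPGTT_PTE_DM bits and xe_pt_stage_bind_walk; the helper shapes mirror the diff, but the code is illustrative only.

/*
 * Illustrative sketch only: mock bits and a trimmed-down walk struct
 * standing in for the Xe driver's PTE templates.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define MOCK_PTE_AE (1ull << 10)	/* stand-in for XE_USM_PPGTT_PTE_AE */
#define MOCK_PTE_DM (1ull << 11)	/* stand-in for XE_PPGTT_PTE_DM */

struct mock_bind_walk {
	uint64_t default_vram_pte;	/* template OR'd into VRAM-backed PTEs */
	uint64_t default_system_pte;	/* template OR'd into system-memory PTEs */
};

/*
 * Mirrors the new per-entry selection: NULL bindings get no template,
 * otherwise the VRAM or system template is chosen per resource.
 */
static uint64_t apply_default_pte(const struct mock_bind_walk *walk,
				  uint64_t pte, bool is_null, bool is_vram)
{
	if (!is_null)
		pte |= is_vram ? walk->default_vram_pte :
			walk->default_system_pte;
	return pte;
}

int main(void)
{
	/*
	 * Set the templates up the way the reworked xe_pt_stage_bind() does:
	 * VRAM always carries the device-memory bit, while atomic-enable is
	 * decided separately for VRAM and system memory (here hard-coded
	 * stand-ins for xe_atomic_for_vram()/xe_atomic_for_system()).
	 */
	bool atomics_ok_vram = true;
	bool atomics_ok_system = false;
	struct mock_bind_walk walk = {
		.default_vram_pte = (atomics_ok_vram ? MOCK_PTE_AE : 0) | MOCK_PTE_DM,
		.default_system_pte = atomics_ok_system ? MOCK_PTE_AE : 0,
	};

	printf("vram pte flags:   0x%llx\n",
	       (unsigned long long)apply_default_pte(&walk, 0, false, true));
	printf("system pte flags: 0x%llx\n",
	       (unsigned long long)apply_default_pte(&walk, 0, false, false));
	return 0;
}

The practical difference from the old single default_pte is visible in the SVM path: a range with mixed backing ("BIND PREPARE - MIXED") now gets the device-memory and atomic-enable bits decided per PTE from the cursor rather than once for the whole bind.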