Skip to content

Commit

Permalink
drm/amdgpu: sync page table freeing with tlb flush
Browse files Browse the repository at this point in the history
The idea behind this patch is to delay the freeing of PT entry objects
until the TLB flush is done.

This patch:
- Adds a tlb_flush_waitlist in amdgpu_vm_update_params which will keep the
  objects that need to be freed after tlb_flush.
- Adds PT entries in this list in amdgpu_vm_ptes_update after finding
  the PT entry.
- Changes functionality of amdgpu_vm_pt_free_dfs from (df_search + free)
  to simply freeing of the BOs, also renames it to
  amdgpu_vm_pt_free_list to reflect this same.
- Exports function amdgpu_vm_pt_free_list to be called directly.
- Calls amdgpu_vm_pt_free_list directly from amdgpu_vm_update_range.

V2: rebase
V4: Addressed review comments from Christian
    - add only locked PTEs entries in TLB flush waitlist.
    - do not create a separate function for list flush.
    - do not create a new lock for TLB flush.
    - there is no need to wait on tlb_flush_fence exclusively.

V5: Addressed review comments from Christian
    - change the amdgpu_vm_pt_free_dfs's functionality to simple freeing
      of the objects and rename it.
    - add all the PTE objects in params->tlb_flush_waitlist
    - let amdgpu_vm_pt_free_root handle the freeing of BOs independently
    - call amdgpu_vm_pt_free_list directly

V6: Rebase
V7: Rebase
V8: Added a NULL check to fix this backtrace issue:
[  415.351447] BUG: kernel NULL pointer dereference, address: 0000000000000008
[  415.359245] #PF: supervisor write access in kernel mode
[  415.365081] #PF: error_code(0x0002) - not-present page
[  415.370817] PGD 101259067 P4D 101259067 PUD 10125a067 PMD 0
[  415.377140] Oops: 0002 [#1] PREEMPT SMP NOPTI
[  415.382004] CPU: 0 PID: 25481 Comm: test_with_MPI.e Tainted: G           OE     5.18.2-mi300-build-140423-ubuntu-22.04+ #24
[  415.394437] Hardware name: AMD Corporation Sh51p/Sh51p, BIOS RMO1001AS 02/21/2024
[  415.402797] RIP: 0010:amdgpu_vm_ptes_update+0x6fd/0xa10 [amdgpu]
[  415.409648] Code: 4c 89 ff 4d 8d 66 30 e8 f1 ed ff ff 48 85 db 74 42 48 39 5d a0 74 40 48 8b 53 20 48 8b 4b 18 48 8d 43 18 48 8d 75 b0 4c 89 ff <48
> 89 51 08 48 89 0a 49 8b 56 30 48 89 42 08 48 89 53 18 4c 89 63
[  415.430621] RSP: 0018:ffffc9000401f990 EFLAGS: 00010287
[  415.436456] RAX: ffff888147bb82f0 RBX: ffff888147bb82d8 RCX: 0000000000000000
[  415.444426] RDX: 0000000000000000 RSI: ffffc9000401fa30 RDI: ffff888161f80000
[  415.452397] RBP: ffffc9000401fa80 R08: 0000000000000000 R09: ffffc9000401fa00
[  415.460368] R10: 00000007f0cc0000 R11: 00000007f0c85000 R12: ffffc9000401fb20
[  415.468340] R13: 00000007f0d00000 R14: ffffc9000401faf0 R15: ffff888161f80000
[  415.476312] FS:  00007f132ff89840(0000) GS:ffff889f87c00000(0000) knlGS:0000000000000000
[  415.485350] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  415.491767] CR2: 0000000000000008 CR3: 0000000161d46003 CR4: 0000000000770ef0
[  415.499738] PKRU: 55555554
[  415.502750] Call Trace:
[  415.505482]  <TASK>
[  415.507825]  amdgpu_vm_update_range+0x32a/0x880 [amdgpu]
[  415.513869]  amdgpu_vm_clear_freed+0x117/0x250 [amdgpu]
[  415.519814]  amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu+0x18c/0x250 [amdgpu]
[  415.527729]  kfd_ioctl_unmap_memory_from_gpu+0xed/0x340 [amdgpu]
[  415.534551]  kfd_ioctl+0x3b6/0x510 [amdgpu]

V9: Addressed review comments from Christian
    - No NULL check reqd for root PT freeing
    - Free PT list regardless of needs_flush
    - Move adding BOs in list in a separate function

V10: Added Christian's RB

Cc: Christian König <Christian.Koenig@amd.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: Felix Kuehling <felix.kuehling@amd.com>
Cc: Rajneesh Bhardwaj <rajneesh.bhardwaj@amd.com>
Acked-by: Felix Kuehling <felix.kuehling@amd.com>
Acked-by: Rajneesh Bhardwaj <rajneesh.bhardwaj@amd.com>
Reviewed-by: Christian König <Christian.Koenig@amd.com>
Tested-by: Rajneesh Bhardwaj <rajneesh.bhardwaj@amd.com>
Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
Change-Id: Ifd7157af197912a0ec6e5d820609e9db72fdab14
  • Loading branch information
contactshashanksharma authored and kentrussell committed Apr 18, 2024
1 parent b60d163 commit 3bdf4a7
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 23 deletions.
3 changes: 3 additions & 0 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
Original file line number Diff line number Diff line change
Expand Up @@ -997,6 +997,7 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
params.unlocked = unlocked;
params.needs_flush = flush_tlb;
params.allow_override = allow_override;
INIT_LIST_HEAD(&params.tlb_flush_waitlist);

/* Implicitly sync to command submissions in the same VM before
* unmapping. Sync to moving fences before mapping.
Expand Down Expand Up @@ -1101,6 +1102,8 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
tlb_cb = NULL;
}

amdgpu_vm_pt_free_list(adev, &params);

error_free:
kfree(tlb_cb);
amdgpu_vm_eviction_unlock(vm);
Expand Down
7 changes: 7 additions & 0 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,11 @@ struct amdgpu_vm_update_params {
* to be overridden for NUMA local memory.
*/
bool allow_override;

/**
* @tlb_flush_waitlist: temporary storage for BOs until tlb_flush
*/
struct list_head tlb_flush_waitlist;
};

struct amdgpu_vm_update_funcs {
Expand Down Expand Up @@ -543,6 +548,8 @@ int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params,
uint64_t start, uint64_t end,
uint64_t dst, uint64_t flags);
void amdgpu_vm_pt_free_work(struct work_struct *work);
void amdgpu_vm_pt_free_list(struct amdgpu_device *adev,
struct amdgpu_vm_update_params *params);

#if defined(CONFIG_DEBUG_FS)
void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m);
Expand Down
66 changes: 43 additions & 23 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
Original file line number Diff line number Diff line change
Expand Up @@ -679,40 +679,58 @@ void amdgpu_vm_pt_free_work(struct work_struct *work)
}

/**
* amdgpu_vm_pt_free_dfs - free PD/PT levels
* amdgpu_vm_pt_free_list - free PD/PT levels
*
* @adev: amdgpu device structure
* @vm: amdgpu vm structure
* @start: optional cursor where to start freeing PDs/PTs
* @unlocked: vm resv unlock status
* @params: see amdgpu_vm_update_params definition
*
* Free the page directory or page table level and all sub levels.
* Free the page directory objects saved in the flush list
*/
static void amdgpu_vm_pt_free_dfs(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
struct amdgpu_vm_pt_cursor *start,
bool unlocked)
void amdgpu_vm_pt_free_list(struct amdgpu_device *adev,
struct amdgpu_vm_update_params *params)
{
struct amdgpu_vm_pt_cursor cursor;
struct amdgpu_vm_bo_base *entry;
struct amdgpu_vm_bo_base *entry, *next;
struct amdgpu_vm *vm = params->vm;
bool unlocked = params->unlocked;

if (list_empty(&params->tlb_flush_waitlist))
return;

if (unlocked) {
spin_lock(&vm->status_lock);
for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)
list_move(&entry->vm_status, &vm->pt_freed);

if (start)
list_move(&start->entry->vm_status, &vm->pt_freed);
list_splice_init(&vm->pt_freed, &params->tlb_flush_waitlist);
spin_unlock(&vm->status_lock);
schedule_work(&vm->pt_free_work);
return;
}

for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)
list_for_each_entry_safe(entry, next, &params->tlb_flush_waitlist, vm_status)
amdgpu_vm_pt_free(entry);
}

if (start)
amdgpu_vm_pt_free(start->entry);
/**
* amdgpu_vm_pt_add_list - add PD/PT level to the flush list
*
* @params: parameters for the update
* @cursor: first PT entry to start DF search from, non NULL
*
* This list will be freed after TLB flush.
*/
static void amdgpu_vm_pt_add_list(struct amdgpu_vm_update_params *params,
struct amdgpu_vm_pt_cursor *cursor)
{
struct amdgpu_vm_pt_cursor seek;
struct amdgpu_vm_bo_base *entry;

spin_lock(&params->vm->status_lock);
for_each_amdgpu_vm_pt_dfs_safe(params->adev, params->vm, cursor, seek, entry) {
if (entry && entry->bo)
list_move(&entry->vm_status, &params->tlb_flush_waitlist);
}

/* enter start node now */
list_move(&cursor->entry->vm_status, &params->tlb_flush_waitlist);
spin_unlock(&params->vm->status_lock);
}

/**
Expand All @@ -724,7 +742,11 @@ static void amdgpu_vm_pt_free_dfs(struct amdgpu_device *adev,
*/
void amdgpu_vm_pt_free_root(struct amdgpu_device *adev, struct amdgpu_vm *vm)
{
amdgpu_vm_pt_free_dfs(adev, vm, NULL, false);
struct amdgpu_vm_pt_cursor cursor;
struct amdgpu_vm_bo_base *entry;

for_each_amdgpu_vm_pt_dfs_safe(adev, vm, NULL, cursor, entry)
amdgpu_vm_pt_free(entry);
}

/**
Expand Down Expand Up @@ -1057,9 +1079,7 @@ int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params,
/* Make sure previous mapping is freed */
if (cursor.entry->bo) {
params->needs_flush = true;
amdgpu_vm_pt_free_dfs(adev, params->vm,
&cursor,
params->unlocked);
amdgpu_vm_pt_add_list(params, &cursor);
}
amdgpu_vm_pt_next(adev, &cursor);
}
Expand Down

0 comments on commit 3bdf4a7

Please sign in to comment.