diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index bb3fd53950dda..ac134d4c0531e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4238,12 +4238,17 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive, return r; } -static bool amdgpu_device_lock_adev(struct amdgpu_device *adev) +static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, + struct amdgpu_hive_info *hive) { if (atomic_cmpxchg(&adev->in_gpu_reset, 0, 1) != 0) return false; - down_write(&adev->reset_sem); + if (hive) { + down_write_nest_lock(&adev->reset_sem, &hive->hive_lock); + } else { + down_write(&adev->reset_sem); + } atomic_inc(&adev->gpu_reset_counter); switch (amdgpu_asic_reset_method(adev)) { @@ -4406,7 +4411,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, /* block all schedulers and reset given job's ring */ list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) { - if (!amdgpu_device_lock_adev(tmp_adev)) { + if (!amdgpu_device_lock_adev(tmp_adev, hive)) { dev_info(tmp_adev->dev, "Bailing on TDR for s_job:%llx, as another already in progress", job ? job->base.id : -1); r = 0;