Skip to content

Commit

Permalink
drm/amdgpu: create function to check RAS RMA status
Browse files Browse the repository at this point in the history
For the convenience of calling it globally.

Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
  • Loading branch information
Tao Zhou authored and alexdeucher committed Aug 6, 2024
1 parent 64a9052 commit 792be2e
Show file tree
Hide file tree
Showing 4 changed files with 19 additions and 8 deletions.
22 changes: 16 additions & 6 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
Original file line number Diff line number Diff line change
Expand Up @@ -2153,7 +2153,7 @@ static void amdgpu_ras_interrupt_poison_consumption_handler(struct ras_manager *
/* gpu reset is fallback for failed and default cases.
* For RMA case, amdgpu_umc_poison_handler will handle gpu reset.
*/
if (poison_stat && !con->is_rma) {
if (poison_stat && !amdgpu_ras_is_rma(adev)) {
event_id = amdgpu_ras_acquire_event_id(adev, type);
RAS_EVENT_LOG(adev, event_id,
"GPU reset for %s RAS poison consumption is issued!\n",
Expand Down Expand Up @@ -2945,7 +2945,7 @@ static void amdgpu_ras_do_page_retirement(struct work_struct *work)

amdgpu_ras_error_data_fini(&err_data);

if (err_cnt && con->is_rma)
if (err_cnt && amdgpu_ras_is_rma(adev))
amdgpu_ras_reset_gpu(adev);

amdgpu_ras_schedule_retirement_dwork(con,
Expand Down Expand Up @@ -3046,7 +3046,7 @@ static int amdgpu_ras_poison_consumption_handler(struct amdgpu_device *adev,
}

/* for RMA, amdgpu_ras_poison_creation_handler will trigger gpu reset */
if (reset_flags && !con->is_rma) {
if (reset_flags && !amdgpu_ras_is_rma(adev)) {
if (reset_flags & AMDGPU_RAS_GPU_RESET_MODE1_RESET)
reset = AMDGPU_RAS_GPU_RESET_MODE1_RESET;
else if (reset_flags & AMDGPU_RAS_GPU_RESET_MODE2_RESET)
Expand Down Expand Up @@ -3192,7 +3192,7 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
* This calling fails when is_rma is true or
* ret != 0.
*/
if (con->is_rma || ret)
if (amdgpu_ras_is_rma(adev) || ret)
goto free;

if (con->eeprom_control.ras_num_recs) {
Expand Down Expand Up @@ -3241,7 +3241,7 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
* Except error threshold exceeding case, other failure cases in this
* function would not fail amdgpu driver init.
*/
if (!con->is_rma)
if (!amdgpu_ras_is_rma(adev))
ret = 0;
else
ret = -EINVAL;
Expand Down Expand Up @@ -4284,7 +4284,7 @@ int amdgpu_ras_reset_gpu(struct amdgpu_device *adev)
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);

/* mode1 is the only selection for RMA status */
if (ras->is_rma) {
if (amdgpu_ras_is_rma(adev)) {
ras->gpu_reset_flags = 0;
ras->gpu_reset_flags |= AMDGPU_RAS_GPU_RESET_MODE1_RESET;
}
Expand Down Expand Up @@ -4824,3 +4824,13 @@ void amdgpu_ras_event_log_print(struct amdgpu_device *adev, u64 event_id,

va_end(args);
}

/**
 * amdgpu_ras_is_rma - query the RMA flag of a device's RAS context
 * @adev: amdgpu device whose RAS context is looked up
 *
 * Looks up the RAS context through amdgpu_ras_get_context() and reports
 * its is_rma flag, so callers do not have to fetch and NULL-check the
 * context themselves.
 *
 * Return: the context's is_rma value, or false when the device has no
 * RAS context.
 */
bool amdgpu_ras_is_rma(struct amdgpu_device *adev)
{
	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);

	/* A missing context is reported as "not RMA". */
	return ras && ras->is_rma;
}
1 change: 1 addition & 0 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
Original file line number Diff line number Diff line change
Expand Up @@ -974,4 +974,5 @@ __printf(3, 4)
void amdgpu_ras_event_log_print(struct amdgpu_device *adev, u64 event_id,
const char *fmt, ...);

bool amdgpu_ras_is_rma(struct amdgpu_device *adev);
#endif
2 changes: 1 addition & 1 deletion drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@ static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev,
amdgpu_umc_handle_bad_pages(adev, ras_error_status);

if ((err_data->ue_count || err_data->de_count) &&
(reset || (con && con->is_rma))) {
(reset || amdgpu_ras_is_rma(adev))) {
con->gpu_reset_flags |= reset;
amdgpu_ras_reset_gpu(adev);
}
Expand Down
2 changes: 1 addition & 1 deletion drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ static int gfx_v11_0_3_poison_consumption_handler(struct amdgpu_device *adev,
ras->gpu_reset_flags |= AMDGPU_RAS_GPU_RESET_MODE2_RESET;
}

if (con && !con->is_rma)
if (con && !amdgpu_ras_is_rma(adev))
amdgpu_ras_reset_gpu(adev);
}

Expand Down

0 comments on commit 792be2e

Please sign in to comment.