Skip to content

Commit

Permalink
drm/amdgpu: Increase soft IH ring size
Browse files Browse the repository at this point in the history
[ Upstream commit bf80d34 ]

Retry faults are delegated to the soft IH ring and then processed by a
deferred worker. The current soft IH ring size, PAGE_SIZE, can store 128
entries, so the ring may overflow and drop retry faults, which causes the HW
to get stuck because the dropped retry faults are never recovered.

Increase the soft IH ring size to 8KB, which is enough to store 256 CAM
entries because we clear the CAM entry after handling the retry fault from
the soft ring.

Define the macros IH_RING_SIZE and IH_SW_RING_SIZE to remove the duplicated
constants.

Show a warning message if the soft IH ring overflows with CAM enabled,
because this should not happen.

Signed-off-by: Philip Yang <Philip.Yang@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
  • Loading branch information
PhilipYangA authored and gregkh committed Sep 23, 2023
1 parent 3ea9058 commit 02b8d71
Show file tree
Hide file tree
Showing 7 changed files with 20 additions and 13 deletions.
8 changes: 6 additions & 2 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
Original file line number Diff line number Diff line change
Expand Up @@ -138,15 +138,16 @@ void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
/**
* amdgpu_ih_ring_write - write IV to the ring buffer
*
* @adev: amdgpu_device pointer
* @ih: ih ring to write to
* @iv: the iv to write
* @num_dw: size of the iv in dw
*
* Writes an IV to the ring buffer using the CPU and increments the wptr.
* Used for testing and delegating IVs to a software ring.
*/
void amdgpu_ih_ring_write(struct amdgpu_ih_ring *ih, const uint32_t *iv,
unsigned int num_dw)
void amdgpu_ih_ring_write(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
const uint32_t *iv, unsigned int num_dw)
{
uint32_t wptr = le32_to_cpu(*ih->wptr_cpu) >> 2;
unsigned int i;
Expand All @@ -161,6 +162,9 @@ void amdgpu_ih_ring_write(struct amdgpu_ih_ring *ih, const uint32_t *iv,
if (wptr != READ_ONCE(ih->rptr)) {
wmb();
WRITE_ONCE(*ih->wptr_cpu, cpu_to_le32(wptr));
} else if (adev->irq.retry_cam_enabled) {
dev_warn_once(adev->dev, "IH soft ring buffer overflow 0x%X, 0x%X\n",
wptr, ih->rptr);
}
}

Expand Down
7 changes: 5 additions & 2 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@
/* Maximum number of IVs processed at once */
#define AMDGPU_IH_MAX_NUM_IVS 32

#define IH_RING_SIZE (256 * 1024)
#define IH_SW_RING_SIZE (8 * 1024) /* enough for 256 CAM entries */

struct amdgpu_device;
struct amdgpu_iv_entry;

Expand Down Expand Up @@ -97,8 +100,8 @@ struct amdgpu_ih_funcs {
int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
unsigned ring_size, bool use_bus_addr);
void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih);
void amdgpu_ih_ring_write(struct amdgpu_ih_ring *ih, const uint32_t *iv,
unsigned int num_dw);
void amdgpu_ih_ring_write(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
const uint32_t *iv, unsigned int num_dw);
int amdgpu_ih_wait_on_checkpoint_process_ts(struct amdgpu_device *adev,
struct amdgpu_ih_ring *ih);
int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih);
Expand Down
2 changes: 1 addition & 1 deletion drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
Original file line number Diff line number Diff line change
Expand Up @@ -493,7 +493,7 @@ void amdgpu_irq_delegate(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry,
unsigned int num_dw)
{
amdgpu_ih_ring_write(&adev->irq.ih_soft, entry->iv_entry, num_dw);
amdgpu_ih_ring_write(adev, &adev->irq.ih_soft, entry->iv_entry, num_dw);
schedule_work(&adev->irq.ih_soft_work);
}

Expand Down
4 changes: 2 additions & 2 deletions drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
Original file line number Diff line number Diff line change
Expand Up @@ -535,7 +535,7 @@ static int ih_v6_0_sw_init(void *handle)
* use bus address for ih ring by psp bl */
use_bus_addr =
(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) ? false : true;
r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 256 * 1024, use_bus_addr);
r = amdgpu_ih_ring_init(adev, &adev->irq.ih, IH_RING_SIZE, use_bus_addr);
if (r)
return r;

Expand All @@ -548,7 +548,7 @@ static int ih_v6_0_sw_init(void *handle)
/* initialize ih control register offset */
ih_v6_0_init_register_offset(adev);

r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, PAGE_SIZE, true);
r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, IH_SW_RING_SIZE, true);
if (r)
return r;

Expand Down
4 changes: 2 additions & 2 deletions drivers/gpu/drm/amd/amdgpu/navi10_ih.c
Original file line number Diff line number Diff line change
Expand Up @@ -565,7 +565,7 @@ static int navi10_ih_sw_init(void *handle)
use_bus_addr = false;
else
use_bus_addr = true;
r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 256 * 1024, use_bus_addr);
r = amdgpu_ih_ring_init(adev, &adev->irq.ih, IH_RING_SIZE, use_bus_addr);
if (r)
return r;

Expand All @@ -578,7 +578,7 @@ static int navi10_ih_sw_init(void *handle)
/* initialize ih control registers offset */
navi10_ih_init_register_offset(adev);

r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, PAGE_SIZE, true);
r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, IH_SW_RING_SIZE, true);
if (r)
return r;

Expand Down
4 changes: 2 additions & 2 deletions drivers/gpu/drm/amd/amdgpu/vega10_ih.c
Original file line number Diff line number Diff line change
Expand Up @@ -485,7 +485,7 @@ static int vega10_ih_sw_init(void *handle)
if (r)
return r;

r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 256 * 1024, true);
r = amdgpu_ih_ring_init(adev, &adev->irq.ih, IH_RING_SIZE, true);
if (r)
return r;

Expand All @@ -510,7 +510,7 @@ static int vega10_ih_sw_init(void *handle)
/* initialize ih control registers offset */
vega10_ih_init_register_offset(adev);

r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, PAGE_SIZE, true);
r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, IH_SW_RING_SIZE, true);
if (r)
return r;

Expand Down
4 changes: 2 additions & 2 deletions drivers/gpu/drm/amd/amdgpu/vega20_ih.c
Original file line number Diff line number Diff line change
Expand Up @@ -539,7 +539,7 @@ static int vega20_ih_sw_init(void *handle)
(adev->ip_versions[OSSSYS_HWIP][0] == IP_VERSION(4, 4, 2)))
use_bus_addr = false;

r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 256 * 1024, use_bus_addr);
r = amdgpu_ih_ring_init(adev, &adev->irq.ih, IH_RING_SIZE, use_bus_addr);
if (r)
return r;

Expand All @@ -565,7 +565,7 @@ static int vega20_ih_sw_init(void *handle)
/* initialize ih control registers offset */
vega20_ih_init_register_offset(adev);

r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, PAGE_SIZE, use_bus_addr);
r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, IH_SW_RING_SIZE, use_bus_addr);
if (r)
return r;

Expand Down

0 comments on commit 02b8d71

Please sign in to comment.