Skip to content

Commit

Permalink
drm/amd/display: fix a memleak issue when driver is removed
Browse files Browse the repository at this point in the history
[ Upstream commit d4f36e5 ]

Running "modprobe amdgpu" the second time (followed by a modprobe -r
amdgpu) causes a call trace like:

[  845.212163] Memory manager not clean during takedown.
[  845.212170] WARNING: CPU: 4 PID: 2481 at drivers/gpu/drm/drm_mm.c:999 drm_mm_takedown+0x2b/0x40
[  845.212177] Modules linked in: amdgpu(OE-) amddrm_ttm_helper(OE) amddrm_buddy(OE) amdxcp(OE) amd_sched(OE) drm_exec drm_suballoc_helper drm_display_helper i2c_algo_bit amdttm(OE) amdkcl(OE) cec rc_core sunrpc qrtr intel_rapl_msr intel_rapl_common snd_hda_codec_hdmi edac_mce_amd snd_hda_intel snd_intel_dspcfg snd_intel_sdw_acpi snd_usb_audio snd_hda_codec snd_usbmidi_lib kvm_amd snd_hda_core snd_ump mc snd_hwdep kvm snd_pcm snd_seq_midi snd_seq_midi_event irqbypass crct10dif_pclmul snd_rawmidi polyval_clmulni polyval_generic ghash_clmulni_intel sha256_ssse3 sha1_ssse3 snd_seq aesni_intel crypto_simd snd_seq_device cryptd snd_timer mfd_aaeon asus_nb_wmi eeepc_wmi joydev asus_wmi snd ledtrig_audio sparse_keymap ccp wmi_bmof input_leds k10temp i2c_piix4 platform_profile rapl soundcore gpio_amdpt mac_hid binfmt_misc msr parport_pc ppdev lp parport efi_pstore nfnetlink dmi_sysfs ip_tables x_tables autofs4 hid_logitech_hidpp hid_logitech_dj hid_generic usbhid hid ahci xhci_pci igc crc32_pclmul libahci xhci_pci_renesas video
[  845.212284]  wmi [last unloaded: amddrm_ttm_helper(OE)]
[  845.212290] CPU: 4 PID: 2481 Comm: modprobe Tainted: G        W  OE      6.8.0-31-generic torvalds#31-Ubuntu
[  845.212296] RIP: 0010:drm_mm_takedown+0x2b/0x40
[  845.212300] Code: 1f 44 00 00 48 8b 47 38 48 83 c7 38 48 39 f8 75 09 31 c0 31 ff e9 90 2e 86 00 55 48 c7 c7 d0 f6 8e 8a 48 89 e5 e8 f5 db 45 ff <0f> 0b 5d 31 c0 31 ff e9 74 2e 86 00 66 0f 1f 84 00 00 00 00 00 90
[  845.212302] RSP: 0018:ffffb11302127ae0 EFLAGS: 00010246
[  845.212305] RAX: 0000000000000000 RBX: ffff92aa5020fc08 RCX: 0000000000000000
[  845.212307] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000
[  845.212309] RBP: ffffb11302127ae0 R08: 0000000000000000 R09: 0000000000000000
[  845.212310] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000004
[  845.212312] R13: ffff92aa50200000 R14: ffff92aa5020fb10 R15: ffff92aa5020faa0
[  845.212313] FS:  0000707dd7c7c080(0000) GS:ffff92b93de00000(0000) knlGS:0000000000000000
[  845.212316] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  845.212318] CR2: 00007d48b0aee200 CR3: 0000000115a58000 CR4: 0000000000f50ef0
[  845.212320] PKRU: 55555554
[  845.212321] Call Trace:
[  845.212323]  <TASK>
[  845.212328]  ? show_regs+0x6d/0x80
[  845.212333]  ? __warn+0x89/0x160
[  845.212339]  ? drm_mm_takedown+0x2b/0x40
[  845.212344]  ? report_bug+0x17e/0x1b0
[  845.212350]  ? handle_bug+0x51/0xa0
[  845.212355]  ? exc_invalid_op+0x18/0x80
[  845.212359]  ? asm_exc_invalid_op+0x1b/0x20
[  845.212366]  ? drm_mm_takedown+0x2b/0x40
[  845.212371]  amdgpu_gtt_mgr_fini+0xa9/0x130 [amdgpu]
[  845.212645]  amdgpu_ttm_fini+0x264/0x340 [amdgpu]
[  845.212770]  amdgpu_bo_fini+0x2e/0xc0 [amdgpu]
[  845.212894]  gmc_v12_0_sw_fini+0x2a/0x40 [amdgpu]
[  845.213036]  amdgpu_device_fini_sw+0x11a/0x590 [amdgpu]
[  845.213159]  amdgpu_driver_release_kms+0x16/0x40 [amdgpu]
[  845.213302]  devm_drm_dev_init_release+0x5e/0x90
[  845.213305]  devm_action_release+0x12/0x30
[  845.213308]  release_nodes+0x42/0xd0
[  845.213311]  devres_release_all+0x97/0xe0
[  845.213314]  device_unbind_cleanup+0x12/0x80
[  845.213317]  device_release_driver_internal+0x230/0x270
[  845.213319]  ? srso_alias_return_thunk+0x5/0xfbef5

This is caused by lost memory during early init phase. First time driver
is removed, memory is freed but when second time the driver is inserted,
VBIOS dmub is not active, since the PSP policy is to retain the driver
loaded version on subsequent warm boots. Hence, communication with VBIOS
DMUB fails.

Fix this by aborting further communication with vbios dmub and release
the memory immediately.

Fixes: f59549c ("drm/amd/display: free bo used for dmub bounding box")
Reviewed-by: Rodrigo Siqueira <rodrigo.siqueira@amd.com>
Signed-off-by: Aurabindo Pillai <aurabindo.pillai@amd.com>
Signed-off-by: Rodrigo Siqueira <rodrigo.siqueira@amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
  • Loading branch information
Aurabindo Pillai authored and gregkh committed Dec 5, 2024
1 parent 4e5d50b commit e51cbe4
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 15 deletions.
32 changes: 28 additions & 4 deletions drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
Original file line number Diff line number Diff line change
Expand Up @@ -1681,6 +1681,26 @@ dm_allocate_gpu_mem(
return da->cpu_ptr;
}

void
dm_free_gpu_mem(
struct amdgpu_device *adev,
enum dc_gpu_mem_alloc_type type,
void *pvMem)
{
struct dal_allocation *da;

/* walk the da list in DM */
list_for_each_entry(da, &adev->dm.da_list, list) {
if (pvMem == da->cpu_ptr) {
amdgpu_bo_free_kernel(&da->bo, &da->gpu_addr, &da->cpu_ptr);
list_del(&da->list);
kfree(da);
break;
}
}

}

static enum dmub_status
dm_dmub_send_vbios_gpint_command(struct amdgpu_device *adev,
enum dmub_gpint_command command_code,
Expand Down Expand Up @@ -1747,16 +1767,20 @@ static struct dml2_soc_bb *dm_dmub_get_vbios_bounding_box(struct amdgpu_device *
/* Send the chunk */
ret = dm_dmub_send_vbios_gpint_command(adev, send_addrs[i], chunk, 30000);
if (ret != DMUB_STATUS_OK)
/* No need to free bb here since it shall be done in dm_sw_fini() */
return NULL;
goto free_bb;
}

/* Now ask DMUB to copy the bb */
ret = dm_dmub_send_vbios_gpint_command(adev, DMUB_GPINT__BB_COPY, 1, 200000);
if (ret != DMUB_STATUS_OK)
return NULL;
goto free_bb;

return bb;

free_bb:
dm_free_gpu_mem(adev, DC_MEM_ALLOC_TYPE_GART, (void *) bb);
return NULL;

}

static enum dmub_ips_disable_type dm_get_default_ips_mode(
Expand Down Expand Up @@ -2520,11 +2544,11 @@ static int dm_sw_fini(void *handle)
amdgpu_bo_free_kernel(&da->bo, &da->gpu_addr, &da->cpu_ptr);
list_del(&da->list);
kfree(da);
adev->dm.bb_from_dmub = NULL;
break;
}
}

adev->dm.bb_from_dmub = NULL;

kfree(adev->dm.dmub_fb_info);
adev->dm.dmub_fb_info = NULL;
Expand Down
3 changes: 3 additions & 0 deletions drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
Original file line number Diff line number Diff line change
Expand Up @@ -1004,6 +1004,9 @@ void *dm_allocate_gpu_mem(struct amdgpu_device *adev,
enum dc_gpu_mem_alloc_type type,
size_t size,
long long *addr);
void dm_free_gpu_mem(struct amdgpu_device *adev,
enum dc_gpu_mem_alloc_type type,
void *addr);

bool amdgpu_dm_is_headless(struct amdgpu_device *adev);

Expand Down
13 changes: 2 additions & 11 deletions drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
Original file line number Diff line number Diff line change
Expand Up @@ -1055,17 +1055,8 @@ void dm_helpers_free_gpu_mem(
void *pvMem)
{
struct amdgpu_device *adev = ctx->driver_context;
struct dal_allocation *da;

/* walk the da list in DM */
list_for_each_entry(da, &adev->dm.da_list, list) {
if (pvMem == da->cpu_ptr) {
amdgpu_bo_free_kernel(&da->bo, &da->gpu_addr, &da->cpu_ptr);
list_del(&da->list);
kfree(da);
break;
}
}

dm_free_gpu_mem(adev, type, pvMem);
}

bool dm_helpers_dmub_outbox_interrupt_control(struct dc_context *ctx, bool enable)
Expand Down

0 comments on commit e51cbe4

Please sign in to comment.