Skip to content

Commit

Permalink
x86/mce/zhaoxin: Enable mcelog to decode PCIE, ZDI/ZPI, and DRAM errors
Browse files Browse the repository at this point in the history
zhaoxin inclusion
category: bugfix

-------------------

mcelog cannot decode PCIe, ZDI/ZPI, and DRAM errors that are reported in
Firmware First Mode (FFM).
This patch enables mcelog to decode PCIe, ZDI/ZPI, and DRAM errors that
occur on Zhaoxin processors, so that the cause of these errors can be
located quickly.

Signed-off-by: leoliu-oc <leoliu-oc@zhaoxin.com>
  • Loading branch information
leoliu-oc committed Jul 10, 2024
1 parent 19b7869 commit 85ff5df
Show file tree
Hide file tree
Showing 6 changed files with 232 additions and 1 deletion.
6 changes: 6 additions & 0 deletions arch/x86/include/asm/mce.h
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,12 @@ struct cper_sec_mem_err;
extern void apei_mce_report_mem_error(int corrected,
struct cper_sec_mem_err *mem_err);

/*
 * Zhaoxin-specific APEI -> MCE reporting helpers.
 *
 * Each one builds a struct mce from the given CPER section and hands it to
 * mce_log(); the memory, PCIe and ZDI variants use MCE banks 9/10, 6 and 5
 * respectively.
 *
 * NOTE(review): the first parameter is named "corrected" here but
 * "severity" in the definitions, and only the PCIe handler actually reads
 * it. Consider aligning the names.
 */
extern void zx_apei_mce_report_mem_error(int corrected, struct cper_sec_mem_err *mem_err);
struct cper_sec_pcie;
extern void zx_apei_mce_report_pcie_error(int corrected, struct cper_sec_pcie *pcie_err);
struct cper_sec_proc_generic;
extern void zx_apei_mce_report_zdi_error(int corrected, struct cper_sec_proc_generic *zdi_err);

/*
* Enumerate new IP types and HWID values in AMD processors which support
* Scalable MCA.
Expand Down
24 changes: 23 additions & 1 deletion arch/x86/kernel/acpi/apei.c
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,29 @@ int arch_apei_enable_cmcff(struct acpi_hest_header *hest_hdr, void *data)
/*
 * arch_apei_report_mem_error - forward a CPER memory error section to MCE.
 * @sev:     severity value supplied by the GHES caller
 * @mem_err: CPER memory error section payload
 *
 * Zhaoxin/Centaur parts that the Zhaoxin decoder supports get the
 * vendor-specific handler; every other CPU uses the generic one.
 *
 * zx_apei_mce_report_mem_error() returns without doing anything unless the
 * CPU is family 7 / model 91. Checking only the vendor here would therefore
 * silently drop memory error reports on all other Zhaoxin/Centaur models,
 * which previously went through apei_mce_report_mem_error(). The
 * family/model test below mirrors the one in the handler so those CPUs keep
 * the generic path.
 *
 * Compiles to an empty function when CONFIG_X86_MCE is not set.
 */
void arch_apei_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
{
#ifdef CONFIG_X86_MCE
	bool zx_vendor = boot_cpu_data.x86_vendor == X86_VENDOR_ZHAOXIN ||
			 boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR;

	if (zx_vendor && boot_cpu_data.x86 == 7 &&
	    boot_cpu_data.x86_model == 91)
		zx_apei_mce_report_mem_error(sev, mem_err);
	else
		apei_mce_report_mem_error(sev, mem_err);
#endif
}

/*
 * arch_apei_report_pcie_error - forward a CPER PCIe error section to MCE.
 * @sev:      severity value supplied by the GHES caller
 * @pcie_err: CPER PCIe error section payload
 *
 * Only Zhaoxin and Centaur CPUs have a handler; on any other vendor, or
 * when CONFIG_X86_MCE is not set, this does nothing.
 */
void arch_apei_report_pcie_error(int sev, struct cper_sec_pcie *pcie_err)
{
#ifdef CONFIG_X86_MCE
	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_ZHAOXIN:
	case X86_VENDOR_CENTAUR:
		zx_apei_mce_report_pcie_error(sev, pcie_err);
		break;
	default:
		/* No PCIe -> MCE translation for other vendors. */
		break;
	}
#endif
}

/*
 * arch_apei_report_zdi_error - forward a CPER generic-processor section
 * (used for ZDI/ZPI errors) to MCE.
 * @sev:     severity value supplied by the GHES caller
 * @zdi_err: CPER generic processor error section payload
 *
 * Only Zhaoxin and Centaur CPUs have a handler; on any other vendor, or
 * when CONFIG_X86_MCE is not set, this does nothing.
 */
void arch_apei_report_zdi_error(int sev, struct cper_sec_proc_generic *zdi_err)
{
#ifdef CONFIG_X86_MCE
	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_ZHAOXIN:
	case X86_VENDOR_CENTAUR:
		zx_apei_mce_report_zdi_error(sev, zdi_err);
		break;
	default:
		/* No ZDI -> MCE translation for other vendors. */
		break;
	}
#endif
}

Expand Down
165 changes: 165 additions & 0 deletions arch/x86/kernel/cpu/mce/apei.c
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,171 @@ void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err)
}
EXPORT_SYMBOL_GPL(apei_mce_report_mem_error);

void zx_apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err)
{
struct mce m;
int apei_error = 0;

if (boot_cpu_data.x86 != 7 || boot_cpu_data.x86_model != 91)
return;

if (!(mem_err->validation_bits & CPER_MEM_VALID_PA))
return;

mce_setup(&m);
m.misc = 0;
m.misc = mem_err->module;
m.addr = mem_err->physical_addr;
if (mem_err->card == 0)
m.bank = 9;
else
m.bank = 10;

switch (mem_err->error_type) {
case 2:
m.status = 0x9c20004000010080;
break;
case 3:
m.status = 0xbe40000000020090;
apei_error = apei_write_mce(&m);
break;
case 8:
if (mem_err->requestor_id == 2)
m.status = 0x98200040000400b0;
else if (mem_err->requestor_id == 3) {
m.status = 0xba400000000600a0;
apei_error = apei_write_mce(&m);
} else if (mem_err->requestor_id == 4)
m.status = 0x98200100000300b0;
else if (mem_err->requestor_id == 5) {
m.status = 0xba000000000500b0;
apei_error = apei_write_mce(&m);
} else
pr_info("Undefined Parity error\n");
break;
case 10:
if (mem_err->requestor_id == 6) {
m.status = 0xba400000000700a0;
apei_error = apei_write_mce(&m);
} else if (mem_err->requestor_id == 7) {
m.status = 0xba000000000800b0;
apei_error = apei_write_mce(&m);
} else
pr_info("Undefined dvad error\n");
break;
case 13:
m.status = 0x9c200040000100c0;
break;
case 14:
m.status = 0xbd000000000200c0;
apei_error = apei_write_mce(&m);
break;
}
mce_log(&m);
}
EXPORT_SYMBOL_GPL(zx_apei_mce_report_mem_error);

void zx_apei_mce_report_pcie_error(int severity, struct cper_sec_pcie *pcie_err)
{
struct mce m;
int apei_error = 0;

if (boot_cpu_data.x86 != 7 || boot_cpu_data.x86_model != 91)
return;

mce_setup(&m);
m.addr = 0;
m.misc = 0;
m.misc |= (u64)pcie_err->device_id.segment << 32;
m.misc |= pcie_err->device_id.bus << 24;
m.misc |= pcie_err->device_id.device << 19;
m.misc |= pcie_err->device_id.function << 16;
m.bank = 6;

switch (severity) {
case 1:
m.status = 0x9820004000020e0b;
break;
case 2:
m.status = 0xba20000000010e0b;
break;
case 3:
m.status = 0xbd20000000000e0b;
apei_error = apei_write_mce(&m);
break;
default:
pr_info("Undefine pcie error\n");
break;
}
mce_log(&m);
}
EXPORT_SYMBOL_GPL(zx_apei_mce_report_pcie_error);

void zx_apei_mce_report_zdi_error(int severity, struct cper_sec_proc_generic *zdi_err)
{
struct mce m;
int apei_error = 0;

if (boot_cpu_data.x86 != 7 || boot_cpu_data.x86_model != 91)
return;

mce_setup(&m);
m.misc = 0;
m.misc |= (zdi_err->requestor_id & 0xff) << 19;
m.misc |= ((zdi_err->requestor_id & 0xff00) >> 8) >> 24;
m.bank = 5;
switch (zdi_err->responder_id) {
case 2:
m.status = 0xba00000000040e0f;
apei_error = apei_write_mce(&m);
break;
case 3:
m.status = 0xba00000000030e0f;
apei_error = apei_write_mce(&m);
break;
case 4:
m.status = 0xba00000000020e0f;
apei_error = apei_write_mce(&m);
break;
case 5:
m.status = 0xba00000000010e0f;
apei_error = apei_write_mce(&m);
break;
case 6:
m.status = 0x9820004000090e0f;
break;
case 7:
m.status = 0x9820004000080e0f;
break;
case 8:
m.status = 0x9820004000070e0f;
break;
case 9:
m.status = 0x9820004000060e0f;
break;
case 10:
m.status = 0x9820004000050e0f;
break;
case 11:
case 12:
case 13:
case 14:
case 15:
m.status = 0x98200040000b0e0f;
break;
case 16:
case 17:
case 18:
m.status = 0x98200040000c0e0f;
break;
default:
pr_info("Undefined ZDI Error\n");
break;
}
mce_log(&m);
}
EXPORT_SYMBOL_GPL(zx_apei_mce_report_zdi_error);

int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info, u64 lapic_id)
{
const u64 *i_mce = ((const u64 *) (ctx_info + 1));
Expand Down
10 changes: 10 additions & 0 deletions drivers/acpi/apei/apei-base.c
Original file line number Diff line number Diff line change
Expand Up @@ -773,6 +773,16 @@ void __weak arch_apei_report_mem_error(int sev,
}
EXPORT_SYMBOL_GPL(arch_apei_report_mem_error);

/*
 * Default (weak) PCIe error hook: intentionally does nothing.
 * An architecture may provide a non-weak definition to override it.
 */
void __weak arch_apei_report_pcie_error(int sev, struct cper_sec_pcie *pcie_err)
{
}
EXPORT_SYMBOL_GPL(arch_apei_report_pcie_error);

/*
 * Default (weak) ZDI/generic-processor error hook: intentionally does
 * nothing. An architecture may provide a non-weak definition to override it.
 */
void __weak arch_apei_report_zdi_error(int sev, struct cper_sec_proc_generic *zdi_err)
{
}
EXPORT_SYMBOL_GPL(arch_apei_report_zdi_error);

int apei_osc_setup(void)
{
static u8 whea_uuid_str[] = "ed855e0c-6c90-47bf-a62a-26de0fc5ad5c";
Expand Down
26 changes: 26 additions & 0 deletions drivers/acpi/apei/ghes.c
Original file line number Diff line number Diff line change
Expand Up @@ -703,10 +703,17 @@ static bool ghes_do_proc(struct ghes *ghes,
queued = ghes_handle_memory_failure(gdata, sev, sync);
}
else if (guid_equal(sec_type, &CPER_SEC_PCIE)) {
struct cper_sec_pcie *pcie_err = acpi_hest_get_payload(gdata);

arch_apei_report_pcie_error(sec_sev, pcie_err);
ghes_handle_aer(gdata);
}
else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) {
queued = ghes_handle_arm_hw_error(gdata, sev, sync);
} else if (guid_equal(sec_type, &CPER_SEC_PROC_GENERIC)) {
struct cper_sec_proc_generic *zdi_err = acpi_hest_get_payload(gdata);

arch_apei_report_zdi_error(sec_sev, zdi_err);
} else {
void *err = acpi_hest_get_payload(gdata);

Expand Down Expand Up @@ -1091,6 +1098,8 @@ static int ghes_in_nmi_queue_one_entry(struct ghes *ghes,
u32 len, node_len;
u64 buf_paddr;
int sev, rc;
struct acpi_hest_generic_data *gdata;
guid_t *sec_type;

if (!IS_ENABLED(CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG))
return -EOPNOTSUPP;
Expand Down Expand Up @@ -1126,6 +1135,23 @@ static int ghes_in_nmi_queue_one_entry(struct ghes *ghes,

sev = ghes_severity(estatus->error_severity);
if (sev >= GHES_SEV_PANIC) {
apei_estatus_for_each_section(estatus, gdata) {
sec_type = (guid_t *)gdata->section_type;
if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) {
struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata);

arch_apei_report_mem_error(sev, mem_err);
} else if (guid_equal(sec_type, &CPER_SEC_PCIE)) {
struct cper_sec_pcie *pcie_err = acpi_hest_get_payload(gdata);

arch_apei_report_pcie_error(sev, pcie_err);
} else if (guid_equal(sec_type, &CPER_SEC_PROC_GENERIC)) {
struct cper_sec_proc_generic *zdi_err =
acpi_hest_get_payload(gdata);

arch_apei_report_zdi_error(sev, zdi_err);
}
}
ghes_print_queued_estatus();
__ghes_panic(ghes, estatus, buf_paddr, fixmap_idx);
}
Expand Down
2 changes: 2 additions & 0 deletions include/acpi/apei.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ int erst_clear(u64 record_id);

int arch_apei_enable_cmcff(struct acpi_hest_header *hest_hdr, void *data);
void arch_apei_report_mem_error(int sev, struct cper_sec_mem_err *mem_err);
/*
 * Arch hooks invoked by the GHES code for PCIe and generic-processor CPER
 * sections. The defaults are empty __weak stubs; the x86 versions forward
 * to the Zhaoxin MCE reporting helpers on Zhaoxin/Centaur CPUs.
 */
void arch_apei_report_pcie_error(int sev, struct cper_sec_pcie *pcie_err);
void arch_apei_report_zdi_error(int sev, struct cper_sec_proc_generic *zdi_err);

#endif
#endif

0 comments on commit 85ff5df

Please sign in to comment.