Skip to content

Commit

Permalink
acpi/ghes: Remove CXL CPER notifications
Browse files Browse the repository at this point in the history
Initial tests with the CXL CPER implementation identified that error
reports were being duplicated in the log and the trace event [1].  Then
it was discovered that the notification handler took sleeping locks
while the GHES event handling runs in spin_lock_irqsave() context [2].

While the duplicate reporting was fixed in v6.8-rc4, the fix for the
sleeping-lock-vs-atomic collision would enjoy more time to settle and
gain some test cycles.  Given how late it is in the development cycle,
remove the CXL hookup for now and try again during the next merge
window.

Note that the end result is that v6.8 does not emit CXL CPER payloads to the
kernel log, but this is in line with the CXL trend to move error
reporting to trace events instead of the kernel log.

Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: Rafael J. Wysocki <rafael@kernel.org>
Cc: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Link: http://lore.kernel.org/r/20240108165855.00002f5a@Huawei.com [1]
Closes: http://lore.kernel.org/r/b963c490-2c13-4b79-bbe7-34c6568423c7@moroto.mountain [2]
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
  • Loading branch information
djbw authored and pull[bot] committed Feb 26, 2024
1 parent 7774ee6 commit c050c85
Show file tree
Hide file tree
Showing 3 changed files with 1 addition and 137 deletions.
63 changes: 0 additions & 63 deletions drivers/acpi/apei/ghes.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
#include <linux/interrupt.h>
#include <linux/timer.h>
#include <linux/cper.h>
#include <linux/cxl-event.h>
#include <linux/platform_device.h>
#include <linux/mutex.h>
#include <linux/ratelimit.h>
Expand Down Expand Up @@ -674,52 +673,6 @@ static void ghes_defer_non_standard_event(struct acpi_hest_generic_data *gdata,
schedule_work(&entry->work);
}

/*
 * Only a single callback can be registered for CXL CPER events.
 *
 * cper_callback holds that one callback (NULL while nothing is registered).
 * cxl_cper_rw_sem is taken for write by cxl_cper_register_callback() and
 * cxl_cper_unregister_callback(), and for read by cxl_cper_post_event()
 * around the callback invocation.
 */
static DECLARE_RWSEM(cxl_cper_rw_sem);
static cxl_cper_callback cper_callback;

/*
 * cxl_cper_post_event() - validate a CXL CPER record and hand it to the
 *                         registered callback
 * @event_type: event type, passed through unchanged to the callback
 * @rec: CXL CPER event record taken from the GHES section payload
 *
 * The record is dropped (with a FW_WARN error message) when its header
 * length is not larger than the header itself, when it exceeds
 * sizeof(*rec), or when CPER_CXL_COMP_EVENT_LOG_VALID is not set in the
 * header validation bits.  Otherwise cper_callback, if one is registered,
 * is invoked with cxl_cper_rw_sem held for read.
 *
 * NOTE(review): the commit description above states that GHES event
 * handling runs in spin_lock_irqsave() context while the notification
 * path takes sleeping locks; the rwsem read lock taken here appears to be
 * one of them -- confirm before reintroducing this code.
 */
static void cxl_cper_post_event(enum cxl_event_type event_type,
struct cxl_cper_event_rec *rec)
{
/* Length must cover more than the header and fit within the record. */
if (rec->hdr.length <= sizeof(rec->hdr) ||
rec->hdr.length > sizeof(*rec)) {
pr_err(FW_WARN "CXL CPER Invalid section length (%u)\n",
rec->hdr.length);
return;
}

/* Only forward records that are flagged as carrying a valid event log. */
if (!(rec->hdr.validation_bits & CPER_CXL_COMP_EVENT_LOG_VALID)) {
pr_err(FW_WARN "CXL CPER invalid event\n");
return;
}

/* Scope-based read lock: held across the callback until return. */
guard(rwsem_read)(&cxl_cper_rw_sem);
if (cper_callback)
cper_callback(event_type, rec);
}

/*
 * cxl_cper_register_callback() - install the single CXL CPER event callback
 * @callback: function to invoke for each validated CXL CPER record
 *
 * Takes cxl_cper_rw_sem for write for the duration of the call.
 *
 * Return: 0 on success, -EINVAL if a callback is already registered.
 */
int cxl_cper_register_callback(cxl_cper_callback callback)
{
guard(rwsem_write)(&cxl_cper_rw_sem);
/* Only one callback slot exists; refuse to overwrite it. */
if (cper_callback)
return -EINVAL;
cper_callback = callback;
return 0;
}
EXPORT_SYMBOL_NS_GPL(cxl_cper_register_callback, CXL);

/*
 * cxl_cper_unregister_callback() - remove the CXL CPER event callback
 * @callback: the callback that was previously registered
 *
 * Takes cxl_cper_rw_sem for write for the duration of the call.
 *
 * Return: 0 on success (the slot is cleared to NULL), -EINVAL if @callback
 * is not the currently registered callback.
 */
int cxl_cper_unregister_callback(cxl_cper_callback callback)
{
guard(rwsem_write)(&cxl_cper_rw_sem);
/* Only the current owner of the slot may clear it. */
if (callback != cper_callback)
return -EINVAL;
cper_callback = NULL;
return 0;
}
EXPORT_SYMBOL_NS_GPL(cxl_cper_unregister_callback, CXL);

static bool ghes_do_proc(struct ghes *ghes,
const struct acpi_hest_generic_status *estatus)
{
Expand Down Expand Up @@ -754,22 +707,6 @@ static bool ghes_do_proc(struct ghes *ghes,
}
else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) {
queued = ghes_handle_arm_hw_error(gdata, sev, sync);
} else if (guid_equal(sec_type, &CPER_SEC_CXL_GEN_MEDIA_GUID)) {
struct cxl_cper_event_rec *rec =
acpi_hest_get_payload(gdata);

cxl_cper_post_event(CXL_CPER_EVENT_GEN_MEDIA, rec);
} else if (guid_equal(sec_type, &CPER_SEC_CXL_DRAM_GUID)) {
struct cxl_cper_event_rec *rec =
acpi_hest_get_payload(gdata);

cxl_cper_post_event(CXL_CPER_EVENT_DRAM, rec);
} else if (guid_equal(sec_type,
&CPER_SEC_CXL_MEM_MODULE_GUID)) {
struct cxl_cper_event_rec *rec =
acpi_hest_get_payload(gdata);

cxl_cper_post_event(CXL_CPER_EVENT_MEM_MODULE, rec);
} else {
void *err = acpi_hest_get_payload(gdata);

Expand Down
57 changes: 1 addition & 56 deletions drivers/cxl/pci.c
Original file line number Diff line number Diff line change
Expand Up @@ -974,61 +974,6 @@ static struct pci_driver cxl_pci_driver = {
},
};

/* Bits 1:0 of the event record header flags; used below as the log type. */
#define CXL_EVENT_HDR_FLAGS_REC_SEVERITY GENMASK(1, 0)
/*
 * cxl_cper_event_call() - CXL CPER callback: emit a trace record for an
 *                         event reported through a CPER section
 * @ev_type: event type supplied by the caller
 * @rec: CXL CPER record; its header identifies the originating PCI device
 *
 * Looks up the PCI device named by the record's device id (segment, bus,
 * device and function numbers).  Returns without doing anything when the
 * device cannot be found, is not bound to cxl_pci_driver, or has no driver
 * data.  Otherwise the low two bits of the event header flags are used as
 * the log type and the event is passed to cxl_event_trace_record() with
 * uuid_null.
 */
static void cxl_cper_event_call(enum cxl_event_type ev_type,
struct cxl_cper_event_rec *rec)
{
struct cper_cxl_event_devid *device_id = &rec->hdr.device_id;
/* __free(pci_dev_put): presumably drops the lookup reference on scope exit. */
struct pci_dev *pdev __free(pci_dev_put) = NULL;
enum cxl_event_log_type log_type;
struct cxl_dev_state *cxlds;
unsigned int devfn;
u32 hdr_flags;

devfn = PCI_DEVFN(device_id->device_num, device_id->func_num);
pdev = pci_get_domain_bus_and_slot(device_id->segment_num,
device_id->bus_num, devfn);
if (!pdev)
return;

/*
 * NOTE(review): guard(pci_dev) presumably holds the device lock for the
 * rest of the function so the driver check and drvdata stay stable --
 * confirm.  The commit description reports sleeping locks being taken on
 * this notification path.
 */
guard(pci_dev)(pdev);
if (pdev->driver != &cxl_pci_driver)
return;

cxlds = pci_get_drvdata(pdev);
if (!cxlds)
return;

/* Fabricate a log type */
hdr_flags = get_unaligned_le24(rec->event.generic.hdr.flags);
log_type = FIELD_GET(CXL_EVENT_HDR_FLAGS_REC_SEVERITY, hdr_flags);

cxl_event_trace_record(cxlds->cxlmd, log_type, ev_type,
&uuid_null, &rec->event);
}

/*
 * cxl_pci_driver_init() - module init: register the CXL CPER callback, then
 *                         the PCI driver
 *
 * If PCI driver registration fails, the CPER callback is unregistered again
 * so no callback is left installed.
 *
 * Return: 0 on success, otherwise the error from
 * cxl_cper_register_callback() or pci_register_driver().
 */
static int __init cxl_pci_driver_init(void)
{
int rc;

rc = cxl_cper_register_callback(cxl_cper_event_call);
if (rc)
return rc;

rc = pci_register_driver(&cxl_pci_driver);
/* Roll back the callback registration on failure. */
if (rc)
cxl_cper_unregister_callback(cxl_cper_event_call);

return rc;
}

/*
 * cxl_pci_driver_exit() - module exit: unregister the PCI driver, then the
 *                         CXL CPER callback (reverse of the init order)
 */
static void __exit cxl_pci_driver_exit(void)
{
pci_unregister_driver(&cxl_pci_driver);
cxl_cper_unregister_callback(cxl_cper_event_call);
}

/*
 * Module registration.
 *
 * NOTE(review): this is a rendered diff without +/- markers.  Given the
 * hunk summary for this file (1 addition, 56 deletions), the
 * module_init()/module_exit() pair is presumably the removed side and
 * module_pci_driver() the single added replacement -- confirm against the
 * raw patch.
 */
module_init(cxl_pci_driver_init);
module_exit(cxl_pci_driver_exit);
module_pci_driver(cxl_pci_driver);
MODULE_LICENSE("GPL v2");
MODULE_IMPORT_NS(CXL);
18 changes: 0 additions & 18 deletions include/linux/cxl-event.h
Original file line number Diff line number Diff line change
Expand Up @@ -140,22 +140,4 @@ struct cxl_cper_event_rec {
union cxl_event event;
} __packed;

/*
 * Callback type for CXL CPER event notifications: receives the event type
 * and the CPER event record.
 */
typedef void (*cxl_cper_callback)(enum cxl_event_type type,
struct cxl_cper_event_rec *rec);

/*
 * Registration interface for the CXL CPER callback.  With
 * CONFIG_ACPI_APEI_GHES enabled these are real functions declared here and
 * defined elsewhere; otherwise they are inline stubs that do nothing and
 * return 0.
 */
#ifdef CONFIG_ACPI_APEI_GHES
int cxl_cper_register_callback(cxl_cper_callback callback);
int cxl_cper_unregister_callback(cxl_cper_callback callback);
#else
/* Stub: no GHES support, so there is nothing to register with. */
static inline int cxl_cper_register_callback(cxl_cper_callback callback)
{
return 0;
}

/* Stub: no GHES support, so there is nothing to unregister. */
static inline int cxl_cper_unregister_callback(cxl_cper_callback callback)
{
return 0;
}
#endif

#endif /* _LINUX_CXL_EVENT_H */

0 comments on commit c050c85

Please sign in to comment.