[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250603172239.159260-11-terry.bowman@amd.com>
Date: Tue, 3 Jun 2025 12:22:33 -0500
From: Terry Bowman <terry.bowman@....com>
To: <PradeepVineshReddy.Kodamati@....com>, <dave@...olabs.net>,
<jonathan.cameron@...wei.com>, <dave.jiang@...el.com>,
<alison.schofield@...el.com>, <vishal.l.verma@...el.com>,
<ira.weiny@...el.com>, <dan.j.williams@...el.com>, <bhelgaas@...gle.com>,
<bp@...en8.de>, <ming.li@...omail.com>, <shiju.jose@...wei.com>,
<dan.carpenter@...aro.org>, <Smita.KoralahalliChannabasappa@....com>,
<kobayashi.da-06@...itsu.com>, <terry.bowman@....com>, <yanfei.xu@...el.com>,
<rrichter@....com>, <peterz@...radead.org>, <colyli@...e.de>,
<uaisheng.ye@...el.com>, <fabio.m.de.francesco@...ux.intel.com>,
<ilpo.jarvinen@...ux.intel.com>, <yazen.ghannam@....com>,
<linux-cxl@...r.kernel.org>, <linux-kernel@...r.kernel.org>,
<linux-pci@...r.kernel.org>
Subject: [PATCH v9 10/16] cxl/pci: Unify CXL trace logging for CXL Endpoints and CXL Ports
CXL currently has separate trace routines for CXL Port errors and CXL
Endpoint errors. This is inconvenient for the user because they must enable
2 sets of trace routines. Make updates to the trace logging such that a
single trace routine logs both CXL Endpoint and CXL Port protocol errors.
Rename the 'host' field from the CXL Endpoint trace to 'parent' in the
unified trace routines. 'host' does not correctly apply to CXL Port
devices. Parent is more general and applies to CXL Port devices and CXL
Endpoints.
Add serial number parameter to the trace logging. This is used for EPs
and 0 is provided for CXL port devices without a serial number.
Below is output of correctable and uncorrectable protocol error logging.
CXL Root Port and CXL Endpoint examples are included below.
Root Port:
cxl_aer_correctable_error: device=0000:0c:00.0 parent=pci0000:0c serial: 0 status='CRC Threshold Hit'
cxl_aer_uncorrectable_error: device=0000:0c:00.0 parent=pci0000:0c serial: 0 status: 'Cache Byte Enable Parity Error' first_error: 'Cache Byte Enable Parity Error'
Endpoint:
cxl_aer_correctable_error: device=mem3 parent=0000:0f:00.0 serial=0 status='CRC Threshold Hit'
cxl_aer_uncorrectable_error: device=mem3 parent=0000:0f:00.0 serial: 0 status: 'Cache Byte Enable Parity Error' first_error: 'Cache Byte Enable Parity Error'
Signed-off-by: Terry Bowman <terry.bowman@....com>
---
drivers/cxl/core/pci.c | 18 +++++----
drivers/cxl/core/ras.c | 14 ++++---
drivers/cxl/core/trace.h | 84 +++++++++-------------------------------
3 files changed, 37 insertions(+), 79 deletions(-)
diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
index 186a5a20b951..0f4c07fd64a5 100644
--- a/drivers/cxl/core/pci.c
+++ b/drivers/cxl/core/pci.c
@@ -664,7 +664,7 @@ void read_cdat_data(struct cxl_port *port)
}
EXPORT_SYMBOL_NS_GPL(read_cdat_data, "CXL");
-static void __cxl_handle_cor_ras(struct device *dev,
+static void __cxl_handle_cor_ras(struct device *dev, u64 serial,
void __iomem *ras_base)
{
void __iomem *addr;
@@ -679,13 +679,13 @@ static void __cxl_handle_cor_ras(struct device *dev,
status = readl(addr);
if (status & CXL_RAS_CORRECTABLE_STATUS_MASK) {
writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr);
- trace_cxl_aer_correctable_error(to_cxl_memdev(dev), status);
+ trace_cxl_aer_correctable_error(dev, serial, status);
}
}
static void cxl_handle_endpoint_cor_ras(struct cxl_dev_state *cxlds)
{
- return __cxl_handle_cor_ras(&cxlds->cxlmd->dev, cxlds->regs.ras);
+ return __cxl_handle_cor_ras(&cxlds->cxlmd->dev, cxlds->serial, cxlds->regs.ras);
}
/* CXL spec rev3.0 8.2.4.16.1 */
@@ -709,7 +709,8 @@ static void header_log_copy(void __iomem *ras_base, u32 *log)
* Log the state of the RAS status registers and prepare them to log the
* next error status. Return 1 if reset needed.
*/
-static bool __cxl_handle_ras(struct device *dev, void __iomem *ras_base)
+static bool __cxl_handle_ras(struct device *dev, u64 serial,
+ void __iomem *ras_base)
{
u32 hl[CXL_HEADERLOG_SIZE_U32];
void __iomem *addr;
@@ -738,7 +739,7 @@ static bool __cxl_handle_ras(struct device *dev, void __iomem *ras_base)
}
header_log_copy(ras_base, hl);
- trace_cxl_aer_uncorrectable_error(to_cxl_memdev(dev), status, fe, hl);
+ trace_cxl_aer_uncorrectable_error(dev, serial, status, fe, hl);
writel(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK, addr);
return true;
@@ -746,7 +747,8 @@ static bool __cxl_handle_ras(struct device *dev, void __iomem *ras_base)
static bool cxl_handle_endpoint_ras(struct cxl_dev_state *cxlds)
{
- return __cxl_handle_ras(&cxlds->cxlmd->dev, cxlds->regs.ras);
+
+ return __cxl_handle_ras(&cxlds->cxlmd->dev, cxlds->serial, cxlds->regs.ras);
}
#ifdef CONFIG_PCIEAER_CXL
@@ -754,13 +756,13 @@ static bool cxl_handle_endpoint_ras(struct cxl_dev_state *cxlds)
static void cxl_handle_rdport_cor_ras(struct cxl_dev_state *cxlds,
struct cxl_dport *dport)
{
- return __cxl_handle_cor_ras(&cxlds->cxlmd->dev, dport->regs.ras);
+ return __cxl_handle_cor_ras(&cxlds->cxlmd->dev, cxlds->serial, dport->regs.ras);
}
static bool cxl_handle_rdport_ras(struct cxl_dev_state *cxlds,
struct cxl_dport *dport)
{
- return __cxl_handle_ras(&cxlds->cxlmd->dev, dport->regs.ras);
+ return __cxl_handle_ras(&cxlds->cxlmd->dev, cxlds->serial, dport->regs.ras);
}
/*
diff --git a/drivers/cxl/core/ras.c b/drivers/cxl/core/ras.c
index 715f7221ea3a..0ef8c2068c0c 100644
--- a/drivers/cxl/core/ras.c
+++ b/drivers/cxl/core/ras.c
@@ -13,7 +13,7 @@ static void cxl_cper_trace_corr_port_prot_err(struct pci_dev *pdev,
{
u32 status = ras_cap.cor_status & ~ras_cap.cor_mask;
- trace_cxl_port_aer_correctable_error(&pdev->dev, status);
+ trace_cxl_aer_correctable_error(&pdev->dev, 0, status);
}
static void cxl_cper_trace_uncorr_port_prot_err(struct pci_dev *pdev,
@@ -28,8 +28,8 @@ static void cxl_cper_trace_uncorr_port_prot_err(struct pci_dev *pdev,
else
fe = status;
- trace_cxl_port_aer_uncorrectable_error(&pdev->dev, status, fe,
- ras_cap.header_log);
+ trace_cxl_aer_uncorrectable_error(&pdev->dev, 0, status, fe,
+ ras_cap.header_log);
}
static void cxl_cper_trace_corr_prot_err(struct pci_dev *pdev,
@@ -42,7 +42,8 @@ static void cxl_cper_trace_corr_prot_err(struct pci_dev *pdev,
if (!cxlds)
return;
- trace_cxl_aer_correctable_error(cxlds->cxlmd, status);
+ trace_cxl_aer_correctable_error(&cxlds->cxlmd->dev, cxlds->serial,
+ status);
}
static void cxl_cper_trace_uncorr_prot_err(struct pci_dev *pdev,
@@ -62,8 +63,9 @@ static void cxl_cper_trace_uncorr_prot_err(struct pci_dev *pdev,
else
fe = status;
- trace_cxl_aer_uncorrectable_error(cxlds->cxlmd, status, fe,
- ras_cap.header_log);
+ trace_cxl_aer_uncorrectable_error(&cxlds->cxlmd->dev,
+ cxlds->serial, status,
+ fe, ras_cap.header_log);
}
static void cxl_cper_handle_prot_err(struct cxl_cper_prot_err_work_data *data)
diff --git a/drivers/cxl/core/trace.h b/drivers/cxl/core/trace.h
index 25ebfbc1616c..8c91b0f3d165 100644
--- a/drivers/cxl/core/trace.h
+++ b/drivers/cxl/core/trace.h
@@ -48,49 +48,22 @@
{ CXL_RAS_UC_IDE_RX_ERR, "IDE Rx Error" } \
)
-TRACE_EVENT(cxl_port_aer_uncorrectable_error,
- TP_PROTO(struct device *dev, u32 status, u32 fe, u32 *hl),
- TP_ARGS(dev, status, fe, hl),
- TP_STRUCT__entry(
- __string(device, dev_name(dev))
- __string(host, dev_name(dev->parent))
- __field(u32, status)
- __field(u32, first_error)
- __array(u32, header_log, CXL_HEADERLOG_SIZE_U32)
- ),
- TP_fast_assign(
- __assign_str(device);
- __assign_str(host);
- __entry->status = status;
- __entry->first_error = fe;
- /*
- * Embed the 512B headerlog data for user app retrieval and
- * parsing, but no need to print this in the trace buffer.
- */
- memcpy(__entry->header_log, hl, CXL_HEADERLOG_SIZE);
- ),
- TP_printk("device=%s host=%s status: '%s' first_error: '%s'",
- __get_str(device), __get_str(host),
- show_uc_errs(__entry->status),
- show_uc_errs(__entry->first_error)
- )
-);
-
TRACE_EVENT(cxl_aer_uncorrectable_error,
- TP_PROTO(const struct cxl_memdev *cxlmd, u32 status, u32 fe, u32 *hl),
- TP_ARGS(cxlmd, status, fe, hl),
+ TP_PROTO(struct device *dev, u64 serial, u32 status, u32 fe,
+ u32 *hl),
+ TP_ARGS(dev, serial, status, fe, hl),
TP_STRUCT__entry(
- __string(memdev, dev_name(&cxlmd->dev))
- __string(host, dev_name(cxlmd->dev.parent))
+ __string(name, dev_name(dev))
+ __string(parent, dev_name(dev->parent))
__field(u64, serial)
__field(u32, status)
__field(u32, first_error)
__array(u32, header_log, CXL_HEADERLOG_SIZE_U32)
),
TP_fast_assign(
- __assign_str(memdev);
- __assign_str(host);
- __entry->serial = cxlmd->cxlds->serial;
+ __assign_str(name);
+ __assign_str(parent);
+ __entry->serial = serial;
__entry->status = status;
__entry->first_error = fe;
/*
@@ -99,8 +72,8 @@ TRACE_EVENT(cxl_aer_uncorrectable_error,
*/
memcpy(__entry->header_log, hl, CXL_HEADERLOG_SIZE);
),
- TP_printk("memdev=%s host=%s serial=%lld: status: '%s' first_error: '%s'",
- __get_str(memdev), __get_str(host), __entry->serial,
+ TP_printk("device=%s parent=%s serial=%lld status='%s' first_error='%s'",
+ __get_str(name), __get_str(parent), __entry->serial,
show_uc_errs(__entry->status),
show_uc_errs(__entry->first_error)
)
@@ -124,42 +97,23 @@ TRACE_EVENT(cxl_aer_uncorrectable_error,
{ CXL_RAS_CE_PHYS_LAYER_ERR, "Received Error From Physical Layer" } \
)
-TRACE_EVENT(cxl_port_aer_correctable_error,
- TP_PROTO(struct device *dev, u32 status),
- TP_ARGS(dev, status),
- TP_STRUCT__entry(
- __string(device, dev_name(dev))
- __string(host, dev_name(dev->parent))
- __field(u32, status)
- ),
- TP_fast_assign(
- __assign_str(device);
- __assign_str(host);
- __entry->status = status;
- ),
- TP_printk("device=%s host=%s status='%s'",
- __get_str(device), __get_str(host),
- show_ce_errs(__entry->status)
- )
-);
-
TRACE_EVENT(cxl_aer_correctable_error,
- TP_PROTO(const struct cxl_memdev *cxlmd, u32 status),
- TP_ARGS(cxlmd, status),
+ TP_PROTO(struct device *dev, u64 serial, u32 status),
+ TP_ARGS(dev, serial, status),
TP_STRUCT__entry(
- __string(memdev, dev_name(&cxlmd->dev))
- __string(host, dev_name(cxlmd->dev.parent))
+ __string(name, dev_name(dev))
+ __string(parent, dev_name(dev->parent))
__field(u64, serial)
__field(u32, status)
),
TP_fast_assign(
- __assign_str(memdev);
- __assign_str(host);
- __entry->serial = cxlmd->cxlds->serial;
+ __assign_str(name);
+ __assign_str(parent);
+ __entry->serial = serial;
__entry->status = status;
),
- TP_printk("memdev=%s host=%s serial=%lld: status: '%s'",
- __get_str(memdev), __get_str(host), __entry->serial,
+ TP_printk("device=%s parent=%s serial=%lld status='%s'",
+ __get_str(name), __get_str(parent), __entry->serial,
show_ce_errs(__entry->status)
)
);
--
2.34.1
Powered by blists - more mailing lists