[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <on7eajzjb7bb5ut4jiwxgi4gckhspar4eztmvzfslemoohr6kr@ccffktlzlpcn>
Date: Tue, 19 Aug 2025 21:53:11 +0530
From: Manivannan Sadhasivam <mani@...nel.org>
To: Jim Quinlan <james.quinlan@...adcom.com>
Cc: linux-pci@...r.kernel.org, Nicolas Saenz Julienne <nsaenz@...nel.org>,
Bjorn Helgaas <bhelgaas@...gle.com>, Lorenzo Pieralisi <lorenzo.pieralisi@....com>,
Cyril Brulebois <kibi@...ian.org>, bcm-kernel-feedback-list@...adcom.com, jim2101024@...il.com,
Florian Fainelli <florian.fainelli@...adcom.com>, Lorenzo Pieralisi <lpieralisi@...nel.org>,
Krzysztof Wilczyński <kwilczynski@...nel.org>, Rob Herring <robh@...nel.org>,
"moderated list:BROADCOM BCM2711/BCM2835 ARM ARCHITECTURE" <linux-rpi-kernel@...ts.infradead.org>,
"moderated list:BROADCOM BCM2711/BCM2835 ARM ARCHITECTURE" <linux-arm-kernel@...ts.infradead.org>, open list <linux-kernel@...r.kernel.org>
Subject: Re: [PATCH v2 2/2] PCI: brcmstb: Add panic/die handler to driver
On Thu, Aug 07, 2025 at 06:15:13PM GMT, Jim Quinlan wrote:
> Whereas most PCIe HW returns 0xffffffff on illegal accesses and the like,
> by default Broadcom's STB PCIe controller effects an abort. Some SoCs --
> 7216 and its descendants -- have new HW that identifies error details.
>
> This simple handler determines if the PCIe controller was the cause of the
> abort and if so, prints out diagnostic info. Unfortunately, an abort still
> occurs.
>
> Care is taken to read the error registers only when the PCIe bridge is
> active and the PCIe registers are acceptable. Otherwise, a "die" event
> caused by something other than the PCIe could cause an abort if the PCIe
> "die" handler tried to access registers when the bridge is off.
>
> Example error output:
> brcm-pcie 8b20000.pcie: Error: Mem Acc: 32bit, Read, @0x38000000
> brcm-pcie 8b20000.pcie: Type: TO=0 Abt=0 UnspReq=1 AccDsble=0 BadAddr=0
>
> Signed-off-by: Jim Quinlan <james.quinlan@...adcom.com>
> ---
> drivers/pci/controller/pcie-brcmstb.c | 155 +++++++++++++++++++++++++-
> 1 file changed, 154 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/pci/controller/pcie-brcmstb.c b/drivers/pci/controller/pcie-brcmstb.c
> index ceb431a252b7..43c4ada3de07 100644
> --- a/drivers/pci/controller/pcie-brcmstb.c
> +++ b/drivers/pci/controller/pcie-brcmstb.c
> @@ -14,15 +14,18 @@
> #include <linux/irqchip/chained_irq.h>
> #include <linux/irqchip/irq-msi-lib.h>
> #include <linux/irqdomain.h>
> +#include <linux/kdebug.h>
> #include <linux/kernel.h>
> #include <linux/list.h>
> #include <linux/log2.h>
> #include <linux/module.h>
> #include <linux/msi.h>
> +#include <linux/notifier.h>
> #include <linux/of_address.h>
> #include <linux/of_irq.h>
> #include <linux/of_pci.h>
> #include <linux/of_platform.h>
> +#include <linux/panic_notifier.h>
> #include <linux/pci.h>
> #include <linux/pci-ecam.h>
> #include <linux/printk.h>
> @@ -156,6 +159,39 @@
> #define MSI_INT_MASK_SET 0x10
> #define MSI_INT_MASK_CLR 0x14
>
> +/* Error report registers */
> +#define PCIE_OUTB_ERR_TREAT 0x6000
> +#define PCIE_OUTB_ERR_TREAT_CONFIG_MASK 0x1
> +#define PCIE_OUTB_ERR_TREAT_MEM_MASK 0x2
> +#define PCIE_OUTB_ERR_VALID 0x6004
> +#define PCIE_OUTB_ERR_CLEAR 0x6008
> +#define PCIE_OUTB_ERR_ACC_INFO 0x600c
> +#define PCIE_OUTB_ERR_ACC_INFO_CFG_ERR_MASK 0x01
> +#define PCIE_OUTB_ERR_ACC_INFO_MEM_ERR_MASK 0x02
> +#define PCIE_OUTB_ERR_ACC_INFO_TYPE_64_MASK 0x04
> +#define PCIE_OUTB_ERR_ACC_INFO_DIR_WRITE_MASK 0x10
> +#define PCIE_OUTB_ERR_ACC_INFO_BYTE_LANES_MASK 0xff00
> +#define PCIE_OUTB_ERR_ACC_ADDR 0x6010
> +#define PCIE_OUTB_ERR_ACC_ADDR_BUS_MASK 0xff00000
> +#define PCIE_OUTB_ERR_ACC_ADDR_DEV_MASK 0xf8000
> +#define PCIE_OUTB_ERR_ACC_ADDR_FUNC_MASK 0x7000
> +#define PCIE_OUTB_ERR_ACC_ADDR_REG_MASK 0xfff
> +#define PCIE_OUTB_ERR_CFG_CAUSE 0x6014
> +#define PCIE_OUTB_ERR_CFG_CAUSE_TIMEOUT_MASK 0x40
> +#define PCIE_OUTB_ERR_CFG_CAUSE_ABORT_MASK 0x20
> +#define PCIE_OUTB_ERR_CFG_CAUSE_UNSUPP_REQ_MASK 0x10
> +#define PCIE_OUTB_ERR_CFG_CAUSE_ACC_TIMEOUT_MASK 0x4
> +#define PCIE_OUTB_ERR_CFG_CAUSE_ACC_DISABLED_MASK 0x2
> +#define PCIE_OUTB_ERR_CFG_CAUSE_ACC_64BIT__MASK 0x1
> +#define PCIE_OUTB_ERR_MEM_ADDR_LO 0x6018
> +#define PCIE_OUTB_ERR_MEM_ADDR_HI 0x601c
> +#define PCIE_OUTB_ERR_MEM_CAUSE 0x6020
> +#define PCIE_OUTB_ERR_MEM_CAUSE_TIMEOUT_MASK 0x40
> +#define PCIE_OUTB_ERR_MEM_CAUSE_ABORT_MASK 0x20
> +#define PCIE_OUTB_ERR_MEM_CAUSE_UNSUPP_REQ_MASK 0x10
> +#define PCIE_OUTB_ERR_MEM_CAUSE_ACC_DISABLED_MASK 0x2
> +#define PCIE_OUTB_ERR_MEM_CAUSE_BAD_ADDR_MASK 0x1
> +
> #define PCIE_RGR1_SW_INIT_1_PERST_MASK 0x1
> #define PCIE_RGR1_SW_INIT_1_PERST_SHIFT 0x0
>
> @@ -305,6 +341,8 @@ struct brcm_pcie {
> struct subdev_regulators *sr;
> bool ep_wakeup_capable;
> const struct pcie_cfg_data *cfg;
> + struct notifier_block die_notifier;
> + struct notifier_block panic_notifier;
> bool bridge_on;
> spinlock_t bridge_lock;
> };
> @@ -1730,6 +1768,115 @@ static int brcm_pcie_resume_noirq(struct device *dev)
> return ret;
> }
>
> +/* Dump out PCIe errors on die or panic */
> +static int _brcm_pcie_dump_err(struct brcm_pcie *pcie,
> + const char *type)
> +{
> + void __iomem *base = pcie->base;
> + int i, is_cfg_err, is_mem_err, lanes;
> + char *width_str, *direction_str, lanes_str[9];
> + u32 info, cfg_addr, cfg_cause, mem_cause, lo, hi;
> + unsigned long flags;
> +
> + spin_lock_irqsave(&pcie->bridge_lock, flags);
> + /* Don't access registers when the bridge is off */
> + if (!pcie->bridge_on || readl(base + PCIE_OUTB_ERR_VALID) == 0) {
> + spin_unlock_irqrestore(&pcie->bridge_lock, flags);
> + return NOTIFY_DONE;
> + }
> +
> + /* Read all necessary registers so we can release the spinlock ASAP */
> + info = readl(base + PCIE_OUTB_ERR_ACC_INFO);
> + is_cfg_err = !!(info & PCIE_OUTB_ERR_ACC_INFO_CFG_ERR_MASK);
> + is_mem_err = !!(info & PCIE_OUTB_ERR_ACC_INFO_MEM_ERR_MASK);
> + if (is_cfg_err) {
> + cfg_addr = readl(base + PCIE_OUTB_ERR_ACC_ADDR);
> + cfg_cause = readl(base + PCIE_OUTB_ERR_CFG_CAUSE);
> + }
> + if (is_mem_err) {
> + mem_cause = readl(base + PCIE_OUTB_ERR_MEM_CAUSE);
> + lo = readl(base + PCIE_OUTB_ERR_MEM_ADDR_LO);
> + hi = readl(base + PCIE_OUTB_ERR_MEM_ADDR_HI);
> + }
> + /* We've got all of the info, clear the error */
> + writel(1, base + PCIE_OUTB_ERR_CLEAR);
> + spin_unlock_irqrestore(&pcie->bridge_lock, flags);
> +
> + dev_err(pcie->dev, "handling %s error notification\n", type);
You are not *handling* the error, but just dumping the registers due to the
error. So this error message is misleading.
- Mani
--
மணிவண்ணன் சதாசிவம்
Powered by blists - more mailing lists