[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <06f201dc959c$ec2b74e0$c4825ea0$@trustnetic.com>
Date: Wed, 4 Feb 2026 14:10:12 +0800
From: Jiawen Wu <jiawenwu@...stnetic.com>
To: "'Jakub Kicinski'" <kuba@...nel.org>
Cc: <mengyuanlou@...-swift.com>,
<horms@...nel.org>,
<netdev@...r.kernel.org>,
<edumazet@...gle.com>,
<davem@...emloft.net>,
<andrew+netdev@...n.ch>,
<pabeni@...hat.com>,
<mengyuanlou@...-swift.com>,
<horms@...nel.org>,
<netdev@...r.kernel.org>,
<edumazet@...gle.com>,
<davem@...emloft.net>,
<andrew+netdev@...n.ch>,
<pabeni@...hat.com>
Subject: RE: [net-next,v1,7/7] net: wangxun: add pcie error handler
On Wed, Feb 4, 2026 10:23 AM, Jakub Kicinski wrote:
> This is an AI-generated review of your patch. The human sending this
> email says "Sending this out without checking since you have to
> repost. Please note that the AI code review may be wrong especially
> around defensive programming (which we recommend against)."
>
> For local reproduction steps see https://netdev-ai.bots.linux.dev/ai-local.html
> ---
> net: wangxun: add pcie error handler
>
> Support to check pcie error and invoke aer driver to recover pcie.
>
> > diff --git a/drivers/net/ethernet/wangxun/libwx/wx_err.c b/drivers/net/ethernet/wangxun/libwx/wx_err.c
> > index bb77bdce69d2..36eb06bd1798 100644
> > --- a/drivers/net/ethernet/wangxun/libwx/wx_err.c
> > +++ b/drivers/net/ethernet/wangxun/libwx/wx_err.c
>
> [ ... ]
>
> > @@ -3,10 +3,228 @@
> >
> > #include <linux/netdevice.h>
> > #include <linux/pci.h>
> > +#include <linux/aer.h>
> >
> > #include "wx_type.h"
> > #include "wx_lib.h"
> > #include "wx_err.h"
> > +#include "wx_hw.h"
> > +
> > +/**
> > + * wx_io_error_detected - called when PCI error is detected
> > + * @pdev: Pointer to PCI device
> > + * @state: The current pci connection state
> > + *
> > + * This function is called after a PCI bus error affecting
> > + * this device has been detected.
> > + */
> > +static pci_ers_result_t wx_io_error_detected(struct pci_dev *pdev,
> > + pci_channel_state_t state)
> > +{
> > + struct wx *wx = pci_get_drvdata(pdev);
> > + struct net_device *netdev;
> > +
> > + netdev = wx->netdev;
> > + if (!netif_device_present(netdev))
> > + return PCI_ERS_RESULT_DISCONNECT;
> > +
> > + rtnl_lock();
> > + netif_device_detach(netdev);
> > +
> > + wx->io_err = true;
> > + if (netif_running(netdev))
> > + wx->close_suspend(wx);
> > +
> > + if (state == pci_channel_io_perm_failure) {
> > + rtnl_unlock();
> > + return PCI_ERS_RESULT_DISCONNECT;
> > + }
> > +
> > + if (!test_and_set_bit(WX_STATE_DISABLED, wx->state))
> > + pci_disable_device(pdev);
> > + rtnl_unlock();
> > +
> > + /* Request a slot reset. */
> > + return PCI_ERS_RESULT_NEED_RESET;
> > +}
> > +
> > +/**
> > + * wx_io_slot_reset - called after the pci bus has been reset.
> > + * @pdev: Pointer to PCI device
> > + *
> > + * Restart the card from scratch, as if from a cold-boot.
> > + */
> > +static pci_ers_result_t wx_io_slot_reset(struct pci_dev *pdev)
> > +{
> > + struct wx *wx = pci_get_drvdata(pdev);
> > + pci_ers_result_t result;
> > +
> > + if (pci_enable_device_mem(pdev)) {
> > + wx_err(wx, "Cannot re-enable PCI device after reset.\n");
> > + result = PCI_ERS_RESULT_DISCONNECT;
> > + } else {
> > + /* make all bar access done before reset. */
> > + smp_mb__before_atomic();
> > + clear_bit(WX_STATE_DISABLED, wx->state);
> > + pci_set_master(pdev);
> > + pci_restore_state(pdev);
> > + pci_save_state(pdev);
> > + pci_wake_from_d3(pdev, false);
> > +
> > + wx->do_reset(wx->netdev, false);
> > + result = PCI_ERS_RESULT_RECOVERED;
> > + }
> > +
> > + pci_aer_clear_nonfatal_status(pdev);
> > +
> > + return result;
> > +}
> > +
> > +/**
> > + * wx_io_resume - called when traffic can start flowing again.
> > + * @pdev: Pointer to PCI device
> > + *
> > + * This callback is called when the error recovery driver tells us that
> > + * its OK to resume normal operation.
> > + */
> > +static void wx_io_resume(struct pci_dev *pdev)
> > +{
> > + struct wx *wx = pci_get_drvdata(pdev);
> > + struct net_device *netdev;
> > +
> > + netdev = wx->netdev;
> > + rtnl_lock();
> > + if (netif_running(netdev))
> > + netdev->netdev_ops->ndo_open(netdev);
> > +
> > + wx->io_err = false;
> > + netif_device_attach(netdev);
> > + rtnl_unlock();
> > +}
> > +
> > +const struct pci_error_handlers wx_err_handler = {
> > + .error_detected = wx_io_error_detected,
> > + .slot_reset = wx_io_slot_reset,
> > + .resume = wx_io_resume,
> > +};
> > +EXPORT_SYMBOL(wx_err_handler);
> > +
> > +static bool wx_check_pcie_error(struct wx *wx)
> > +{
> > + u16 vid, pci_cmd, devctl2;
> > + u32 value;
> > +
> > + pci_read_config_word(wx->pdev, PCI_VENDOR_ID, &vid);
> > + wx_warn(wx, "PCI vendor id is 0x%x\n", vid);
> > + pci_read_config_word(wx->pdev, PCI_COMMAND, &pci_cmd);
> > + wx_warn(wx, "PCI command reg is 0x%x\n", pci_cmd);
> > + pcie_capability_read_word(wx->pdev, PCI_EXP_DEVCTL2, &devctl2);
> > + wx_warn(wx, "Device Control2 Register: 0x%04x\n", devctl2);
> > +
> > + value = rd32(wx, WX_MIS_PWR);
> > + wx_warn(wx, "MIS_PWR value is 0x%08x\n", value);
> > + value = rd32(wx, WX_PX_IMS(0));
> > + wx_warn(wx, "PX_IMS0 value is 0x%08x\n", value);
> > + if (test_bit(WX_FLAG_MULTI_64_FUNC, wx->flags)) {
> > + value = rd32(wx, WX_PX_IMS(1));
> > + wx_warn(wx, "PX_IMS1 value is 0x%08x\n", value);
> > + }
> > + value = rd32(wx, WX_TDB_TFCS);
> > + wx_warn(wx, "Tx flow control Status[TDB_TFCS 0xCE00]: 0x%x\n", value);
> > +
> > + /* PCIe link loss or memory space can't access */
> > + if (vid == WX_FAILED_READ_CFG_WORD || !(pci_cmd & 0x2))
> > + return true;
> > +
> > + return false;
> > +}
> > +
> > +static void wx_check_error_subtask(struct wx *wx)
> > +{
> > + u32 sm;
> > +
> > + if (test_bit(WX_FLAG_ERROR_CHECK, wx->flags)) {
> > + /* get PF semaphore */
> > + wr32(wx, WX_MIS_PF_SM, 1);
> > + clear_bit(WX_FLAG_ERROR_CHECK, wx->flags);
> > + }
> > +
> > + sm = rd32(wx, WX_MIS_PF_SM);
> > + /* PCIe memory space access error */
> > + if (sm == U32_MAX)
> > + goto out;
> > +
> > + /* PCIe error may be occurred in another port */
> > + if ((sm == 1 && wx_check_first_lan_up(wx)))
> > + goto out;
> > +
> > + return;
> > +out:
> > + set_bit(WX_FLAG_NEED_PCIE_RECOVER, wx->flags);
> > + wx_warn(wx, "Set PCIe recover on LAN %d\n", wx->bus.func);
> > +}
> > +
> > +static bool wx_check_recovery_capability(struct pci_dev *dev)
> > +{
> > +#if defined(__i386__) || defined(__x86_64__)
> > + return true;
> > +#else
> > + /* check upstream bridge is root or PLX bridge,
> > + * or if CPU is kunpeng 920
> > + */
> > + if (dev->bus->self->vendor == PCI_VENDOR_ID_PLX ||
> > + dev->bus->self->vendor == PCI_VENDOR_ID_HUAWEI)
>
> Can this dereference dev->bus->self without checking if it's NULL first?
>
> When the PCI device is directly attached to the root bus, dev->bus->self
> is NULL. On non-x86 platforms, this code checks the upstream bridge
> vendor without verifying that an upstream bridge exists.
>
> The function comment states "check upstream bridge is root or PLX bridge"
> but the code doesn't actually check for the root bus case. The x86 path
> returns true unconditionally, but the non-x86 path assumes dev->bus->self
> is non-NULL.
I think the comment here should be revised. On the non-x86 path, it should
returns true only when the upstream bridge is PLX or CPU is Kunpeng 920.
When dev->bus->self is null, it is necessary to return false. Although in reality,
it is not null either when device is directly attached to the root bus.
>
> > + return true;
> > + else
> > + return false;
> > +#endif
> > +}
>
> [ ... ]
>
> > diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c
> > index 3171a98d81a6..653a98d9835b 100644
> > --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c
> > +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c
> > @@ -234,6 +234,12 @@ static irqreturn_t __ngbe_msix_misc(struct wx *wx, u32 eicr)
> > if (eicr & NGBE_PX_MISC_IC_VF_MBOX)
> > wx_msg_task(wx);
> >
> > + if (eicr & NGBE_PX_MISC_PCIE_REQ_ERR) {
> > + wx_warn(wx, "PCIe Request Error founded on Lan %d\n",
> > + wx->bus.func);
>
> This isn't a bug, but there's a typo: 'founded' should be 'found'.
>
> > + wx_pcie_error_handler(wx);
> > + }
> > +
> > if (unlikely(eicr & NGBE_PX_MISC_IC_TIMESYNC))
> > wx_ptp_check_pps_event(wx);
> >
>
> [ ... ]
>
> > diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c
> > index aa14958d439a..c832100bcb61 100644
> > --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c
> > +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c
> > @@ -6,6 +6,7 @@
> >
> > #include "../libwx/wx_type.h"
> > #include "../libwx/wx_lib.h"
> > +#include "../libwx/wx_err.h"
> > #include "../libwx/wx_ptp.h"
> > #include "../libwx/wx_hw.h"
> > #include "../libwx/wx_sriov.h"
> > @@ -178,6 +179,12 @@ static irqreturn_t txgbe_misc_irq_thread_fn(int irq, void *data)
> > handle_nested_irq(sub_irq);
> > nhandled++;
> > }
> > + if (eicr & TXGBE_PX_MISC_PCIE_REQ_ERR) {
> > + wx_warn(wx, "PCIe Request Error founded on Lan %d\n",
> > + wx->bus.func);
>
> This isn't a bug, but there's a typo: 'founded' should be 'found'.
>
> > + wx_pcie_error_handler(wx);
> > + nhandled++;
> > + }
> > if (unlikely(eicr & TXGBE_PX_MISC_IC_TIMESYNC)) {
> > wx_ptp_check_pps_event(wx);
> > nhandled++;
>
Powered by blists - more mailing lists