lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <06f201dc959c$ec2b74e0$c4825ea0$@trustnetic.com>
Date: Wed, 4 Feb 2026 14:10:12 +0800
From: Jiawen Wu <jiawenwu@...stnetic.com>
To: "'Jakub Kicinski'" <kuba@...nel.org>
Cc: <mengyuanlou@...-swift.com>,
	<horms@...nel.org>,
	<netdev@...r.kernel.org>,
	<edumazet@...gle.com>,
	<davem@...emloft.net>,
	<andrew+netdev@...n.ch>,
	<pabeni@...hat.com>,
	<mengyuanlou@...-swift.com>,
	<horms@...nel.org>,
	<netdev@...r.kernel.org>,
	<edumazet@...gle.com>,
	<davem@...emloft.net>,
	<andrew+netdev@...n.ch>,
	<pabeni@...hat.com>
Subject: RE: [net-next,v1,7/7] net: wangxun: add pcie error handler

On Wed, Feb 4, 2026 10:23 AM, Jakub Kicinski wrote:
> This is an AI-generated review of your patch. The human sending this
> email says "Sending this out without checking since you have to
> repost. Please note that the AI code review may be wrong especially
> around defensive programming (which we recommend against)."
> 
> For local reproduction steps see https://netdev-ai.bots.linux.dev/ai-local.html
> ---
> net: wangxun: add pcie error handler
> 
> Support to check pcie error and invoke aer driver to recover pcie.
> 
> > diff --git a/drivers/net/ethernet/wangxun/libwx/wx_err.c b/drivers/net/ethernet/wangxun/libwx/wx_err.c
> > index bb77bdce69d2..36eb06bd1798 100644
> > --- a/drivers/net/ethernet/wangxun/libwx/wx_err.c
> > +++ b/drivers/net/ethernet/wangxun/libwx/wx_err.c
> 
> [ ... ]
> 
> > @@ -3,10 +3,228 @@
> >
> >  #include <linux/netdevice.h>
> >  #include <linux/pci.h>
> > +#include <linux/aer.h>
> >
> >  #include "wx_type.h"
> >  #include "wx_lib.h"
> >  #include "wx_err.h"
> > +#include "wx_hw.h"
> > +
> > +/**
> > + * wx_io_error_detected - called when PCI error is detected
> > + * @pdev: Pointer to PCI device
> > + * @state: The current pci connection state
> > + *
> > + * This function is called after a PCI bus error affecting
> > + * this device has been detected.
> > + */
> > +static pci_ers_result_t wx_io_error_detected(struct pci_dev *pdev,
> > +					     pci_channel_state_t state)
> > +{
> > +	struct wx *wx = pci_get_drvdata(pdev);
> > +	struct net_device *netdev;
> > +
> > +	netdev = wx->netdev;
> > +	if (!netif_device_present(netdev))
> > +		return PCI_ERS_RESULT_DISCONNECT;
> > +
> > +	rtnl_lock();
> > +	netif_device_detach(netdev);
> > +
> > +	wx->io_err = true;
> > +	if (netif_running(netdev))
> > +		wx->close_suspend(wx);
> > +
> > +	if (state == pci_channel_io_perm_failure) {
> > +		rtnl_unlock();
> > +		return PCI_ERS_RESULT_DISCONNECT;
> > +	}
> > +
> > +	if (!test_and_set_bit(WX_STATE_DISABLED, wx->state))
> > +		pci_disable_device(pdev);
> > +	rtnl_unlock();
> > +
> > +	/* Request a slot reset. */
> > +	return PCI_ERS_RESULT_NEED_RESET;
> > +}
> > +
> > +/**
> > + * wx_io_slot_reset - called after the pci bus has been reset.
> > + * @pdev: Pointer to PCI device
> > + *
> > + * Restart the card from scratch, as if from a cold-boot.
> > + */
> > +static pci_ers_result_t wx_io_slot_reset(struct pci_dev *pdev)
> > +{
> > +	struct wx *wx = pci_get_drvdata(pdev);
> > +	pci_ers_result_t result;
> > +
> > +	if (pci_enable_device_mem(pdev)) {
> > +		wx_err(wx, "Cannot re-enable PCI device after reset.\n");
> > +		result = PCI_ERS_RESULT_DISCONNECT;
> > +	} else {
> > +		/* make all bar access done before reset. */
> > +		smp_mb__before_atomic();
> > +		clear_bit(WX_STATE_DISABLED, wx->state);
> > +		pci_set_master(pdev);
> > +		pci_restore_state(pdev);
> > +		pci_save_state(pdev);
> > +		pci_wake_from_d3(pdev, false);
> > +
> > +		wx->do_reset(wx->netdev, false);
> > +		result = PCI_ERS_RESULT_RECOVERED;
> > +	}
> > +
> > +	pci_aer_clear_nonfatal_status(pdev);
> > +
> > +	return result;
> > +}
> > +
> > +/**
> > + * wx_io_resume - called when traffic can start flowing again.
> > + * @pdev: Pointer to PCI device
> > + *
> > + * This callback is called when the error recovery driver tells us that
> > + * its OK to resume normal operation.
> > + */
> > +static void wx_io_resume(struct pci_dev *pdev)
> > +{
> > +	struct wx *wx = pci_get_drvdata(pdev);
> > +	struct net_device *netdev;
> > +
> > +	netdev = wx->netdev;
> > +	rtnl_lock();
> > +	if (netif_running(netdev))
> > +		netdev->netdev_ops->ndo_open(netdev);
> > +
> > +	wx->io_err = false;
> > +	netif_device_attach(netdev);
> > +	rtnl_unlock();
> > +}
> > +
> > +const struct pci_error_handlers wx_err_handler = {
> > +	.error_detected = wx_io_error_detected,
> > +	.slot_reset = wx_io_slot_reset,
> > +	.resume = wx_io_resume,
> > +};
> > +EXPORT_SYMBOL(wx_err_handler);
> > +
> > +static bool wx_check_pcie_error(struct wx *wx)
> > +{
> > +	u16 vid, pci_cmd, devctl2;
> > +	u32 value;
> > +
> > +	pci_read_config_word(wx->pdev, PCI_VENDOR_ID, &vid);
> > +	wx_warn(wx, "PCI vendor id is 0x%x\n", vid);
> > +	pci_read_config_word(wx->pdev, PCI_COMMAND, &pci_cmd);
> > +	wx_warn(wx, "PCI command reg is 0x%x\n", pci_cmd);
> > +	pcie_capability_read_word(wx->pdev, PCI_EXP_DEVCTL2, &devctl2);
> > +	wx_warn(wx, "Device Control2 Register: 0x%04x\n", devctl2);
> > +
> > +	value = rd32(wx, WX_MIS_PWR);
> > +	wx_warn(wx, "MIS_PWR value is 0x%08x\n", value);
> > +	value = rd32(wx, WX_PX_IMS(0));
> > +	wx_warn(wx, "PX_IMS0 value is 0x%08x\n", value);
> > +	if (test_bit(WX_FLAG_MULTI_64_FUNC, wx->flags)) {
> > +		value = rd32(wx, WX_PX_IMS(1));
> > +		wx_warn(wx, "PX_IMS1 value is 0x%08x\n", value);
> > +	}
> > +	value = rd32(wx, WX_TDB_TFCS);
> > +	wx_warn(wx, "Tx flow control Status[TDB_TFCS 0xCE00]: 0x%x\n", value);
> > +
> > +	/* PCIe link loss or memory space can't access */
> > +	if (vid == WX_FAILED_READ_CFG_WORD || !(pci_cmd & 0x2))
> > +		return true;
> > +
> > +	return false;
> > +}
> > +
> > +static void wx_check_error_subtask(struct wx *wx)
> > +{
> > +	u32 sm;
> > +
> > +	if (test_bit(WX_FLAG_ERROR_CHECK, wx->flags)) {
> > +		/* get PF semaphore */
> > +		wr32(wx, WX_MIS_PF_SM, 1);
> > +		clear_bit(WX_FLAG_ERROR_CHECK, wx->flags);
> > +	}
> > +
> > +	sm = rd32(wx, WX_MIS_PF_SM);
> > +	/* PCIe memory space access error */
> > +	if (sm == U32_MAX)
> > +		goto out;
> > +
> > +	/* PCIe error may be occurred in another port */
> > +	if ((sm == 1 && wx_check_first_lan_up(wx)))
> > +		goto out;
> > +
> > +	return;
> > +out:
> > +	set_bit(WX_FLAG_NEED_PCIE_RECOVER, wx->flags);
> > +	wx_warn(wx, "Set PCIe recover on LAN %d\n", wx->bus.func);
> > +}
> > +
> > +static bool wx_check_recovery_capability(struct pci_dev *dev)
> > +{
> > +#if defined(__i386__) || defined(__x86_64__)
> > +	return true;
> > +#else
> > +	/* check upstream bridge is root or PLX bridge,
> > +	 * or if CPU is kunpeng 920
> > +	 */
> > +	if (dev->bus->self->vendor == PCI_VENDOR_ID_PLX ||
> > +	    dev->bus->self->vendor == PCI_VENDOR_ID_HUAWEI)
> 
> Can this dereference dev->bus->self without checking if it's NULL first?
> 
> When the PCI device is directly attached to the root bus, dev->bus->self
> is NULL. On non-x86 platforms, this code checks the upstream bridge
> vendor without verifying that an upstream bridge exists.
> 
> The function comment states "check upstream bridge is root or PLX bridge"
> but the code doesn't actually check for the root bus case. The x86 path
> returns true unconditionally, but the non-x86 path assumes dev->bus->self
> is non-NULL.

I think the comment here should be revised. On the non-x86 path, it should
returns true only when the upstream bridge is PLX or CPU is Kunpeng 920.
When dev->bus->self is null, it is necessary to return false. Although in reality,
it is not null either when device is directly attached to the root bus.

> 
> > +		return true;
> > +	else
> > +		return false;
> > +#endif
> > +}
> 
> [ ... ]
> 
> > diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c
> > index 3171a98d81a6..653a98d9835b 100644
> > --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c
> > +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c
> > @@ -234,6 +234,12 @@ static irqreturn_t __ngbe_msix_misc(struct wx *wx, u32 eicr)
> >  	if (eicr & NGBE_PX_MISC_IC_VF_MBOX)
> >  		wx_msg_task(wx);
> >
> > +	if (eicr & NGBE_PX_MISC_PCIE_REQ_ERR) {
> > +		wx_warn(wx, "PCIe Request Error founded on Lan %d\n",
> > +			wx->bus.func);
> 
> This isn't a bug, but there's a typo: 'founded' should be 'found'.
> 
> > +		wx_pcie_error_handler(wx);
> > +	}
> > +
> >  	if (unlikely(eicr & NGBE_PX_MISC_IC_TIMESYNC))
> >  		wx_ptp_check_pps_event(wx);
> >
> 
> [ ... ]
> 
> > diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c
> > index aa14958d439a..c832100bcb61 100644
> > --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c
> > +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c
> > @@ -6,6 +6,7 @@
> >
> >  #include "../libwx/wx_type.h"
> >  #include "../libwx/wx_lib.h"
> > +#include "../libwx/wx_err.h"
> >  #include "../libwx/wx_ptp.h"
> >  #include "../libwx/wx_hw.h"
> >  #include "../libwx/wx_sriov.h"
> > @@ -178,6 +179,12 @@ static irqreturn_t txgbe_misc_irq_thread_fn(int irq, void *data)
> >  		handle_nested_irq(sub_irq);
> >  		nhandled++;
> >  	}
> > +	if (eicr & TXGBE_PX_MISC_PCIE_REQ_ERR) {
> > +		wx_warn(wx, "PCIe Request Error founded on Lan %d\n",
> > +			wx->bus.func);
> 
> This isn't a bug, but there's a typo: 'founded' should be 'found'.
> 
> > +		wx_pcie_error_handler(wx);
> > +		nhandled++;
> > +	}
> >  	if (unlikely(eicr & TXGBE_PX_MISC_IC_TIMESYNC)) {
> >  		wx_ptp_check_pps_event(wx);
> >  		nhandled++;
> 


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ