lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20260204022249.1573252-1-kuba@kernel.org>
Date: Tue,  3 Feb 2026 18:22:49 -0800
From: Jakub Kicinski <kuba@...nel.org>
To: jiawenwu@...stnetic.com
Cc: Jakub Kicinski <kuba@...nel.org>,
	mengyuanlou@...-swift.com,
	horms@...nel.org,
	netdev@...r.kernel.org,
	edumazet@...gle.com,
	davem@...emloft.net,
	andrew+netdev@...n.ch,
	pabeni@...hat.com
Subject: Re: [net-next,v1,7/7] net: wangxun: add pcie error handler

This is an AI-generated review of your patch. The human sending this
email says "Sending this out without checking since you have to
repost. Please note that the AI code review may be wrong especially
around defensive programming (which we recommend against)."

For local reproduction steps see https://netdev-ai.bots.linux.dev/ai-local.html
---
net: wangxun: add pcie error handler

Support to check pcie error and invoke aer driver to recover pcie.

> diff --git a/drivers/net/ethernet/wangxun/libwx/wx_err.c b/drivers/net/ethernet/wangxun/libwx/wx_err.c
> index bb77bdce69d2..36eb06bd1798 100644
> --- a/drivers/net/ethernet/wangxun/libwx/wx_err.c
> +++ b/drivers/net/ethernet/wangxun/libwx/wx_err.c

[ ... ]

> @@ -3,10 +3,228 @@
>
>  #include <linux/netdevice.h>
>  #include <linux/pci.h>
> +#include <linux/aer.h>
>
>  #include "wx_type.h"
>  #include "wx_lib.h"
>  #include "wx_err.h"
> +#include "wx_hw.h"
> +
> +/**
> + * wx_io_error_detected - called when PCI error is detected
> + * @pdev: Pointer to PCI device
> + * @state: The current pci connection state
> + *
> + * This function is called after a PCI bus error affecting
> + * this device has been detected.
> + */
> +static pci_ers_result_t wx_io_error_detected(struct pci_dev *pdev,
> +					     pci_channel_state_t state)
> +{
> +	struct wx *wx = pci_get_drvdata(pdev);
> +	struct net_device *netdev;
> +
> +	netdev = wx->netdev;
> +	if (!netif_device_present(netdev))
> +		return PCI_ERS_RESULT_DISCONNECT;
> +
> +	rtnl_lock();
> +	netif_device_detach(netdev);
> +
> +	wx->io_err = true;
> +	if (netif_running(netdev))
> +		wx->close_suspend(wx);
> +
> +	if (state == pci_channel_io_perm_failure) {
> +		rtnl_unlock();
> +		return PCI_ERS_RESULT_DISCONNECT;
> +	}
> +
> +	if (!test_and_set_bit(WX_STATE_DISABLED, wx->state))
> +		pci_disable_device(pdev);
> +	rtnl_unlock();
> +
> +	/* Request a slot reset. */
> +	return PCI_ERS_RESULT_NEED_RESET;
> +}
> +
> +/**
> + * wx_io_slot_reset - called after the pci bus has been reset.
> + * @pdev: Pointer to PCI device
> + *
> + * Restart the card from scratch, as if from a cold-boot.
> + */
> +static pci_ers_result_t wx_io_slot_reset(struct pci_dev *pdev)
> +{
> +	struct wx *wx = pci_get_drvdata(pdev);
> +	pci_ers_result_t result;
> +
> +	if (pci_enable_device_mem(pdev)) {
> +		wx_err(wx, "Cannot re-enable PCI device after reset.\n");
> +		result = PCI_ERS_RESULT_DISCONNECT;
> +	} else {
> +		/* make all bar access done before reset. */
> +		smp_mb__before_atomic();
> +		clear_bit(WX_STATE_DISABLED, wx->state);
> +		pci_set_master(pdev);
> +		pci_restore_state(pdev);
> +		pci_save_state(pdev);
> +		pci_wake_from_d3(pdev, false);
> +
> +		wx->do_reset(wx->netdev, false);
> +		result = PCI_ERS_RESULT_RECOVERED;
> +	}
> +
> +	pci_aer_clear_nonfatal_status(pdev);
> +
> +	return result;
> +}
> +
> +/**
> + * wx_io_resume - called when traffic can start flowing again.
> + * @pdev: Pointer to PCI device
> + *
> + * This callback is called when the error recovery driver tells us that
> + * its OK to resume normal operation.
> + */
> +static void wx_io_resume(struct pci_dev *pdev)
> +{
> +	struct wx *wx = pci_get_drvdata(pdev);
> +	struct net_device *netdev;
> +
> +	netdev = wx->netdev;
> +	rtnl_lock();
> +	if (netif_running(netdev))
> +		netdev->netdev_ops->ndo_open(netdev);
> +
> +	wx->io_err = false;
> +	netif_device_attach(netdev);
> +	rtnl_unlock();
> +}
> +
> +const struct pci_error_handlers wx_err_handler = {
> +	.error_detected = wx_io_error_detected,
> +	.slot_reset = wx_io_slot_reset,
> +	.resume = wx_io_resume,
> +};
> +EXPORT_SYMBOL(wx_err_handler);
> +
> +static bool wx_check_pcie_error(struct wx *wx)
> +{
> +	u16 vid, pci_cmd, devctl2;
> +	u32 value;
> +
> +	pci_read_config_word(wx->pdev, PCI_VENDOR_ID, &vid);
> +	wx_warn(wx, "PCI vendor id is 0x%x\n", vid);
> +	pci_read_config_word(wx->pdev, PCI_COMMAND, &pci_cmd);
> +	wx_warn(wx, "PCI command reg is 0x%x\n", pci_cmd);
> +	pcie_capability_read_word(wx->pdev, PCI_EXP_DEVCTL2, &devctl2);
> +	wx_warn(wx, "Device Control2 Register: 0x%04x\n", devctl2);
> +
> +	value = rd32(wx, WX_MIS_PWR);
> +	wx_warn(wx, "MIS_PWR value is 0x%08x\n", value);
> +	value = rd32(wx, WX_PX_IMS(0));
> +	wx_warn(wx, "PX_IMS0 value is 0x%08x\n", value);
> +	if (test_bit(WX_FLAG_MULTI_64_FUNC, wx->flags)) {
> +		value = rd32(wx, WX_PX_IMS(1));
> +		wx_warn(wx, "PX_IMS1 value is 0x%08x\n", value);
> +	}
> +	value = rd32(wx, WX_TDB_TFCS);
> +	wx_warn(wx, "Tx flow control Status[TDB_TFCS 0xCE00]: 0x%x\n", value);
> +
> +	/* PCIe link loss or memory space can't access */
> +	if (vid == WX_FAILED_READ_CFG_WORD || !(pci_cmd & 0x2))
> +		return true;
> +
> +	return false;
> +}
> +
> +static void wx_check_error_subtask(struct wx *wx)
> +{
> +	u32 sm;
> +
> +	if (test_bit(WX_FLAG_ERROR_CHECK, wx->flags)) {
> +		/* get PF semaphore */
> +		wr32(wx, WX_MIS_PF_SM, 1);
> +		clear_bit(WX_FLAG_ERROR_CHECK, wx->flags);
> +	}
> +
> +	sm = rd32(wx, WX_MIS_PF_SM);
> +	/* PCIe memory space access error */
> +	if (sm == U32_MAX)
> +		goto out;
> +
> +	/* PCIe error may be occurred in another port */
> +	if ((sm == 1 && wx_check_first_lan_up(wx)))
> +		goto out;
> +
> +	return;
> +out:
> +	set_bit(WX_FLAG_NEED_PCIE_RECOVER, wx->flags);
> +	wx_warn(wx, "Set PCIe recover on LAN %d\n", wx->bus.func);
> +}
> +
> +static bool wx_check_recovery_capability(struct pci_dev *dev)
> +{
> +#if defined(__i386__) || defined(__x86_64__)
> +	return true;
> +#else
> +	/* check upstream bridge is root or PLX bridge,
> +	 * or if CPU is kunpeng 920
> +	 */
> +	if (dev->bus->self->vendor == PCI_VENDOR_ID_PLX ||
> +	    dev->bus->self->vendor == PCI_VENDOR_ID_HUAWEI)

Can this dereference dev->bus->self without checking if it's NULL first?

When the PCI device is directly attached to the root bus, dev->bus->self
is NULL. On non-x86 platforms, this code checks the upstream bridge
vendor without verifying that an upstream bridge exists.

The function comment states "check upstream bridge is root or PLX bridge"
but the code doesn't actually check for the root bus case. The x86 path
returns true unconditionally, but the non-x86 path assumes dev->bus->self
is non-NULL.

> +		return true;
> +	else
> +		return false;
> +#endif
> +}

[ ... ]

> diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c
> index 3171a98d81a6..653a98d9835b 100644
> --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c
> +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c
> @@ -234,6 +234,12 @@ static irqreturn_t __ngbe_msix_misc(struct wx *wx, u32 eicr)
>  	if (eicr & NGBE_PX_MISC_IC_VF_MBOX)
>  		wx_msg_task(wx);
>
> +	if (eicr & NGBE_PX_MISC_PCIE_REQ_ERR) {
> +		wx_warn(wx, "PCIe Request Error founded on Lan %d\n",
> +			wx->bus.func);

This isn't a bug, but there's a typo: 'founded' should be 'found'.

> +		wx_pcie_error_handler(wx);
> +	}
> +
>  	if (unlikely(eicr & NGBE_PX_MISC_IC_TIMESYNC))
>  		wx_ptp_check_pps_event(wx);
>

[ ... ]

> diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c
> index aa14958d439a..c832100bcb61 100644
> --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c
> +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c
> @@ -6,6 +6,7 @@
>
>  #include "../libwx/wx_type.h"
>  #include "../libwx/wx_lib.h"
> +#include "../libwx/wx_err.h"
>  #include "../libwx/wx_ptp.h"
>  #include "../libwx/wx_hw.h"
>  #include "../libwx/wx_sriov.h"
> @@ -178,6 +179,12 @@ static irqreturn_t txgbe_misc_irq_thread_fn(int irq, void *data)
>  		handle_nested_irq(sub_irq);
>  		nhandled++;
>  	}
> +	if (eicr & TXGBE_PX_MISC_PCIE_REQ_ERR) {
> +		wx_warn(wx, "PCIe Request Error founded on Lan %d\n",
> +			wx->bus.func);

This isn't a bug, but there's a typo: 'founded' should be 'found'.

> +		wx_pcie_error_handler(wx);
> +		nhandled++;
> +	}
>  	if (unlikely(eicr & TXGBE_PX_MISC_IC_TIMESYNC)) {
>  		wx_ptp_check_pps_event(wx);
>  		nhandled++;

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ