lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <9fdcc731-2b1e-4195-bd9e-5f91b660341d@gmail.com>
Date: Sat, 30 Aug 2025 10:29:40 +0200
From: Heiner Kallweit <hkallweit1@...il.com>
To: Mohammad Amin Hosseini <moahmmad.hosseinii@...il.com>,
 nic_swsd@...ltek.com
Cc: andrew+netdev@...n.ch, davem@...emloft.net, edumazet@...gle.com,
 kuba@...nel.org, pabeni@...hat.com, netdev@...r.kernel.org,
 linux-kernel@...r.kernel.org
Subject: Re: [PATCH v2] r8169: hardening and stability improvements

On 8/30/2025 9:30 AM, Mohammad Amin Hosseini wrote:
> From: mohammad amin hosseini <moahmmad.hosseinii@...il.com>
> 
> This patch improves robustness and reliability of the r8169 driver. The
> changes cover buffer management, interrupt handling, parameter validation,
> and resource cleanup.
> 
Are there any known issues which require these changes?
Then changes which actually fix something should come with a Fixes tag.

Several changes IMO don't make sense because they try to handle error
conditions which can't occur.

Please split this wild mix of changes and explain per patch:
- What is the problematic scenario and can it actually occur?

> While the updates touch multiple areas, they are interdependent parts of a
> cohesive hardening effort. Splitting them would leave intermediate states
> with incomplete validation.
> 
> Key changes:
> - Buffer handling: add packet length checks, NUMA-aware fallback allocation,
>   descriptor zero-initialization, and memory barriers.
> - Interrupt handling: fix return codes, selective NAPI scheduling, and
>   improved SYSErr handling for RTL_GIGA_MAC_VER_52.
> - Parameter validation: stricter RX/TX bounds checking and consistent
>   error codes.
> - Resource management: safer workqueue shutdown, proper clock lifecycle,
>   WARN_ON for unexpected device states.
> - Logging: use severity-appropriate levels, add rate limiting, and extend
>   statistics tracking.
> 
> Testing:
> - Kernel builds and module loads without warnings.
> - Runtime tested in QEMU (rtl8139 emulation).
> - Hardware validation requested from community due to lack of local device.
> 
> v2:
>  - Resent using msmtp (fix whitespace damage)
> 
> Signed-off-by: Mohammad Amin Hosseini <moahmmad.hosseinii@...il.com>
> ---
>  drivers/net/ethernet/realtek/r8169_main.c | 150 ++++++++++++++++++----
>  1 file changed, 123 insertions(+), 27 deletions(-)
> 
> diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
> index 9c601f271c02..66d7dcd8bf7b 100644
> --- a/drivers/net/ethernet/realtek/r8169_main.c
> +++ b/drivers/net/ethernet/realtek/r8169_main.c
> @@ -3981,19 +3981,39 @@ static struct page *rtl8169_alloc_rx_data(struct rtl8169_private *tp,
>  	int node = dev_to_node(d);
>  	dma_addr_t mapping;
>  	struct page *data;
> +	gfp_t gfp_flags = GFP_KERNEL;
>  
> -	data = alloc_pages_node(node, GFP_KERNEL, get_order(R8169_RX_BUF_SIZE));
> -	if (!data)
> -		return NULL;
> +	/* Use atomic allocation in interrupt/atomic context */

Is there any scenario where rtl8169_alloc_rx_data() would be called in atomic context?

> +	if (in_atomic() || irqs_disabled())
> +		gfp_flags = GFP_ATOMIC;
> +
> +	data = alloc_pages_node(node, gfp_flags, get_order(R8169_RX_BUF_SIZE));
> +	if (unlikely(!data)) {
> +		/* Try fallback allocation on any node if local node fails */
> +		data = alloc_pages(gfp_flags | __GFP_NOWARN, get_order(R8169_RX_BUF_SIZE));
> +		if (unlikely(!data)) {
> +			if (net_ratelimit())
> +				netdev_err(tp->dev, "Failed to allocate RX buffer\n");
> +			return NULL;
> +		}
> +		
> +		if (net_ratelimit())
> +			netdev_warn(tp->dev, "Fallback allocation used for RX buffer\n");
> +	}
>  
>  	mapping = dma_map_page(d, data, 0, R8169_RX_BUF_SIZE, DMA_FROM_DEVICE);
>  	if (unlikely(dma_mapping_error(d, mapping))) {
> -		netdev_err(tp->dev, "Failed to map RX DMA!\n");
> +		if (net_ratelimit())
> +			netdev_err(tp->dev, "Failed to map RX DMA page\n");
>  		__free_pages(data, get_order(R8169_RX_BUF_SIZE));
>  		return NULL;
>  	}
>  
>  	desc->addr = cpu_to_le64(mapping);
> +	desc->opts2 = 0;
> +	
> +	/* Ensure writes complete before marking descriptor ready */
> +	wmb();

Changes messing with memory barriers need a thorough explanation.
What is the problematic scenario? Any known issues?

>  	rtl8169_mark_to_asic(desc);
>  
>  	return data;
> @@ -4150,11 +4170,30 @@ static int rtl8169_tx_map(struct rtl8169_private *tp, const u32 *opts, u32 len,
>  	u32 opts1;
>  	int ret;
>  
> +	/* Validate parameters before DMA mapping */
> +	if (unlikely(!addr)) {

Any scenario where addr would be NULL?

> +		if (net_ratelimit())
> +			netdev_err(tp->dev, "TX mapping with NULL address\n");
> +		return -EINVAL;
> +	}
> +	
> +	if (unlikely(!len)) {
> +		if (net_ratelimit())
> +			netdev_err(tp->dev, "TX mapping with zero length\n");
> +		return -EINVAL;
> +	}
> +	
> +	if (unlikely(len > RTL_GSO_MAX_SIZE_V2)) {
> +		if (net_ratelimit())
> +			netdev_err(tp->dev, "TX length too large: %u\n", len);
> +		return -EINVAL;
> +	}
> +
>  	mapping = dma_map_single(d, addr, len, DMA_TO_DEVICE);
>  	ret = dma_mapping_error(d, mapping);
>  	if (unlikely(ret)) {
>  		if (net_ratelimit())
> -			netdev_err(tp->dev, "Failed to map TX data!\n");
> +			netdev_err(tp->dev, "Failed to map TX DMA: len=%u\n", len);
>  		return ret;
>  	}
>  
> @@ -4601,9 +4640,8 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget
>  		if (status & DescOwn)
>  			break;
>  
> -		/* This barrier is needed to keep us from reading
> -		 * any other fields out of the Rx descriptor until
> -		 * we know the status of DescOwn
> +		/* Ensure descriptor ownership check completes before accessing
> +		 * other fields to prevent hardware race conditions
>  		 */
>  		dma_rmb();
>  
> @@ -4624,8 +4662,27 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget
>  		}
>  
>  		pkt_size = status & GENMASK(13, 0);
> -		if (likely(!(dev->features & NETIF_F_RXFCS)))
> -			pkt_size -= ETH_FCS_LEN;
> +		
> +		/* Validate packet size to prevent buffer overflows */
> +		if (unlikely(pkt_size > R8169_RX_BUF_SIZE)) {
> +			if (net_ratelimit())
> +				netdev_warn(dev, "Oversized packet: %u bytes (status=0x%08x)\n",
> +					    pkt_size, status);
> +			dev->stats.rx_length_errors++;
> +			goto release_descriptor;
> +		}
> +		
> +		if (likely(!(dev->features & NETIF_F_RXFCS))) {
> +			if (pkt_size >= ETH_FCS_LEN) {
> +				pkt_size -= ETH_FCS_LEN;
> +			} else {
> +				if (net_ratelimit())
> +					netdev_warn(dev, "Packet smaller than FCS: %u bytes (status=0x%08x)\n",
> +						    pkt_size, status);
> +				dev->stats.rx_length_errors++;
> +				goto release_descriptor;
> +			}
> +		}
>  
>  		/* The driver does not support incoming fragmented frames.
>  		 * They are seen as a symptom of over-mtu sized frames.
> @@ -4674,26 +4731,44 @@ static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance)
>  {
>  	struct rtl8169_private *tp = dev_instance;
>  	u32 status = rtl_get_events(tp);
> +	bool handled = false;
>  
> +	/* Check for invalid hardware state or no relevant interrupts */
>  	if ((status & 0xffff) == 0xffff || !(status & tp->irq_mask))
>  		return IRQ_NONE;
>  
> -	/* At least RTL8168fp may unexpectedly set the SYSErr bit */
> -	if (unlikely(status & SYSErr &&
> -	    tp->mac_version <= RTL_GIGA_MAC_VER_06)) {
> -		rtl8169_pcierr_interrupt(tp->dev);
> +	/* Handle system errors based on chip version capabilities */
> +	if (unlikely(status & SYSErr)) {
> +		/* SYSErr handling for older chips and specific newer models
> +		 * based on vendor documentation and observed behavior
> +		 */
> +		if (tp->mac_version <= RTL_GIGA_MAC_VER_06 || 
> +		    tp->mac_version == RTL_GIGA_MAC_VER_52) {
> +			rtl8169_pcierr_interrupt(tp->dev);
> +		} else {
> +			/* Log for diagnostic purposes on newer chips */
> +			if (net_ratelimit())
> +				netdev_warn(tp->dev, "SYSErr on newer chip: status=0x%08x\n", status);
> +		}
> +		handled = true;
>  		goto out;
>  	}
>  
> -	if (status & LinkChg)
> +	if (status & LinkChg) {
>  		phy_mac_interrupt(tp->phydev);
> +		handled = true;
> +	}
> +
> +	if (status & (RxOK | RxErr | TxOK | TxErr)) {
> +		rtl_irq_disable(tp);
> +		napi_schedule(&tp->napi);
> +		handled = true;
> +	}
>  
> -	rtl_irq_disable(tp);
> -	napi_schedule(&tp->napi);
>  out:
>  	rtl_ack_events(tp, status);
>  
> -	return IRQ_HANDLED;
> +	return handled ? IRQ_HANDLED : IRQ_NONE;
>  }
>  
>  static void rtl_task(struct work_struct *work)
> @@ -4783,8 +4858,11 @@ static int r8169_phy_connect(struct rtl8169_private *tp)
>  
>  static void rtl8169_down(struct rtl8169_private *tp)
>  {
> -	disable_work_sync(&tp->wk.work);
> -	/* Clear all task flags */
> +	/* Synchronize with pending work to prevent races during shutdown.
> +	 * This is necessary because work items may access hardware registers
> +	 * that we're about to reset.
> +	 */
> +	cancel_work_sync(&tp->wk.work);
>  	bitmap_zero(tp->wk.flags, RTL_FLAG_MAX);
>  
>  	phy_stop(tp->phydev);
> @@ -4798,6 +4876,10 @@ static void rtl8169_down(struct rtl8169_private *tp)
>  	rtl_disable_exit_l1(tp);
>  	rtl_prepare_power_down(tp);
>  
> +	/* Disable clock if it was enabled during resume */
> +	if (tp->clk)
> +		clk_disable_unprepare(tp->clk);

The clk functions can deal with NULL arguments.

> +
>  	if (tp->dash_type != RTL_DASH_NONE)
>  		rtl8168_driver_stop(tp);
>  }
> @@ -4812,7 +4894,6 @@ static void rtl8169_up(struct rtl8169_private *tp)
>  	phy_resume(tp->phydev);
>  	rtl8169_init_phy(tp);
>  	napi_enable(&tp->napi);
> -	enable_work(&tp->wk.work);
>  	rtl_reset_work(tp);
>  
>  	phy_start(tp->phydev);
> @@ -4962,12 +5043,27 @@ static void rtl8169_net_suspend(struct rtl8169_private *tp)
>  static int rtl8169_runtime_resume(struct device *dev)
>  {
>  	struct rtl8169_private *tp = dev_get_drvdata(dev);
> +	int ret = 0;
> +
> +	if (WARN_ON(!tp || !tp->dev)) {
> +		dev_err(dev, "Critical: Invalid device state during resume\n");
> +		return -ENODEV;
> +	}
>  
>  	rtl_rar_set(tp, tp->dev->dev_addr);
>  	__rtl8169_set_wol(tp, tp->saved_wolopts);
>  
> -	if (tp->TxDescArray)
> +	if (tp->TxDescArray) {
> +		/* Enable clock if available */
> +		if (tp->clk) {
> +			ret = clk_prepare_enable(tp->clk);
> +			if (ret) {
> +				dev_err(dev, "Failed to enable clock: %d\n", ret);
> +				return ret;
> +			}
> +		}
>  		rtl8169_up(tp);
> +	}
>  
>  	netif_device_attach(tp->dev);
>  
> @@ -4980,7 +5076,7 @@ static int rtl8169_suspend(struct device *device)
>  
>  	rtnl_lock();
>  	rtl8169_net_suspend(tp);
> -	if (!device_may_wakeup(tp_to_dev(tp)))
> +	if (!device_may_wakeup(tp_to_dev(tp)) && tp->clk)
>  		clk_disable_unprepare(tp->clk);
>  	rtnl_unlock();
>  
> @@ -4991,7 +5087,7 @@ static int rtl8169_resume(struct device *device)
>  {
>  	struct rtl8169_private *tp = dev_get_drvdata(device);
>  
> -	if (!device_may_wakeup(tp_to_dev(tp)))
> +	if (!device_may_wakeup(tp_to_dev(tp)) && tp->clk)
>  		clk_prepare_enable(tp->clk);
>  
>  	/* Reportedly at least Asus X453MA truncates packets otherwise */
> @@ -5059,7 +5155,8 @@ static void rtl_remove_one(struct pci_dev *pdev)
>  	if (pci_dev_run_wake(pdev))
>  		pm_runtime_get_noresume(&pdev->dev);
>  
> -	disable_work_sync(&tp->wk.work);
> +	/* Ensure all work is completed before device removal */
> +	cancel_work_sync(&tp->wk.work);
>  
>  	if (IS_ENABLED(CONFIG_R8169_LEDS))
>  		r8169_remove_leds(tp->leds);
> @@ -5471,7 +5568,6 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
>  	tp->irq = pci_irq_vector(pdev, 0);
>  
>  	INIT_WORK(&tp->wk.work, rtl_task);
> -	disable_work(&tp->wk.work);
>  
>  	rtl_init_mac_address(tp);
>  
> @@ -5593,4 +5689,4 @@ static struct pci_driver rtl8169_pci_driver = {
>  	.driver.pm	= pm_ptr(&rtl8169_pm_ops),
>  };
>  
> -module_pci_driver(rtl8169_pci_driver);
> +module_pci_driver(rtl8169_pci_driver);
> \ No newline at end of file


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ