[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <9fdcc731-2b1e-4195-bd9e-5f91b660341d@gmail.com>
Date: Sat, 30 Aug 2025 10:29:40 +0200
From: Heiner Kallweit <hkallweit1@...il.com>
To: Mohammad Amin Hosseini <moahmmad.hosseinii@...il.com>,
nic_swsd@...ltek.com
Cc: andrew+netdev@...n.ch, davem@...emloft.net, edumazet@...gle.com,
kuba@...nel.org, pabeni@...hat.com, netdev@...r.kernel.org,
linux-kernel@...r.kernel.org
Subject: Re: [PATCH v2] r8169: hardening and stability improvements
On 8/30/2025 9:30 AM, Mohammad Amin Hosseini wrote:
> From: mohammad amin hosseini <moahmmad.hosseinii@...il.com>
>
> This patch improves robustness and reliability of the r8169 driver. The
> changes cover buffer management, interrupt handling, parameter validation,
> and resource cleanup.
>
Are there any known issues which require these changes?
Then changes which actually fix something should come with a Fixes tag.
Several changes IMO don't make sense because they try to handle error
conditions which can't occur.
Please split this wild mix of changes and explain per patch:
- What is the problematic scenario and can it actually occur?
> While the updates touch multiple areas, they are interdependent parts of a
> cohesive hardening effort. Splitting them would leave intermediate states
> with incomplete validation.
>
> Key changes:
> - Buffer handling: add packet length checks, NUMA-aware fallback allocation,
> descriptor zero-initialization, and memory barriers.
> - Interrupt handling: fix return codes, selective NAPI scheduling, and
> improved SYSErr handling for RTL_GIGA_MAC_VER_52.
> - Parameter validation: stricter RX/TX bounds checking and consistent
> error codes.
> - Resource management: safer workqueue shutdown, proper clock lifecycle,
> WARN_ON for unexpected device states.
> - Logging: use severity-appropriate levels, add rate limiting, and extend
> statistics tracking.
>
> Testing:
> - Kernel builds and module loads without warnings.
> - Runtime tested in QEMU (rtl8139 emulation).
> - Hardware validation requested from community due to lack of local device.
>
> v2:
> - Resent using msmtp (fix whitespace damage)
>
> Signed-off-by: Mohammad Amin Hosseini <moahmmad.hosseinii@...il.com>
> ---
> drivers/net/ethernet/realtek/r8169_main.c | 150 ++++++++++++++++++----
> 1 file changed, 123 insertions(+), 27 deletions(-)
>
> diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
> index 9c601f271c02..66d7dcd8bf7b 100644
> --- a/drivers/net/ethernet/realtek/r8169_main.c
> +++ b/drivers/net/ethernet/realtek/r8169_main.c
> @@ -3981,19 +3981,39 @@ static struct page *rtl8169_alloc_rx_data(struct rtl8169_private *tp,
> int node = dev_to_node(d);
> dma_addr_t mapping;
> struct page *data;
> + gfp_t gfp_flags = GFP_KERNEL;
>
> - data = alloc_pages_node(node, GFP_KERNEL, get_order(R8169_RX_BUF_SIZE));
> - if (!data)
> - return NULL;
> + /* Use atomic allocation in interrupt/atomic context */
Is there any scenario where rtl8169_alloc_rx_data() would be called in atomic context?
> + if (in_atomic() || irqs_disabled())
> + gfp_flags = GFP_ATOMIC;
> +
> + data = alloc_pages_node(node, gfp_flags, get_order(R8169_RX_BUF_SIZE));
> + if (unlikely(!data)) {
> + /* Try fallback allocation on any node if local node fails */
> + data = alloc_pages(gfp_flags | __GFP_NOWARN, get_order(R8169_RX_BUF_SIZE));
> + if (unlikely(!data)) {
> + if (net_ratelimit())
> + netdev_err(tp->dev, "Failed to allocate RX buffer\n");
> + return NULL;
> + }
> +
> + if (net_ratelimit())
> + netdev_warn(tp->dev, "Fallback allocation used for RX buffer\n");
> + }
>
> mapping = dma_map_page(d, data, 0, R8169_RX_BUF_SIZE, DMA_FROM_DEVICE);
> if (unlikely(dma_mapping_error(d, mapping))) {
> - netdev_err(tp->dev, "Failed to map RX DMA!\n");
> + if (net_ratelimit())
> + netdev_err(tp->dev, "Failed to map RX DMA page\n");
> __free_pages(data, get_order(R8169_RX_BUF_SIZE));
> return NULL;
> }
>
> desc->addr = cpu_to_le64(mapping);
> + desc->opts2 = 0;
> +
> + /* Ensure writes complete before marking descriptor ready */
> + wmb();
Changes messing with memory barriers need a thorough explanation.
What is the problematic scenario? Any known issues?
> rtl8169_mark_to_asic(desc);
>
> return data;
> @@ -4150,11 +4170,30 @@ static int rtl8169_tx_map(struct rtl8169_private *tp, const u32 *opts, u32 len,
> u32 opts1;
> int ret;
>
> + /* Validate parameters before DMA mapping */
> + if (unlikely(!addr)) {
Any scenario where addr would be NULL?
> + if (net_ratelimit())
> + netdev_err(tp->dev, "TX mapping with NULL address\n");
> + return -EINVAL;
> + }
> +
> + if (unlikely(!len)) {
> + if (net_ratelimit())
> + netdev_err(tp->dev, "TX mapping with zero length\n");
> + return -EINVAL;
> + }
> +
> + if (unlikely(len > RTL_GSO_MAX_SIZE_V2)) {
> + if (net_ratelimit())
> + netdev_err(tp->dev, "TX length too large: %u\n", len);
> + return -EINVAL;
> + }
> +
> mapping = dma_map_single(d, addr, len, DMA_TO_DEVICE);
> ret = dma_mapping_error(d, mapping);
> if (unlikely(ret)) {
> if (net_ratelimit())
> - netdev_err(tp->dev, "Failed to map TX data!\n");
> + netdev_err(tp->dev, "Failed to map TX DMA: len=%u\n", len);
> return ret;
> }
>
> @@ -4601,9 +4640,8 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget
> if (status & DescOwn)
> break;
>
> - /* This barrier is needed to keep us from reading
> - * any other fields out of the Rx descriptor until
> - * we know the status of DescOwn
> + /* Ensure descriptor ownership check completes before accessing
> + * other fields to prevent hardware race conditions
> */
> dma_rmb();
>
> @@ -4624,8 +4662,27 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget
> }
>
> pkt_size = status & GENMASK(13, 0);
> - if (likely(!(dev->features & NETIF_F_RXFCS)))
> - pkt_size -= ETH_FCS_LEN;
> +
> + /* Validate packet size to prevent buffer overflows */
> + if (unlikely(pkt_size > R8169_RX_BUF_SIZE)) {
> + if (net_ratelimit())
> + netdev_warn(dev, "Oversized packet: %u bytes (status=0x%08x)\n",
> + pkt_size, status);
> + dev->stats.rx_length_errors++;
> + goto release_descriptor;
> + }
> +
> + if (likely(!(dev->features & NETIF_F_RXFCS))) {
> + if (pkt_size >= ETH_FCS_LEN) {
> + pkt_size -= ETH_FCS_LEN;
> + } else {
> + if (net_ratelimit())
> + netdev_warn(dev, "Packet smaller than FCS: %u bytes (status=0x%08x)\n",
> + pkt_size, status);
> + dev->stats.rx_length_errors++;
> + goto release_descriptor;
> + }
> + }
>
> /* The driver does not support incoming fragmented frames.
> * They are seen as a symptom of over-mtu sized frames.
> @@ -4674,26 +4731,44 @@ static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance)
> {
> struct rtl8169_private *tp = dev_instance;
> u32 status = rtl_get_events(tp);
> + bool handled = false;
>
> + /* Check for invalid hardware state or no relevant interrupts */
> if ((status & 0xffff) == 0xffff || !(status & tp->irq_mask))
> return IRQ_NONE;
>
> - /* At least RTL8168fp may unexpectedly set the SYSErr bit */
> - if (unlikely(status & SYSErr &&
> - tp->mac_version <= RTL_GIGA_MAC_VER_06)) {
> - rtl8169_pcierr_interrupt(tp->dev);
> + /* Handle system errors based on chip version capabilities */
> + if (unlikely(status & SYSErr)) {
> + /* SYSErr handling for older chips and specific newer models
> + * based on vendor documentation and observed behavior
> + */
> + if (tp->mac_version <= RTL_GIGA_MAC_VER_06 ||
> + tp->mac_version == RTL_GIGA_MAC_VER_52) {
> + rtl8169_pcierr_interrupt(tp->dev);
> + } else {
> + /* Log for diagnostic purposes on newer chips */
> + if (net_ratelimit())
> + netdev_warn(tp->dev, "SYSErr on newer chip: status=0x%08x\n", status);
> + }
> + handled = true;
> goto out;
> }
>
> - if (status & LinkChg)
> + if (status & LinkChg) {
> phy_mac_interrupt(tp->phydev);
> + handled = true;
> + }
> +
> + if (status & (RxOK | RxErr | TxOK | TxErr)) {
> + rtl_irq_disable(tp);
> + napi_schedule(&tp->napi);
> + handled = true;
> + }
>
> - rtl_irq_disable(tp);
> - napi_schedule(&tp->napi);
> out:
> rtl_ack_events(tp, status);
>
> - return IRQ_HANDLED;
> + return handled ? IRQ_HANDLED : IRQ_NONE;
> }
>
> static void rtl_task(struct work_struct *work)
> @@ -4783,8 +4858,11 @@ static int r8169_phy_connect(struct rtl8169_private *tp)
>
> static void rtl8169_down(struct rtl8169_private *tp)
> {
> - disable_work_sync(&tp->wk.work);
> - /* Clear all task flags */
> + /* Synchronize with pending work to prevent races during shutdown.
> + * This is necessary because work items may access hardware registers
> + * that we're about to reset.
> + */
> + cancel_work_sync(&tp->wk.work);
> bitmap_zero(tp->wk.flags, RTL_FLAG_MAX);
>
> phy_stop(tp->phydev);
> @@ -4798,6 +4876,10 @@ static void rtl8169_down(struct rtl8169_private *tp)
> rtl_disable_exit_l1(tp);
> rtl_prepare_power_down(tp);
>
> + /* Disable clock if it was enabled during resume */
> + if (tp->clk)
> + clk_disable_unprepare(tp->clk);
The clk functions can deal with NULL arguments.
> +
> if (tp->dash_type != RTL_DASH_NONE)
> rtl8168_driver_stop(tp);
> }
> @@ -4812,7 +4894,6 @@ static void rtl8169_up(struct rtl8169_private *tp)
> phy_resume(tp->phydev);
> rtl8169_init_phy(tp);
> napi_enable(&tp->napi);
> - enable_work(&tp->wk.work);
> rtl_reset_work(tp);
>
> phy_start(tp->phydev);
> @@ -4962,12 +5043,27 @@ static void rtl8169_net_suspend(struct rtl8169_private *tp)
> static int rtl8169_runtime_resume(struct device *dev)
> {
> struct rtl8169_private *tp = dev_get_drvdata(dev);
> + int ret = 0;
> +
> + if (WARN_ON(!tp || !tp->dev)) {
> + dev_err(dev, "Critical: Invalid device state during resume\n");
> + return -ENODEV;
> + }
>
> rtl_rar_set(tp, tp->dev->dev_addr);
> __rtl8169_set_wol(tp, tp->saved_wolopts);
>
> - if (tp->TxDescArray)
> + if (tp->TxDescArray) {
> + /* Enable clock if available */
> + if (tp->clk) {
> + ret = clk_prepare_enable(tp->clk);
> + if (ret) {
> + dev_err(dev, "Failed to enable clock: %d\n", ret);
> + return ret;
> + }
> + }
> rtl8169_up(tp);
> + }
>
> netif_device_attach(tp->dev);
>
> @@ -4980,7 +5076,7 @@ static int rtl8169_suspend(struct device *device)
>
> rtnl_lock();
> rtl8169_net_suspend(tp);
> - if (!device_may_wakeup(tp_to_dev(tp)))
> + if (!device_may_wakeup(tp_to_dev(tp)) && tp->clk)
> clk_disable_unprepare(tp->clk);
> rtnl_unlock();
>
> @@ -4991,7 +5087,7 @@ static int rtl8169_resume(struct device *device)
> {
> struct rtl8169_private *tp = dev_get_drvdata(device);
>
> - if (!device_may_wakeup(tp_to_dev(tp)))
> + if (!device_may_wakeup(tp_to_dev(tp)) && tp->clk)
> clk_prepare_enable(tp->clk);
>
> /* Reportedly at least Asus X453MA truncates packets otherwise */
> @@ -5059,7 +5155,8 @@ static void rtl_remove_one(struct pci_dev *pdev)
> if (pci_dev_run_wake(pdev))
> pm_runtime_get_noresume(&pdev->dev);
>
> - disable_work_sync(&tp->wk.work);
> + /* Ensure all work is completed before device removal */
> + cancel_work_sync(&tp->wk.work);
>
> if (IS_ENABLED(CONFIG_R8169_LEDS))
> r8169_remove_leds(tp->leds);
> @@ -5471,7 +5568,6 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
> tp->irq = pci_irq_vector(pdev, 0);
>
> INIT_WORK(&tp->wk.work, rtl_task);
> - disable_work(&tp->wk.work);
>
> rtl_init_mac_address(tp);
>
> @@ -5593,4 +5689,4 @@ static struct pci_driver rtl8169_pci_driver = {
> .driver.pm = pm_ptr(&rtl8169_pm_ops),
> };
>
> -module_pci_driver(rtl8169_pci_driver);
> +module_pci_driver(rtl8169_pci_driver);
> \ No newline at end of file
Powered by blists - more mailing lists