[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250830073039.598-1-moahmmad.hosseinii@gmail.com>
Date: Sat, 30 Aug 2025 11:00:39 +0330
From: Mohammad Amin Hosseini <moahmmad.hosseinii@...il.com>
To: hkallweit1@...il.com,
nic_swsd@...ltek.com
Cc: andrew+netdev@...n.ch,
davem@...emloft.net,
edumazet@...gle.com,
kuba@...nel.org,
pabeni@...hat.com,
netdev@...r.kernel.org,
linux-kernel@...r.kernel.org,
mohammad amin hosseini <moahmmad.hosseinii@...il.com>
Subject: [PATCH v2] r8169: hardening and stability improvements
From: mohammad amin hosseini <moahmmad.hosseinii@...il.com>
This patch improves robustness and reliability of the r8169 driver. The
changes cover buffer management, interrupt handling, parameter validation,
and resource cleanup.
While the updates touch multiple areas, they are interdependent parts of a
cohesive hardening effort. Splitting them would leave intermediate states
with incomplete validation.
Key changes:
- Buffer handling: add packet length checks, NUMA-aware fallback allocation,
descriptor zero-initialization, and memory barriers.
- Interrupt handling: fix return codes, selective NAPI scheduling, and
improved SYSErr handling for RTL_GIGA_MAC_VER_52.
- Parameter validation: stricter RX/TX bounds checking and consistent
error codes.
- Resource management: safer workqueue shutdown, proper clock lifecycle,
WARN_ON for unexpected device states.
- Logging: use severity-appropriate levels, add rate limiting, and extend
statistics tracking.
Testing:
- Kernel builds and module loads without warnings.
- Runtime tested in QEMU (rtl8139 emulation).
- Hardware validation requested from community due to lack of local device.
v2:
- Resent using msmtp (fix whitespace damage)
Signed-off-by: Mohammad Amin Hosseini <moahmmad.hosseinii@...il.com>
---
drivers/net/ethernet/realtek/r8169_main.c | 150 ++++++++++++++++++----
1 file changed, 123 insertions(+), 27 deletions(-)
diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 9c601f271c02..66d7dcd8bf7b 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -3981,19 +3981,39 @@ static struct page *rtl8169_alloc_rx_data(struct rtl8169_private *tp,
int node = dev_to_node(d);
dma_addr_t mapping;
struct page *data;
+ gfp_t gfp_flags = GFP_KERNEL;
- data = alloc_pages_node(node, GFP_KERNEL, get_order(R8169_RX_BUF_SIZE));
- if (!data)
- return NULL;
+ /* Use atomic allocation in interrupt/atomic context */
+ if (in_atomic() || irqs_disabled())
+ gfp_flags = GFP_ATOMIC;
+
+ data = alloc_pages_node(node, gfp_flags, get_order(R8169_RX_BUF_SIZE));
+ if (unlikely(!data)) {
+ /* Try fallback allocation on any node if local node fails */
+ data = alloc_pages(gfp_flags | __GFP_NOWARN, get_order(R8169_RX_BUF_SIZE));
+ if (unlikely(!data)) {
+ if (net_ratelimit())
+ netdev_err(tp->dev, "Failed to allocate RX buffer\n");
+ return NULL;
+ }
+
+ if (net_ratelimit())
+ netdev_warn(tp->dev, "Fallback allocation used for RX buffer\n");
+ }
mapping = dma_map_page(d, data, 0, R8169_RX_BUF_SIZE, DMA_FROM_DEVICE);
if (unlikely(dma_mapping_error(d, mapping))) {
- netdev_err(tp->dev, "Failed to map RX DMA!\n");
+ if (net_ratelimit())
+ netdev_err(tp->dev, "Failed to map RX DMA page\n");
__free_pages(data, get_order(R8169_RX_BUF_SIZE));
return NULL;
}
desc->addr = cpu_to_le64(mapping);
+ desc->opts2 = 0;
+
+ /* Ensure writes complete before marking descriptor ready */
+ wmb();
rtl8169_mark_to_asic(desc);
return data;
@@ -4150,11 +4170,30 @@ static int rtl8169_tx_map(struct rtl8169_private *tp, const u32 *opts, u32 len,
u32 opts1;
int ret;
+ /* Validate parameters before DMA mapping */
+ if (unlikely(!addr)) {
+ if (net_ratelimit())
+ netdev_err(tp->dev, "TX mapping with NULL address\n");
+ return -EINVAL;
+ }
+
+ if (unlikely(!len)) {
+ if (net_ratelimit())
+ netdev_err(tp->dev, "TX mapping with zero length\n");
+ return -EINVAL;
+ }
+
+ if (unlikely(len > RTL_GSO_MAX_SIZE_V2)) {
+ if (net_ratelimit())
+ netdev_err(tp->dev, "TX length too large: %u\n", len);
+ return -EINVAL;
+ }
+
mapping = dma_map_single(d, addr, len, DMA_TO_DEVICE);
ret = dma_mapping_error(d, mapping);
if (unlikely(ret)) {
if (net_ratelimit())
- netdev_err(tp->dev, "Failed to map TX data!\n");
+ netdev_err(tp->dev, "Failed to map TX DMA: len=%u\n", len);
return ret;
}
@@ -4601,9 +4640,8 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget
if (status & DescOwn)
break;
- /* This barrier is needed to keep us from reading
- * any other fields out of the Rx descriptor until
- * we know the status of DescOwn
+ /* Ensure descriptor ownership check completes before accessing
+ * other fields to prevent hardware race conditions
*/
dma_rmb();
@@ -4624,8 +4662,27 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget
}
pkt_size = status & GENMASK(13, 0);
- if (likely(!(dev->features & NETIF_F_RXFCS)))
- pkt_size -= ETH_FCS_LEN;
+
+ /* Validate packet size to prevent buffer overflows */
+ if (unlikely(pkt_size > R8169_RX_BUF_SIZE)) {
+ if (net_ratelimit())
+ netdev_warn(dev, "Oversized packet: %u bytes (status=0x%08x)\n",
+ pkt_size, status);
+ dev->stats.rx_length_errors++;
+ goto release_descriptor;
+ }
+
+ if (likely(!(dev->features & NETIF_F_RXFCS))) {
+ if (pkt_size >= ETH_FCS_LEN) {
+ pkt_size -= ETH_FCS_LEN;
+ } else {
+ if (net_ratelimit())
+ netdev_warn(dev, "Packet smaller than FCS: %u bytes (status=0x%08x)\n",
+ pkt_size, status);
+ dev->stats.rx_length_errors++;
+ goto release_descriptor;
+ }
+ }
/* The driver does not support incoming fragmented frames.
* They are seen as a symptom of over-mtu sized frames.
@@ -4674,26 +4731,44 @@ static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance)
{
struct rtl8169_private *tp = dev_instance;
u32 status = rtl_get_events(tp);
+ bool handled = false;
+ /* Check for invalid hardware state or no relevant interrupts */
if ((status & 0xffff) == 0xffff || !(status & tp->irq_mask))
return IRQ_NONE;
- /* At least RTL8168fp may unexpectedly set the SYSErr bit */
- if (unlikely(status & SYSErr &&
- tp->mac_version <= RTL_GIGA_MAC_VER_06)) {
- rtl8169_pcierr_interrupt(tp->dev);
+ /* Handle system errors based on chip version capabilities */
+ if (unlikely(status & SYSErr)) {
+ /* SYSErr handling for older chips and specific newer models
+ * based on vendor documentation and observed behavior
+ */
+ if (tp->mac_version <= RTL_GIGA_MAC_VER_06 ||
+ tp->mac_version == RTL_GIGA_MAC_VER_52) {
+ rtl8169_pcierr_interrupt(tp->dev);
+ } else {
+ /* Log for diagnostic purposes on newer chips */
+ if (net_ratelimit())
+ netdev_warn(tp->dev, "SYSErr on newer chip: status=0x%08x\n", status);
+ }
+ handled = true;
goto out;
}
- if (status & LinkChg)
+ if (status & LinkChg) {
phy_mac_interrupt(tp->phydev);
+ handled = true;
+ }
+
+ if (status & (RxOK | RxErr | TxOK | TxErr)) {
+ rtl_irq_disable(tp);
+ napi_schedule(&tp->napi);
+ handled = true;
+ }
- rtl_irq_disable(tp);
- napi_schedule(&tp->napi);
out:
rtl_ack_events(tp, status);
- return IRQ_HANDLED;
+ return handled ? IRQ_HANDLED : IRQ_NONE;
}
static void rtl_task(struct work_struct *work)
@@ -4783,8 +4858,11 @@ static int r8169_phy_connect(struct rtl8169_private *tp)
static void rtl8169_down(struct rtl8169_private *tp)
{
- disable_work_sync(&tp->wk.work);
- /* Clear all task flags */
+ /* Synchronize with pending work to prevent races during shutdown.
+ * This is necessary because work items may access hardware registers
+ * that we're about to reset.
+ */
+ cancel_work_sync(&tp->wk.work);
bitmap_zero(tp->wk.flags, RTL_FLAG_MAX);
phy_stop(tp->phydev);
@@ -4798,6 +4876,10 @@ static void rtl8169_down(struct rtl8169_private *tp)
rtl_disable_exit_l1(tp);
rtl_prepare_power_down(tp);
+ /* Disable clock if it was enabled during resume */
+ if (tp->clk)
+ clk_disable_unprepare(tp->clk);
+
if (tp->dash_type != RTL_DASH_NONE)
rtl8168_driver_stop(tp);
}
@@ -4812,7 +4894,6 @@ static void rtl8169_up(struct rtl8169_private *tp)
phy_resume(tp->phydev);
rtl8169_init_phy(tp);
napi_enable(&tp->napi);
- enable_work(&tp->wk.work);
rtl_reset_work(tp);
phy_start(tp->phydev);
@@ -4962,12 +5043,27 @@ static void rtl8169_net_suspend(struct rtl8169_private *tp)
static int rtl8169_runtime_resume(struct device *dev)
{
struct rtl8169_private *tp = dev_get_drvdata(dev);
+ int ret = 0;
+
+ if (WARN_ON(!tp || !tp->dev)) {
+ dev_err(dev, "Critical: Invalid device state during resume\n");
+ return -ENODEV;
+ }
rtl_rar_set(tp, tp->dev->dev_addr);
__rtl8169_set_wol(tp, tp->saved_wolopts);
- if (tp->TxDescArray)
+ if (tp->TxDescArray) {
+ /* Enable clock if available */
+ if (tp->clk) {
+ ret = clk_prepare_enable(tp->clk);
+ if (ret) {
+ dev_err(dev, "Failed to enable clock: %d\n", ret);
+ return ret;
+ }
+ }
rtl8169_up(tp);
+ }
netif_device_attach(tp->dev);
@@ -4980,7 +5076,7 @@ static int rtl8169_suspend(struct device *device)
rtnl_lock();
rtl8169_net_suspend(tp);
- if (!device_may_wakeup(tp_to_dev(tp)))
+ if (!device_may_wakeup(tp_to_dev(tp)) && tp->clk)
clk_disable_unprepare(tp->clk);
rtnl_unlock();
@@ -4991,7 +5087,7 @@ static int rtl8169_resume(struct device *device)
{
struct rtl8169_private *tp = dev_get_drvdata(device);
- if (!device_may_wakeup(tp_to_dev(tp)))
+ if (!device_may_wakeup(tp_to_dev(tp)) && tp->clk)
clk_prepare_enable(tp->clk);
/* Reportedly at least Asus X453MA truncates packets otherwise */
@@ -5059,7 +5155,8 @@ static void rtl_remove_one(struct pci_dev *pdev)
if (pci_dev_run_wake(pdev))
pm_runtime_get_noresume(&pdev->dev);
- disable_work_sync(&tp->wk.work);
+ /* Ensure all work is completed before device removal */
+ cancel_work_sync(&tp->wk.work);
if (IS_ENABLED(CONFIG_R8169_LEDS))
r8169_remove_leds(tp->leds);
@@ -5471,7 +5568,6 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
tp->irq = pci_irq_vector(pdev, 0);
INIT_WORK(&tp->wk.work, rtl_task);
- disable_work(&tp->wk.work);
rtl_init_mac_address(tp);
@@ -5593,4 +5689,4 @@ static struct pci_driver rtl8169_pci_driver = {
.driver.pm = pm_ptr(&rtl8169_pm_ops),
};
-module_pci_driver(rtl8169_pci_driver);
+module_pci_driver(rtl8169_pci_driver);
\ No newline at end of file
--
2.43.0
Powered by blists - more mailing lists