[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <20250131042340.156547-1-sankararaman.jayaraman@broadcom.com>
Date: Fri, 31 Jan 2025 09:53:41 +0530
From: Sankararaman Jayaraman <sankararaman.jayaraman@...adcom.com>
To: netdev@...r.kernel.org
Cc: kuba@...nel.org,
alexanderduyck@...com,
alexandr.lobakin@...el.com,
andrew+netdev@...n.ch,
ast@...nel.org,
bcm-kernel-feedback-list@...adcom.com,
bpf@...r.kernel.org,
daniel@...earbox.net,
davem@...emloft.net,
edumazet@...gle.com,
hawk@...nel.org,
john.fastabend@...il.com,
pabeni@...hat.com,
ronak.doshi@...adcom.com,
sankararaman.jayaraman@...adcom.com,
u9012063@...il.com
Subject: [PATCH net v3] vmxnet3: Fix tx queue race condition with XDP
If XDP traffic runs on a CPU which is greater than or equal to
the number of the Tx queues of the NIC, then vmxnet3_xdp_get_tq()
always picks up queue 0 for transmission as it uses reciprocal scale
instead of simple modulo operation.
vmxnet3_xdp_xmit() and vmxnet3_xdp_xmit_frame() use the above
returned queue without any locking which can lead to race conditions
when multiple XDP xmits run in parallel on different CPU's.
This patch uses a simple module scheme when the current CPU equals or
exceeds the number of Tx queues on the NIC. It also adds locking in
vmxnet3_xdp_xmit() and vmxnet3_xdp_xmit_frame() functions.
Fixes: 54f00cce1178 ("vmxnet3: Add XDP support.")
Signed-off-by: Sankararaman Jayaraman <sankararaman.jayaraman@...adcom.com>
Signed-off-by: Ronak Doshi <ronak.doshi@...adcom.com>
---
v3:
- In vmxnet3_xdp_xmit_frame(), use the irq version of spin lock
- Fixed the ordering of local variables in vmxnet3_xdp_xmit()
v2: https://lore.kernel.org/netdev/20250129181703.148027-1-sankararaman.jayaraman@broadcom.com/
- Retained the earlier copyright dates as it is a bug fix
- Used spin_lock()/spin_unlock() instead of spin_lock_irqsave()
v1: https://lore.kernel.org/netdev/20250124090211.110328-1-sankararaman.jayaraman@broadcom.com/
drivers/net/vmxnet3/vmxnet3_xdp.c | 14 ++++++++++++--
1 file changed, 12 insertions(+), 2 deletions(-)
diff --git a/drivers/net/vmxnet3/vmxnet3_xdp.c b/drivers/net/vmxnet3/vmxnet3_xdp.c
index 1341374a4588..616ecc38d172 100644
--- a/drivers/net/vmxnet3/vmxnet3_xdp.c
+++ b/drivers/net/vmxnet3/vmxnet3_xdp.c
@@ -28,7 +28,7 @@ vmxnet3_xdp_get_tq(struct vmxnet3_adapter *adapter)
if (likely(cpu < tq_number))
tq = &adapter->tx_queue[cpu];
else
- tq = &adapter->tx_queue[reciprocal_scale(cpu, tq_number)];
+ tq = &adapter->tx_queue[cpu % tq_number];
return tq;
}
@@ -124,6 +124,7 @@ vmxnet3_xdp_xmit_frame(struct vmxnet3_adapter *adapter,
u32 buf_size;
u32 dw2;
+ spin_lock_irq(&tq->tx_lock);
dw2 = (tq->tx_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
dw2 |= xdpf->len;
ctx.sop_txd = tq->tx_ring.base + tq->tx_ring.next2fill;
@@ -134,6 +135,7 @@ vmxnet3_xdp_xmit_frame(struct vmxnet3_adapter *adapter,
if (vmxnet3_cmd_ring_desc_avail(&tq->tx_ring) == 0) {
tq->stats.tx_ring_full++;
+ spin_unlock_irq(&tq->tx_lock);
return -ENOSPC;
}
@@ -142,8 +144,10 @@ vmxnet3_xdp_xmit_frame(struct vmxnet3_adapter *adapter,
tbi->dma_addr = dma_map_single(&adapter->pdev->dev,
xdpf->data, buf_size,
DMA_TO_DEVICE);
- if (dma_mapping_error(&adapter->pdev->dev, tbi->dma_addr))
+ if (dma_mapping_error(&adapter->pdev->dev, tbi->dma_addr)) {
+ spin_unlock_irq(&tq->tx_lock);
return -EFAULT;
+ }
tbi->map_type |= VMXNET3_MAP_SINGLE;
} else { /* XDP buffer from page pool */
page = virt_to_page(xdpf->data);
@@ -182,6 +186,7 @@ vmxnet3_xdp_xmit_frame(struct vmxnet3_adapter *adapter,
dma_wmb();
gdesc->dword[2] = cpu_to_le32(le32_to_cpu(gdesc->dword[2]) ^
VMXNET3_TXD_GEN);
+ spin_unlock_irq(&tq->tx_lock);
/* No need to handle the case when tx_num_deferred doesn't reach
* threshold. Backend driver at hypervisor side will poll and reset
@@ -225,6 +230,7 @@ vmxnet3_xdp_xmit(struct net_device *dev,
{
struct vmxnet3_adapter *adapter = netdev_priv(dev);
struct vmxnet3_tx_queue *tq;
+ struct netdev_queue *nq;
int i;
if (unlikely(test_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state)))
@@ -236,6 +242,9 @@ vmxnet3_xdp_xmit(struct net_device *dev,
if (tq->stopped)
return -ENETDOWN;
+ nq = netdev_get_tx_queue(adapter->netdev, tq->qid);
+
+ __netif_tx_lock(nq, smp_processor_id());
for (i = 0; i < n; i++) {
if (vmxnet3_xdp_xmit_frame(adapter, frames[i], tq, true)) {
tq->stats.xdp_xmit_err++;
@@ -243,6 +252,7 @@ vmxnet3_xdp_xmit(struct net_device *dev,
}
}
tq->stats.xdp_xmit += i;
+ __netif_tx_unlock(nq);
return i;
}
--
2.25.1
Powered by blists - more mailing lists