Message-Id: <20251025065310.5676-3-kerneljasonxing@gmail.com>
Date: Sat, 25 Oct 2025 14:53:10 +0800
From: Jason Xing <kerneljasonxing@...il.com>
To: davem@...emloft.net,
edumazet@...gle.com,
kuba@...nel.org,
pabeni@...hat.com,
bjorn@...nel.org,
magnus.karlsson@...el.com,
maciej.fijalkowski@...el.com,
jonathan.lemon@...il.com,
sdf@...ichev.me,
ast@...nel.org,
daniel@...earbox.net,
hawk@...nel.org,
john.fastabend@...il.com,
horms@...nel.org,
andrew+netdev@...n.ch
Cc: bpf@...r.kernel.org,
netdev@...r.kernel.org,
Jason Xing <kernelxing@...cent.com>
Subject: [PATCH net-next 2/2] xsk: use a smaller new lock for shared pool case
From: Jason Xing <kernelxing@...cent.com>
- Split cq_lock into two smaller locks: cq_prod_lock and
cq_cached_prod_lock
- Avoid disabling/enabling interrupts in the hot xmit path
In both xsk_cq_cancel_locked() and xsk_cq_reserve_locked(), the only
possible race is between multiple xsks sharing the same pool. All of
these callers run in process context rather than interrupt context, so
the new, smaller lock, cq_cached_prod_lock, can be taken without
disabling interrupts.
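A rough sketch of that pattern, using made-up names (struct cq_model,
cq_shared_reserve(); lock initialization omitted) rather than the real
xsk code:

	#include <linux/list.h>
	#include <linux/spinlock.h>
	#include <linux/types.h>

	struct cq_model {
		struct list_head xsk_tx_list;	/* sockets sharing the pool */
		spinlock_t cq_cached_prod_lock;
		u32 cached_prod;
	};

	static void cq_shared_reserve(struct cq_model *cq)
	{
		/* Only other senders sharing the pool can race here, and
		 * they all run in process context, so a plain spin_lock is
		 * enough; with a single user the lock is skipped entirely.
		 */
		bool lock = !list_is_singular(&cq->xsk_tx_list);

		if (lock)
			spin_lock(&cq->cq_cached_prod_lock);
		cq->cached_prod++;
		if (lock)
			spin_unlock(&cq->cq_cached_prod_lock);
	}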
While cq_cached_prod_lock ensures exclusive modification of
@cached_prod, cq_prod_lock in xsk_cq_submit_addr_locked() only protects
@producer and the corresponding @desc entries. Neither of them has to
stay consistent with @cached_prod, which is protected by
cq_cached_prod_lock. That is why the previous big lock can be split
into two smaller ones.
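A minimal sketch of that split, again with made-up names (struct
cq_split, cq_submit(); lock initialization omitted), not the real
xsk_queue implementation:

	#include <linux/spinlock.h>
	#include <linux/types.h>

	struct cq_split {
		u32 cached_prod;	/* written under cq_cached_prod_lock */
		u32 producer;		/* written under cq_prod_lock */
		spinlock_t cq_cached_prod_lock;	/* process context only */
		spinlock_t cq_prod_lock;	/* may be taken from the skb destructor */
	};

	/* Publishing completed entries advances only @producer (plus the
	 * descriptors themselves), so it keeps its own irqsave lock and
	 * never contends with reserve/cancel, which move only @cached_prod.
	 */
	static void cq_submit(struct cq_split *cq, u32 n)
	{
		unsigned long flags;

		spin_lock_irqsave(&cq->cq_prod_lock, flags);
		cq->producer += n;
		spin_unlock_irqrestore(&cq->cq_prod_lock, flags);
	}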
Frequently disabling and enabling interrupts is time consuming in some
cases, especially at per-descriptor granularity. After this
optimization it can be avoided, even when the pool is shared by
multiple xsks.
Signed-off-by: Jason Xing <kernelxing@...cent.com>
---
include/net/xsk_buff_pool.h | 13 +++++++++----
net/xdp/xsk.c | 14 ++++++--------
net/xdp/xsk_buff_pool.c | 3 ++-
3 files changed, 17 insertions(+), 13 deletions(-)
diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h
index cac56e6b0869..92a2358c6ce3 100644
--- a/include/net/xsk_buff_pool.h
+++ b/include/net/xsk_buff_pool.h
@@ -85,11 +85,16 @@ struct xsk_buff_pool {
bool unaligned;
bool tx_sw_csum;
void *addrs;
- /* Mutual exclusion of the completion ring in the SKB mode. Two cases to protect:
- * NAPI TX thread and sendmsg error paths in the SKB destructor callback and when
- * sockets share a single cq when the same netdev and queue id is shared.
+ /* Mutual exclusion of the completion ring in the SKB mode.
+ * Protect: NAPI TX thread and sendmsg error paths in the SKB
+ * destructor callback.
*/
- spinlock_t cq_lock;
+ spinlock_t cq_prod_lock;
+ /* Mutual exclusion of the completion ring in the SKB mode.
+ * Protect: sockets sharing a single cq when the same netdev and
+ * queue id is shared.
+ */
+ spinlock_t cq_cached_prod_lock;
struct xdp_buff_xsk *free_heads[];
};
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 76f797fcc49c..d254817b8a53 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -549,14 +549,13 @@ static int xsk_wakeup(struct xdp_sock *xs, u8 flags)
static int xsk_cq_reserve_locked(struct xsk_buff_pool *pool)
{
bool lock = !list_is_singular(&pool->xsk_tx_list);
- unsigned long flags;
int ret;
if (lock)
- spin_lock_irqsave(&pool->cq_lock, flags);
+ spin_lock(&pool->cq_cached_prod_lock);
ret = xskq_prod_reserve(pool->cq);
if (lock)
- spin_unlock_irqrestore(&pool->cq_lock, flags);
+ spin_unlock(&pool->cq_cached_prod_lock);
return ret;
}
@@ -569,7 +568,7 @@ static void xsk_cq_submit_addr_locked(struct xsk_buff_pool *pool,
unsigned long flags;
u32 idx;
- spin_lock_irqsave(&pool->cq_lock, flags);
+ spin_lock_irqsave(&pool->cq_prod_lock, flags);
idx = xskq_get_prod(pool->cq);
xskq_prod_write_addr(pool->cq, idx,
@@ -586,19 +585,18 @@ static void xsk_cq_submit_addr_locked(struct xsk_buff_pool *pool,
}
}
xskq_prod_submit_n(pool->cq, descs_processed);
- spin_unlock_irqrestore(&pool->cq_lock, flags);
+ spin_unlock_irqrestore(&pool->cq_prod_lock, flags);
}
static void xsk_cq_cancel_locked(struct xsk_buff_pool *pool, u32 n)
{
bool lock = !list_is_singular(&pool->xsk_tx_list);
- unsigned long flags;
if (lock)
- spin_lock_irqsave(&pool->cq_lock, flags);
+ spin_lock(&pool->cq_cached_prod_lock);
xskq_prod_cancel_n(pool->cq, n);
if (lock)
- spin_unlock_irqrestore(&pool->cq_lock, flags);
+ spin_unlock(&pool->cq_cached_prod_lock);
}
static void xsk_inc_num_desc(struct sk_buff *skb)
diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
index aa9788f20d0d..add44bd09cae 100644
--- a/net/xdp/xsk_buff_pool.c
+++ b/net/xdp/xsk_buff_pool.c
@@ -94,7 +94,8 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
INIT_LIST_HEAD(&pool->xskb_list);
INIT_LIST_HEAD(&pool->xsk_tx_list);
spin_lock_init(&pool->xsk_tx_list_lock);
- spin_lock_init(&pool->cq_lock);
+ spin_lock_init(&pool->cq_prod_lock);
+ spin_lock_init(&pool->cq_cached_prod_lock);
refcount_set(&pool->users, 1);
pool->fq = xs->fq_tmp;
--
2.41.3