Message-ID: <CAJ8uoz0Tap7JtQdoHFrXeE96XxUhJroZTPbCKhUeR3u3jzOWjA@mail.gmail.com>
Date: Tue, 22 Apr 2025 11:53:04 +0200
From: Magnus Karlsson <magnus.karlsson@...il.com>
To: "e.kubanski" <e.kubanski@...tner.samsung.com>
Cc: linux-kernel@...r.kernel.org, netdev@...r.kernel.org, bjorn@...nel.org,
magnus.karlsson@...el.com, maciej.fijalkowski@...el.com,
jonathan.lemon@...il.com
Subject: Re: [PATCH v2 bpf] xsk: Fix race condition in AF_XDP generic RX path

On Wed, 16 Apr 2025 at 12:19, e.kubanski <e.kubanski@...tner.samsung.com> wrote:
>
> Move rx_lock from xsk_socket to xsk_buff_pool.
> Fix synchronization in the generic RX path for
> shared umem mode, where multiple sockets share a
> single xsk_buff_pool.
> 
> The RX queue is exclusive to an xsk_socket, while
> the FILL queue can be shared between multiple
> sockets. This could result in a race condition
> where two CPU cores access the RX path of two
> different sockets sharing the same umem.
> 
> Protect both queues by acquiring the spinlock in
> the shared xsk_buff_pool.
> 
> Lock contention may be minimized in the future by
> some per-thread FQ buffering.
> 
> It's safe and necessary to move spin_lock_bh(rx_lock)
> after xsk_rcv_check():
> * xs->pool and spin_lock_init() are synchronized by
>   the xsk_bind() -> xsk_is_bound() memory barriers.
> * xsk_rcv_check() may return success at the moment
>   of xsk_release() or xsk_unbind_dev(); however,
>   this will not cause any data races or race
>   conditions. xsk_unbind_dev() removes the xdp
>   socket from all maps and waits for the completion
>   of all outstanding rx operations. Packets in the
>   RX path will either complete safely or be dropped.

Thanks Eryk.

Acked-by: Magnus Karlsson <magnus.karlsson@...el.com>

> Signed-off-by: Eryk Kubanski <e.kubanski@...tner.samsung.com>
> Fixes: bf0bdd1343efb ("xdp: fix race on generic receive path")
> ---
>  include/net/xdp_sock.h      | 3 ---
>  include/net/xsk_buff_pool.h | 2 ++
>  net/xdp/xsk.c               | 6 +++---
>  net/xdp/xsk_buff_pool.c     | 1 +
>  4 files changed, 6 insertions(+), 6 deletions(-)
>
> diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
> index bfe625b55d55..df3f5f07bc7c 100644
> --- a/include/net/xdp_sock.h
> +++ b/include/net/xdp_sock.h
> @@ -71,9 +71,6 @@ struct xdp_sock {
>           */
>          u32 tx_budget_spent;
>  
> -        /* Protects generic receive. */
> -        spinlock_t rx_lock;
> -
>          /* Statistics */
>          u64 rx_dropped;
>          u64 rx_queue_full;
> diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h
> index 50779406bc2d..7f0a75d6563d 100644
> --- a/include/net/xsk_buff_pool.h
> +++ b/include/net/xsk_buff_pool.h
> @@ -53,6 +53,8 @@ struct xsk_buff_pool {
>          refcount_t users;
>          struct xdp_umem *umem;
>          struct work_struct work;
> +        /* Protects generic receive in shared and non-shared umem mode. */
> +        spinlock_t rx_lock;
>          struct list_head free_list;
>          struct list_head xskb_list;
>          u32 heads_cnt;
> diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
> index 89d2bef96469..e2a75f3be237 100644
> --- a/net/xdp/xsk.c
> +++ b/net/xdp/xsk.c
> @@ -337,13 +337,14 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
>          u32 len = xdp_get_buff_len(xdp);
>          int err;
>  
> -        spin_lock_bh(&xs->rx_lock);
>          err = xsk_rcv_check(xs, xdp, len);
>          if (!err) {
> +                spin_lock_bh(&xs->pool->rx_lock);
>                  err = __xsk_rcv(xs, xdp, len);
>                  xsk_flush(xs);
> +                spin_unlock_bh(&xs->pool->rx_lock);
>          }
> -        spin_unlock_bh(&xs->rx_lock);
> +
>          return err;
>  }
>  
> @@ -1724,7 +1725,6 @@ static int xsk_create(struct net *net, struct socket *sock, int protocol,
>          xs = xdp_sk(sk);
>          xs->state = XSK_READY;
>          mutex_init(&xs->mutex);
> -        spin_lock_init(&xs->rx_lock);
>  
>          INIT_LIST_HEAD(&xs->map_list);
>          spin_lock_init(&xs->map_list_lock);
> diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
> index 1f7975b49657..3a5f16f53178 100644
> --- a/net/xdp/xsk_buff_pool.c
> +++ b/net/xdp/xsk_buff_pool.c
> @@ -87,6 +87,7 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
>          pool->addrs = umem->addrs;
>          pool->tx_metadata_len = umem->tx_metadata_len;
>          pool->tx_sw_csum = umem->flags & XDP_UMEM_TX_SW_CSUM;
> +        spin_lock_init(&pool->rx_lock);
>          INIT_LIST_HEAD(&pool->free_list);
>          INIT_LIST_HEAD(&pool->xskb_list);
>          INIT_LIST_HEAD(&pool->xsk_tx_list);
> --
> 2.34.1
>
>
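
For readers who want to see the sharing the commit message describes, below is a
minimal userspace sketch (not part of the patch; error handling is omitted and the
ifindex/queue values are placeholders) of two AF_XDP sockets bound to the same
device queue with XDP_SHARED_UMEM. Both sockets get their own RX ring, but they
end up on the same xsk_buff_pool and the same FILL ring:

#include <stdint.h>
#include <sys/socket.h>
#include <sys/mman.h>
#include <linux/if_xdp.h>
#include <unistd.h>

#define NUM_FRAMES 4096
#define FRAME_SIZE 2048

int main(void)
{
        /* One umem area backing both sockets. */
        void *umem_area = mmap(NULL, (size_t)NUM_FRAMES * FRAME_SIZE,
                               PROT_READ | PROT_WRITE,
                               MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

        int xsk_a = socket(AF_XDP, SOCK_RAW, 0);
        int xsk_b = socket(AF_XDP, SOCK_RAW, 0);

        struct xdp_umem_reg mr = {
                .addr = (uintptr_t)umem_area,
                .len = (uint64_t)NUM_FRAMES * FRAME_SIZE,
                .chunk_size = FRAME_SIZE,
        };
        unsigned int ring_size = 2048;

        /* Socket A owns the umem and the shared FILL/COMPLETION rings. */
        setsockopt(xsk_a, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr));
        setsockopt(xsk_a, SOL_XDP, XDP_UMEM_FILL_RING, &ring_size, sizeof(ring_size));
        setsockopt(xsk_a, SOL_XDP, XDP_UMEM_COMPLETION_RING, &ring_size, sizeof(ring_size));
        setsockopt(xsk_a, SOL_XDP, XDP_RX_RING, &ring_size, sizeof(ring_size));

        struct sockaddr_xdp sxdp_a = {
                .sxdp_family = AF_XDP,
                .sxdp_ifindex = 2,      /* placeholder */
                .sxdp_queue_id = 0,     /* placeholder */
        };
        bind(xsk_a, (struct sockaddr *)&sxdp_a, sizeof(sxdp_a));

        /* Socket B creates only its own RX ring and shares A's umem (and
         * therefore its pool and FILL ring) by binding to the same
         * ifindex/queue with XDP_SHARED_UMEM.
         */
        setsockopt(xsk_b, SOL_XDP, XDP_RX_RING, &ring_size, sizeof(ring_size));

        struct sockaddr_xdp sxdp_b = {
                .sxdp_family = AF_XDP,
                .sxdp_ifindex = 2,      /* same device ...          */
                .sxdp_queue_id = 0,     /* ... and queue as socket A */
                .sxdp_flags = XDP_SHARED_UMEM,
                .sxdp_shared_umem_fd = (uint32_t)xsk_a,
        };
        bind(xsk_b, (struct sockaddr *)&sxdp_b, sizeof(sxdp_b));

        close(xsk_b);
        close(xsk_a);
        munmap(umem_area, (size_t)NUM_FRAMES * FRAME_SIZE);
        return 0;
}

With both sockets receiving through the generic XDP path, xsk_generic_rcv() can run
concurrently for the two sockets while pulling buffers from the same FILL queue,
which is exactly what the pool-level rx_lock now serializes.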
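To connect the two bullet points in the commit message to the code, here is the
post-patch receive path again as an annotated sketch (the comments are explanatory
and are not part of the patch):

/* Sketch of net/xdp/xsk.c after this patch, with the locking rationale
 * written out as comments.
 */
int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
{
        u32 len = xdp_get_buff_len(xdp);
        int err;

        /* No lock is held here: xsk_rcv_check() only verifies that the
         * socket is bound and that the packet arrived on the socket's
         * dev/queue.  xs->pool and the pool's rx_lock initialization are
         * made visible by the xsk_bind() -> xsk_is_bound() memory
         * barriers, so dereferencing xs->pool below is safe.
         */
        err = xsk_rcv_check(xs, xdp, len);
        if (!err) {
                /* The pool (and its FILL queue) may be shared by several
                 * sockets in shared umem mode, so serialize on the
                 * pool-wide lock rather than a per-socket one.
                 */
                spin_lock_bh(&xs->pool->rx_lock);
                err = __xsk_rcv(xs, xdp, len);
                xsk_flush(xs);
                spin_unlock_bh(&xs->pool->rx_lock);
        }

        return err;
}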