Message-Id: <20250530095957.43248-1-e.kubanski@partner.samsung.com>
Date: Fri, 30 May 2025 11:59:57 +0200
From: "e.kubanski" <e.kubanski@...tner.samsung.com>
To: netdev@...r.kernel.org, linux-kernel@...r.kernel.org
Cc: bjorn@...nel.org, magnus.karlsson@...el.com,
	maciej.fijalkowski@...el.com, jonathan.lemon@...il.com, "e.kubanski"
	<e.kubanski@...tner.samsung.com>
Subject: [PATCH bpf] xsk: Fix out of order segment free in
 __xsk_generic_xmit()

Move the xsk completion queue descriptor write-back to the skb destructor.

Fix xsk descriptor management in the completion queue. The descriptor
management mechanism didn't handle situations where completion queue
submission can happen out of order with respect to descriptor
write-back.

__xsk_generic_xmit() was writing the descriptor into its slot right
after reserving the completion queue slot. If multiple CPUs access
the same completion queue after xmit, this can result in out-of-order
submission of an invalid descriptor batch: an SKB destructor can
submit a descriptor batch that is currently in use by another CPU
instead of the correctly transmitted ones. This could result in a
user-space <-> kernel-space data race.

Previously, out-of-order submission was possible:
CPU A: Reservation + Descriptor Write
CPU B: Reservation + Descriptor Write
CPU B: Submit (submits the first batch, reserved by CPU A)
CPU A: Submit (submits the second batch, reserved by CPU B)

With the descriptor write moved to the submission phase:
CPU A: Reservation (only moves the local writer)
CPU B: Reservation (only moves the local writer)
CPU B: Descriptor Write + Submit
CPU A: Descriptor Write + Submit

This resolves the potential out-of-order free of xsk buffers.
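
To make the race concrete, below is a minimal single-threaded user-space
model of the two schemes. The ring, reserve() and submit() here are
simplified stand-ins for the completion queue and its helpers (no
locking, no free-slot checks, names are illustrative only), and the
problematic interleaving is replayed sequentially rather than with real
threads:

/* Minimal model of the interleaving described above; not kernel code. */
#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>

#define RING_SIZE 8
#define MASK (RING_SIZE - 1)

static uint64_t ring[RING_SIZE];
static uint32_t cached_prod;	/* reservation cursor (local writer) */
static uint32_t producer;	/* boundary visible to user space */

static uint32_t reserve(void) { return cached_prod++; }
static void submit(uint32_t n) { producer += n; }

int main(void)
{
	/* Old scheme: descriptor written at reservation time. */
	uint32_t a = reserve(); ring[a & MASK] = 0xA1; /* CPU A's addr */
	uint32_t b = reserve(); ring[b & MASK] = 0xB1; /* CPU B's addr */

	/* CPU B's skb completes first and submits one entry ... */
	submit(1);
	/* ... but the entry now visible to user space is CPU A's addr,
	 * whose buffer is still in flight:
	 */
	printf("old scheme, user space sees 0x%" PRIx64 " (expected 0xB1)\n",
	       ring[(producer - 1) & MASK]);

	/* New scheme: reservation only moves the cursor; the address is
	 * written at the producer index right before the submit (in the
	 * kernel, under the same cq_lock), so whichever CPU submits
	 * first fills the next visible slot with *its own* addr.
	 */
	cached_prod = producer = 0;
	(void)reserve();			/* CPU A reserves, writes nothing */
	(void)reserve();			/* CPU B reserves, writes nothing */

	ring[producer & MASK] = 0xB1;		/* CPU B: write + submit */
	submit(1);
	printf("new scheme, user space sees 0x%" PRIx64 "\n",
	       ring[(producer - 1) & MASK]);

	ring[producer & MASK] = 0xA1;		/* CPU A: write + submit */
	submit(1);
	return 0;
}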

Signed-off-by: Eryk Kubanski <e.kubanski@...tner.samsung.com>
Fixes: e6c4047f5122 ("xsk: Use xsk_buff_pool directly for cq functions")
---
 include/linux/skbuff.h |  2 ++
 net/xdp/xsk.c          | 17 +++++++++++------
 net/xdp/xsk_queue.h    | 11 +++++++++++
 3 files changed, 24 insertions(+), 6 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index bb2b751d274a..6785faec0699 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -622,6 +622,8 @@ struct skb_shared_info {
 		void		*destructor_arg;
 	};
 
+	u64 xsk_descs[MAX_SKB_FRAGS];
+
 	/* must be last field, see pskb_expand_head() */
 	skb_frag_t	frags[MAX_SKB_FRAGS];
 };
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index a2249fd2048a..f822393907da 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -526,24 +526,24 @@ static int xsk_wakeup(struct xdp_sock *xs, u8 flags)
 	return dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id, flags);
 }
 
-static int xsk_cq_reserve_addr_locked(struct xsk_buff_pool *pool, u64 addr)
+static int xsk_cq_reserve_locked(struct xsk_buff_pool *pool)
 {
 	unsigned long flags;
 	int ret;
 
 	spin_lock_irqsave(&pool->cq_lock, flags);
-	ret = xskq_prod_reserve_addr(pool->cq, addr);
+	ret = xskq_prod_reserve(pool->cq);
 	spin_unlock_irqrestore(&pool->cq_lock, flags);
 
 	return ret;
 }
 
-static void xsk_cq_submit_locked(struct xsk_buff_pool *pool, u32 n)
+static void xsk_cq_submit_locked(struct xsk_buff_pool *pool, u64 *descs, u32 n)
 {
 	unsigned long flags;
 
 	spin_lock_irqsave(&pool->cq_lock, flags);
-	xskq_prod_submit_n(pool->cq, n);
+	xskq_prod_write_submit_addr_n(pool->cq, descs, n);
 	spin_unlock_irqrestore(&pool->cq_lock, flags);
 }
 
@@ -570,7 +570,9 @@ static void xsk_destruct_skb(struct sk_buff *skb)
 		*compl->tx_timestamp = ktime_get_tai_fast_ns();
 	}
 
-	xsk_cq_submit_locked(xdp_sk(skb->sk)->pool, xsk_get_num_desc(skb));
+	xsk_cq_submit_locked(xdp_sk(skb->sk)->pool,
+			     skb_shinfo(skb)->xsk_descs,
+			     xsk_get_num_desc(skb));
 	sock_wfree(skb);
 }
 
@@ -749,7 +751,9 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
 	skb->priority = READ_ONCE(xs->sk.sk_priority);
 	skb->mark = READ_ONCE(xs->sk.sk_mark);
 	skb->destructor = xsk_destruct_skb;
+
 	xsk_tx_metadata_to_compl(meta, &skb_shinfo(skb)->xsk_meta);
+	skb_shinfo(skb)->xsk_descs[xsk_get_num_desc(skb)] = desc->addr;
 	xsk_set_destructor_arg(skb);
 
 	return skb;
@@ -760,6 +764,7 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
 
 	if (err == -EOVERFLOW) {
 		/* Drop the packet */
+		skb_shinfo(xs->skb)->xsk_descs[xsk_get_num_desc(xs->skb)] = desc->addr;
 		xsk_set_destructor_arg(xs->skb);
 		xsk_drop_skb(xs->skb);
 		xskq_cons_release(xs->tx);
@@ -802,7 +807,7 @@ static int __xsk_generic_xmit(struct sock *sk)
 		 * if there is space in it. This avoids having to implement
 		 * any buffering in the Tx path.
 		 */
-		if (xsk_cq_reserve_addr_locked(xs->pool, desc.addr))
+		if (xsk_cq_reserve_locked(xs->pool))
 			goto out;
 
 		skb = xsk_build_skb(xs, &desc);
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index 46d87e961ad6..06ce89aae217 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -436,6 +436,17 @@ static inline void xskq_prod_submit_n(struct xsk_queue *q, u32 nb_entries)
 	__xskq_prod_submit(q, q->ring->producer + nb_entries);
 }
 
+static inline void xskq_prod_write_submit_addr_n(struct xsk_queue *q, u64 *addrs, u32 nb_entries)
+{
+	struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
+	u32 prod = q->ring->producer;
+
+	for (u32 i = 0; i < nb_entries; ++i)
+		ring->desc[prod++ & q->ring_mask] = addrs[i];
+
+	__xskq_prod_submit(q, prod);
+}
+
 static inline bool xskq_prod_is_empty(struct xsk_queue *q)
 {
 	/* No barriers needed since data is not accessed */
-- 
2.34.1

