Message-ID: <20250825193918.3445531-4-ameryhung@gmail.com>
Date: Mon, 25 Aug 2025 12:39:14 -0700
From: Amery Hung <ameryhung@...il.com>
To: bpf@...r.kernel.org
Cc: netdev@...r.kernel.org,
alexei.starovoitov@...il.com,
andrii@...nel.org,
daniel@...earbox.net,
kuba@...nel.org,
martin.lau@...nel.org,
mohsin.bashr@...il.com,
saeedm@...dia.com,
tariqt@...dia.com,
mbloch@...dia.com,
maciej.fijalkowski@...el.com,
kernel-team@...a.com
Subject: [RFC bpf-next v1 3/7] bpf: Support pulling non-linear xdp data

Add a kfunc, bpf_xdp_pull_data(), to support pulling data from xdp
fragments. Similar to bpf_skb_pull_data(), bpf_xdp_pull_data() makes
the first "len" bytes of data directly readable and writable in bpf
programs. If "len" is larger than the linear data size, data in the
fragments is copied into the linear region, provided there is enough
room between xdp->data_end and xdp_data_hard_end(xdp); how much such
tailroom is available depends on the driver implementation.
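
For illustration, a minimal sketch of how the kfunc could be called
from an xdp program (the program name and the 64-byte pull length are
arbitrary and not part of this patch):

  #include <vmlinux.h>
  #include <bpf/bpf_helpers.h>

  extern int bpf_xdp_pull_data(struct xdp_md *xdp, __u32 len,
                               __u64 flags) __ksym;

  SEC("xdp")
  int pull_demo(struct xdp_md *ctx)
  {
          void *data, *data_end;

          /* Make the first 64 bytes directly accessible, copying
           * them out of fragments if needed.
           */
          if (bpf_xdp_pull_data(ctx, 64, 0))
                  return XDP_DROP;

          /* Packet pointers must be reloaded after the call. */
          data = (void *)(long)ctx->data;
          data_end = (void *)(long)ctx->data_end;

          if (data + 64 > data_end)
                  return XDP_DROP;

          /* ... direct packet access on the pulled bytes ... */
          return XDP_PASS;
  }

  char LICENSE[] SEC("license") = "GPL";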

One use case for the kfunc is decapsulating headers that reside in
xdp fragments, since it is possible for a NIC driver to place headers
there. To keep using direct packet access for parsing and
decapsulating such headers, users can pull them into the linear data
area by calling bpf_xdp_pull_data() and then pop them with
bpf_xdp_adjust_head().
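
Continuing the sketch above, that flow could look roughly like this,
where ENCAP_HLEN is a hypothetical placeholder for the outer header
length of the tunnel being terminated (illustrative only):

          /* Pull the outer header into the linear area so direct
           * packet access works even if the driver left it in a
           * fragment.
           */
          if (bpf_xdp_pull_data(ctx, ENCAP_HLEN, 0))
                  return XDP_DROP;

          /* ... parse and validate the outer header here ... */

          /* Pop the outer header; a positive offset moves
           * xdp->data forward.
           */
          if (bpf_xdp_adjust_head(ctx, ENCAP_HLEN))
                  return XDP_DROP;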

An unused argument, flags, is reserved for future extensions (e.g.,
discarding the data instead of copying it into the linear data area).

Signed-off-by: Amery Hung <ameryhung@...il.com>
---
net/core/filter.c | 52 +++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 52 insertions(+)
diff --git a/net/core/filter.c b/net/core/filter.c
index f0ee5aec7977..82d953e077ac 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -12211,6 +12211,57 @@ __bpf_kfunc int bpf_sock_ops_enable_tx_tstamp(struct bpf_sock_ops_kern *skops,
return 0;
}
+__bpf_kfunc int bpf_xdp_pull_data(struct xdp_md *x, u32 len, u64 flags)
+{
+ struct xdp_buff *xdp = (struct xdp_buff *)x;
+ struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
+ void *data_end, *data_hard_end = xdp_data_hard_end(xdp);
+ int i, delta, buff_len, n_frags_free = 0, len_free = 0;
+
+ buff_len = xdp_get_buff_len(xdp);
+
+ if (unlikely(len > buff_len))
+ return -EINVAL;
+
+ if (!len)
+ len = buff_len;
+
+ data_end = xdp->data + len;
+ delta = data_end - xdp->data_end;
+
+ if (delta <= 0)
+ return 0;
+
+ if (unlikely(data_end > data_hard_end))
+ return -EINVAL;
+
+ for (i = 0; i < sinfo->nr_frags && delta; i++) {
+ skb_frag_t *frag = &sinfo->frags[i];
+ u32 shrink = min_t(u32, delta, skb_frag_size(frag));
+
+ memcpy(xdp->data_end + len_free, skb_frag_address(frag), shrink);
+
+ len_free += shrink;
+ delta -= shrink;
+ if (bpf_xdp_shrink_data(xdp, frag, shrink, false))
+ n_frags_free++;
+ }
+
+ for (i = 0; i < sinfo->nr_frags - n_frags_free; i++) {
+ memcpy(&sinfo->frags[i], &sinfo->frags[i + n_frags_free],
+ sizeof(skb_frag_t));
+ }
+
+ sinfo->nr_frags -= n_frags_free;
+ sinfo->xdp_frags_size -= len_free;
+ xdp->data_end = data_end;
+
+ if (unlikely(!sinfo->nr_frags))
+ xdp_buff_clear_frags_flag(xdp);
+
+ return 0;
+}
+
__bpf_kfunc_end_defs();
int bpf_dynptr_from_skb_rdonly(struct __sk_buff *skb, u64 flags,
@@ -12238,6 +12289,7 @@ BTF_KFUNCS_END(bpf_kfunc_check_set_skb_meta)
BTF_KFUNCS_START(bpf_kfunc_check_set_xdp)
BTF_ID_FLAGS(func, bpf_dynptr_from_xdp)
+BTF_ID_FLAGS(func, bpf_xdp_pull_data)
BTF_KFUNCS_END(bpf_kfunc_check_set_xdp)
BTF_KFUNCS_START(bpf_kfunc_check_set_sock_addr)
--
2.47.3