[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20251124-skb-meta-safeproof-netdevs-rx-only-v1-15-8978f5054417@cloudflare.com>
Date: Mon, 24 Nov 2025 17:28:51 +0100
From: Jakub Sitnicki <jakub@...udflare.com>
To: bpf@...r.kernel.org
Cc: netdev@...r.kernel.org, kernel-team@...udflare.com,
Martin KaFai Lau <martin.lau@...ux.dev>
Subject: [PATCH RFC bpf-next 15/15] bpf: Realign skb metadata for TC progs using data_meta
After decoupling metadata location from MAC header offset, a gap can appear
between metadata and skb->data on L2 decapsulation (e.g., VLAN, GRE). This
breaks the BPF data_meta pointer, which assumes metadata is directly before
skb->data.
Introduce bpf_skb_meta_realign() kfunc to close the gap by moving metadata
to immediately precede the MAC header. Inject a call to it in
tc_cls_act_prologue() when the verifier detects data_meta access
(PA_F_DATA_META_LOAD flag).
Update skb_data_move() to handle the gap case: on skb_push(), move metadata
to the start of the head buffer (skb->head); on skb_pull(), where metadata is
already detached from skb->data, leave it in place.
This restores data_meta functionality for TC programs while keeping the
performance benefit of avoiding memmove on L2 decapsulation for programs
that don't use data_meta.
Signed-off-by: Jakub Sitnicki <jakub@...udflare.com>
---
include/linux/skbuff.h | 25 +++++++++++++++--------
net/core/filter.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++++--
2 files changed, 70 insertions(+), 10 deletions(-)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 8868db976e1f..24c4e216d0cb 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -4600,19 +4600,28 @@ static inline void skb_data_move(struct sk_buff *skb, const int len,
if (!meta_len)
goto no_metadata;
- meta_end = skb_metadata_end(skb);
- meta = meta_end - meta_len;
-
- if (WARN_ON_ONCE(meta_end + len != skb->data ||
- meta_len > skb_headroom(skb))) {
+ /* Not enough headroom left for metadata. Drop it. */
+ if (WARN_ONCE(meta_len > skb_headroom(skb),
+ "skb headroom smaller than metadata")) {
skb_metadata_clear(skb);
goto no_metadata;
}
- memmove(meta + len, meta, meta_len + n);
- skb_shinfo(skb)->meta_end += len;
- return;
+ meta_end = skb_metadata_end(skb);
+ meta = meta_end - meta_len;
+ /* Metadata in front of data before push/pull. Keep it that way. */
+ if (meta_end == skb->data - len) {
+ memmove(meta + len, meta, meta_len + n);
+ skb_shinfo(skb)->meta_end += len;
+ return;
+ }
+
+ if (len < 0) {
+ /* Data pushed. Move metadata to the top. */
+ memmove(skb->head, meta, meta_len);
+ skb_shinfo(skb)->meta_end = meta_len;
+ }
no_metadata:
memmove(skb->data, skb->data - len, n);
}
diff --git a/net/core/filter.c b/net/core/filter.c
index 334421910107..91100c923f2c 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -9125,11 +9125,62 @@ static int bpf_gen_ld_abs(const struct bpf_insn *orig,
return insn - insn_buf;
}
+__bpf_kfunc_start_defs();
+
+__bpf_kfunc void bpf_skb_meta_realign(struct __sk_buff *skb_)
+{
+ struct sk_buff *skb = (typeof(skb))skb_;
+ u8 *meta_end = skb_metadata_end(skb);
+ u8 meta_len = skb_metadata_len(skb);
+ u8 *meta;
+ int gap;
+
+ gap = skb_mac_header(skb) - meta_end;
+ if (!meta_len || !gap)
+ return;
+
+ if (WARN_ONCE(gap < 0, "skb metadata end past mac header")) {
+ skb_metadata_clear(skb);
+ return;
+ }
+
+ meta = meta_end - meta_len;
+ memmove(meta + gap, meta, meta_len);
+ skb_shinfo(skb)->meta_end += gap;
+
+ bpf_compute_data_pointers(skb);
+}
+
+__bpf_kfunc_end_defs();
+
+BTF_KFUNCS_START(tc_cls_act_hidden_ids)
+BTF_ID_FLAGS(func, bpf_skb_meta_realign, KF_TRUSTED_ARGS)
+BTF_KFUNCS_END(tc_cls_act_hidden_ids)
+
+BTF_ID_LIST_SINGLE(bpf_skb_meta_realign_ids, func, bpf_skb_meta_realign)
+
static int tc_cls_act_prologue(struct bpf_insn *insn_buf, u32 pkt_access_flags,
const struct bpf_prog *prog)
{
- return bpf_unclone_prologue(insn_buf, pkt_access_flags, prog,
- TC_ACT_SHOT);
+ struct bpf_insn *insn = insn_buf;
+ int cnt;
+
+ if (pkt_access_flags & PA_F_DATA_META_LOAD) {
+ /* Realign skb metadata for access through data_meta pointer.
+ *
+ * r6 = r1; // r6 will be "u64 *ctx"
+ * r0 = bpf_skb_meta_realign(r1); // r0 is undefined
+ * r1 = r6;
+ */
+ *insn++ = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1);
+ *insn++ = BPF_CALL_KFUNC(0, bpf_skb_meta_realign_ids[0]);
+ *insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_6);
+ }
+ cnt = bpf_unclone_prologue(insn, pkt_access_flags, prog, TC_ACT_SHOT);
+ if (!cnt && insn > insn_buf)
+ *insn++ = prog->insnsi[0];
+
+ return cnt + insn - insn_buf;
}
static bool tc_cls_act_is_valid_access(int off, int size,
--
2.43.0
Powered by blists - more mailing lists