[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <20240705103853.21235-1-florian.kauer@linutronix.de>
Date: Fri, 5 Jul 2024 12:38:53 +0200
From: Florian Kauer <florian.kauer@...utronix.de>
To: toke@...hat.com,
ast@...nel.org,
daniel@...earbox.net,
john.fastabend@...il.com
Cc: Florian Kauer <florian.kauer@...utronix.de>,
davem@...emloft.net,
kuba@...nel.org,
hawk@...nel.org,
edumazet@...gle.com,
pabeni@...hat.com,
andrii@...nel.org,
martin.lau@...ux.dev,
eddyz87@...il.com,
song@...nel.org,
yonghong.song@...ux.dev,
kpsingh@...nel.org,
sdf@...gle.com,
haoluo@...gle.com,
jolsa@...nel.org,
netdev@...r.kernel.org,
bpf@...r.kernel.org,
linux-kernel@...r.kernel.org,
xdp-newbies@...r.kernel.org
Subject: [PATCH] bpf: provide map key to BPF program after redirect
Both DEVMAP as well as CPUMAP provide the possibility
to attach BPF programs to their entries that will be
executed after a redirect was performed.
With BPF_F_BROADCAST it is also possible to execute
BPF programs for multiple clones of the same XDP frame
which is, for example, useful for establishing redundant
traffic paths by setting different VLAN tags
for the replicated XDP frames.
Currently, this program itself has no information about
the map entry that led to its execution. While egress_ifindex
can be used to get this information indirectly and can
be used for path dependent processing of the replicated frames,
it does not work if multiple entries share the same egress_ifindex.
Therefore, extend the xdp_md struct with a map_key
that contains the key of the associated map entry
after performing a redirect.
See
https://lore.kernel.org/xdp-newbies/5eb6070c-a12e-4d4c-a9f0-a6a6fafa41d1@linutronix.de/T/#u
for the discussion that led to this patch.
Signed-off-by: Florian Kauer <florian.kauer@...utronix.de>
---
include/net/xdp.h | 3 +++
include/uapi/linux/bpf.h | 2 ++
kernel/bpf/devmap.c | 6 +++++-
net/core/filter.c | 18 ++++++++++++++++++
4 files changed, 28 insertions(+), 1 deletion(-)
diff --git a/include/net/xdp.h b/include/net/xdp.h
index e6770dd40c91..e70f4dfea1a2 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -86,6 +86,7 @@ struct xdp_buff {
struct xdp_txq_info *txq;
u32 frame_sz; /* frame size to deduce data_hard_end/reserved tailroom*/
u32 flags; /* supported values defined in xdp_buff_flags */
+ u64 map_key; /* set during redirect via a map */
};
static __always_inline bool xdp_buff_has_frags(struct xdp_buff *xdp)
@@ -175,6 +176,7 @@ struct xdp_frame {
struct net_device *dev_rx; /* used by cpumap */
u32 frame_sz;
u32 flags; /* supported values defined in xdp_buff_flags */
+ u64 map_key; /* set during redirect via a map */
};
static __always_inline bool xdp_frame_has_frags(struct xdp_frame *frame)
@@ -257,6 +259,7 @@ void xdp_convert_frame_to_buff(struct xdp_frame *frame, struct xdp_buff *xdp)
xdp->data_meta = frame->data - frame->metasize;
xdp->frame_sz = frame->frame_sz;
xdp->flags = frame->flags;
+ xdp->map_key = frame->map_key;
}
static inline
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 35bcf52dbc65..7dbb0f2a236c 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -6455,6 +6455,8 @@ struct xdp_md {
__u32 rx_queue_index; /* rxq->queue_index */
__u32 egress_ifindex; /* txq->dev->ifindex */
+
+ __u64 map_key; /* set during redirect via a map in xdp_buff */
};
/* DEVMAP map-value layout
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index da1fec906b96..fac3e8a6c51e 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -574,6 +574,8 @@ static int dev_map_enqueue_clone(struct bpf_dtab_netdev *obj,
if (!nxdpf)
return -ENOMEM;
+ nxdpf->map_key = obj->idx;
+
bq_enqueue(obj->dev, nxdpf, dev_rx, obj->xdp_prog);
return 0;
@@ -670,8 +672,10 @@ int dev_map_enqueue_multi(struct xdp_frame *xdpf, struct net_device *dev_rx,
}
/* consume the last copy of the frame */
- if (last_dst)
+ if (last_dst) {
+ xdpf->map_key = last_dst->idx;
bq_enqueue(last_dst->dev, xdpf, dev_rx, last_dst->xdp_prog);
+ }
else
xdp_return_frame_rx_napi(xdpf); /* dtab is empty */
diff --git a/net/core/filter.c b/net/core/filter.c
index f1c37c85b858..7762a6d6900f 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4394,10 +4394,12 @@ static __always_inline int __xdp_do_redirect_frame(struct bpf_redirect_info *ri,
err = dev_map_enqueue_multi(xdpf, dev, map,
flags & BPF_F_EXCLUDE_INGRESS);
} else {
+ xdpf->map_key = ri->tgt_index;
err = dev_map_enqueue(fwd, xdpf, dev);
}
break;
case BPF_MAP_TYPE_CPUMAP:
+ xdpf->map_key = ri->tgt_index;
err = cpu_map_enqueue(fwd, xdpf, dev);
break;
case BPF_MAP_TYPE_UNSPEC:
@@ -4407,6 +4409,7 @@ static __always_inline int __xdp_do_redirect_frame(struct bpf_redirect_info *ri,
err = -EINVAL;
break;
}
+ xdpf->map_key = ri->tgt_index;
err = dev_xdp_enqueue(fwd, xdpf, dev);
break;
}
@@ -9022,6 +9025,16 @@ static bool xdp_is_valid_access(int off, int size,
case offsetof(struct xdp_md, data_end):
info->reg_type = PTR_TO_PACKET_END;
break;
+ case offsetof(struct xdp_md, map_key):
+ if (prog->expected_attach_type != BPF_XDP_DEVMAP &&
+ prog->expected_attach_type != BPF_XDP_CPUMAP) {
+ return false;
+ }
+
+ if (size != sizeof(__u64))
+ return false;
+
+ return true;
}
return __is_valid_xdp_access(off, size);
@@ -10116,6 +10129,11 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
offsetof(struct net_device, ifindex));
break;
+ case offsetof(struct xdp_md, map_key):
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, map_key),
+ si->dst_reg, si->src_reg,
+ offsetof(struct xdp_buff, map_key));
+ break;
}
return insn - insn_buf;
--
2.39.2
Powered by blists - more mailing lists