[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20220713111430.134810-7-toke@redhat.com>
Date: Wed, 13 Jul 2022 13:14:14 +0200
From: Toke Høiland-Jørgensen <toke@...hat.com>
To: Alexei Starovoitov <ast@...nel.org>,
Daniel Borkmann <daniel@...earbox.net>,
John Fastabend <john.fastabend@...il.com>,
Andrii Nakryiko <andrii@...nel.org>,
Martin KaFai Lau <martin.lau@...ux.dev>,
Song Liu <song@...nel.org>, Yonghong Song <yhs@...com>,
KP Singh <kpsingh@...nel.org>,
Stanislav Fomichev <sdf@...gle.com>,
Hao Luo <haoluo@...gle.com>, Jiri Olsa <jolsa@...nel.org>,
"David S. Miller" <davem@...emloft.net>,
Jakub Kicinski <kuba@...nel.org>,
Jesper Dangaard Brouer <hawk@...nel.org>,
Eric Dumazet <edumazet@...gle.com>,
Paolo Abeni <pabeni@...hat.com>
Cc: Kumar Kartikeya Dwivedi <memxor@...il.com>, netdev@...r.kernel.org,
bpf@...r.kernel.org,
Freysteinn Alfredsson <freysteinn.alfredsson@....se>,
Cong Wang <xiyou.wangcong@...il.com>,
Toke Høiland-Jørgensen <toke@...hat.com>
Subject: [RFC PATCH 06/17] xdp: Add dequeue program type for getting packets from a PIFO
Add a new BPF_PROG_TYPE_DEQUEUE, which will be executed by a new device
hook to retrieve queued packets for transmission. The API of the dequeue
program is simple: it takes a context object containing as its sole member
the ifindex of the device it is being executed on. The program can return a
pointer to a packet, or NULL to indicate it has nothing to transmit at this
time. Packet pointers are obtained by dequeueing them from a PIFO
map (using a helper added in a subsequent commit).
This commit adds the dequeue program type and the ability to run it using the
bpf_prog_run() syscall (returning the dequeued packet to userspace); a
subsequent commit introduces the network stack hook to attach and execute
dequeue programs.
Signed-off-by: Toke Høiland-Jørgensen <toke@...hat.com>
---
include/linux/bpf.h | 9 ++++++
include/linux/bpf_types.h | 2 ++
include/net/xdp.h | 4 +++
include/uapi/linux/bpf.h | 5 ++++
kernel/bpf/syscall.c | 1 +
net/bpf/test_run.c | 33 +++++++++++++++++++++
net/core/filter.c | 53 ++++++++++++++++++++++++++++++++++
tools/include/uapi/linux/bpf.h | 5 ++++
8 files changed, 112 insertions(+)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index ea994acebb81..6ea5d6d188cf 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1864,6 +1864,8 @@ int array_map_alloc_check(union bpf_attr *attr);
int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
union bpf_attr __user *uattr);
+int bpf_prog_test_run_dequeue(struct bpf_prog *prog, const union bpf_attr *kattr,
+ union bpf_attr __user *uattr); /* test_run handler installed in dequeue_prog_ops */
int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
union bpf_attr __user *uattr);
int bpf_prog_test_run_tracing(struct bpf_prog *prog,
@@ -2107,6 +2109,13 @@ static inline int bpf_prog_test_run_xdp(struct bpf_prog *prog,
return -ENOTSUPP;
}
+static inline int bpf_prog_test_run_dequeue(struct bpf_prog *prog,
+ const union bpf_attr *kattr,
+ union bpf_attr __user *uattr)
+{
+ return -ENOTSUPP; /* stub variant: always fails, like the neighbouring bpf_prog_test_run_xdp() stub */
+}
+
static inline int bpf_prog_test_run_skb(struct bpf_prog *prog,
const union bpf_attr *kattr,
union bpf_attr __user *uattr)
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 26ef981a8aa5..e6bc962befb7 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -10,6 +10,8 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_SCHED_ACT, tc_cls_act,
struct __sk_buff, struct sk_buff)
BPF_PROG_TYPE(BPF_PROG_TYPE_XDP, xdp,
struct xdp_md, struct xdp_buff)
+BPF_PROG_TYPE(BPF_PROG_TYPE_DEQUEUE, dequeue,
+ struct dequeue_ctx, struct dequeue_data) /* program-visible ctx type, then kernel-side ctx type */
#ifdef CONFIG_CGROUP_BPF
BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SKB, cg_skb,
struct __sk_buff, struct sk_buff)
diff --git a/include/net/xdp.h b/include/net/xdp.h
index 7c694fb26f34..728ce943d352 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -85,6 +85,10 @@ struct xdp_buff {
u32 flags; /* supported values defined in xdp_buff_flags */
};
+struct dequeue_data { /* kernel-side context passed to dequeue programs */
+ struct xdp_txq_info *txq; /* txq->dev->ifindex backs dequeue_ctx.egress_ifindex */
+};
+
static __always_inline bool xdp_buff_has_frags(struct xdp_buff *xdp)
{
return !!(xdp->flags & XDP_FLAGS_HAS_FRAGS);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index f0947ddee784..974fb5882305 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -954,6 +954,7 @@ enum bpf_prog_type {
BPF_PROG_TYPE_LSM,
BPF_PROG_TYPE_SK_LOOKUP,
BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */
+ BPF_PROG_TYPE_DEQUEUE,
};
enum bpf_attach_type {
@@ -5961,6 +5962,10 @@ struct xdp_md {
__u32 egress_ifindex; /* txq->dev->ifindex */
};
+struct dequeue_ctx { /* context object seen by BPF_PROG_TYPE_DEQUEUE programs */
+ __u32 egress_ifindex; /* txq->dev->ifindex; read-only for the program */
+};
+
/* DEVMAP map-value layout
*
* The struct data-layout of map-value is a configuration interface.
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 31899882e513..c4af9119b68a 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2370,6 +2370,7 @@ bpf_prog_load_check_attach(enum bpf_prog_type prog_type,
default:
return -EINVAL;
}
+ case BPF_PROG_TYPE_DEQUEUE:
case BPF_PROG_TYPE_SYSCALL:
case BPF_PROG_TYPE_EXT:
if (expected_attach_type)
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index f05d13717430..a7f479a19fe0 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -1390,6 +1390,39 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
return ret;
}
+/**
+ * bpf_prog_test_run_dequeue - test-run a dequeue program
+ * @prog: dequeue program to execute
+ * @kattr: kernel copy of the test-run attributes
+ * @uattr: userspace attributes, handed to bpf_test_finish() for the results
+ *
+ * Runs @prog with a context whose txq points at the loopback device of the
+ * caller's network namespace. A non-zero program return value is interpreted
+ * as a pointer to a struct xdp_frame: its contents, starting at the metadata
+ * area, are passed to bpf_test_finish() with retval 1, after which the frame
+ * is released with xdp_return_frame(). A zero return value is reported as
+ * retval 0 with no packet data.
+ *
+ * The program consumes no input packet or context, so every attribute that
+ * would otherwise be silently ignored (input data, context buffers and sizes,
+ * flags, cpu, batch_size, repeat > 1) is rejected.
+ *
+ * Return: -EINVAL for unsupported attributes or a non-zero
+ * expected_attach_type, a non-zero bpf_test_run() result if the run fails,
+ * otherwise the result of bpf_test_finish().
+ */
+int bpf_prog_test_run_dequeue(struct bpf_prog *prog, const union bpf_attr *kattr,
+			      union bpf_attr __user *uattr)
+{
+	struct xdp_txq_info txq = { .dev = current->nsproxy->net_ns->loopback_dev };
+	u32 repeat = kattr->test.repeat, duration, size;
+	struct dequeue_data ctx = { .txq = &txq };
+	struct xdp_buff xdp = {};
+	struct xdp_frame *pkt;
+	u64 retval;
+	int ret;
+
+	if (prog->expected_attach_type)
+		return -EINVAL;
+
+	/* Nothing is read from userspace; refuse what we cannot honour. */
+	if (kattr->test.data_in || kattr->test.data_size_in ||
+	    kattr->test.ctx_in || kattr->test.ctx_out ||
+	    kattr->test.ctx_size_in || kattr->test.ctx_size_out ||
+	    kattr->test.flags || kattr->test.cpu || kattr->test.batch_size ||
+	    repeat > 1)
+		return -EINVAL;
+
+	ret = bpf_test_run(prog, &ctx, repeat, &retval, &duration, false);
+	if (ret)
+		return ret;
+
+	/* NULL from the program: nothing to transmit, report retval 0 */
+	if (!retval)
+		return bpf_test_finish(kattr, uattr, NULL, NULL, 0, 0, duration);
+
+	pkt = (void *)(unsigned long)retval;
+	xdp_convert_frame_to_buff(pkt, &xdp);
+	size = xdp.data_end - xdp.data_meta;
+	/* A packet was dequeued: report retval 1 rather than the raw pointer */
+	ret = bpf_test_finish(kattr, uattr, xdp.data_meta, NULL, size, 1, duration);
+	xdp_return_frame(pkt);
+	return ret;
+}
+
static int verify_user_bpf_flow_keys(struct bpf_flow_keys *ctx)
{
/* make sure the fields we don't use are zeroed */
diff --git a/net/core/filter.c b/net/core/filter.c
index 8e6ea17a29db..30bd3a6aedab 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -8062,6 +8062,12 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
}
}
+/* Helper lookup for dequeue programs: no type-specific helpers are offered
+ * here, so every request is resolved through bpf_base_func_proto().
+ */
+static const struct bpf_func_proto *
+dequeue_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+{
+	const struct bpf_func_proto *proto = bpf_base_func_proto(func_id);
+
+	return proto;
+}
+
const struct bpf_func_proto bpf_sock_map_update_proto __weak;
const struct bpf_func_proto bpf_sock_hash_update_proto __weak;
@@ -8776,6 +8782,20 @@ void bpf_warn_invalid_xdp_action(struct net_device *dev, struct bpf_prog *prog,
}
EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action);
+/* Verifier callback deciding which context accesses a dequeue program may
+ * perform. Only a read of exactly 32 bits at the offset of
+ * dequeue_ctx.egress_ifindex is accepted; writes, other widths and all other
+ * offsets are refused.
+ */
+static bool dequeue_is_valid_access(int off, int size,
+				    enum bpf_access_type type,
+				    const struct bpf_prog *prog,
+				    struct bpf_insn_access_aux *info)
+{
+	if (type == BPF_WRITE)
+		return false;
+
+	/* dequeue_convert_ctx_access() always emits a BPF_W load for the
+	 * field, so narrower or wider program loads must not be admitted.
+	 */
+	if (size != sizeof(__u32))
+		return false;
+
+	switch (off) {
+	case offsetof(struct dequeue_ctx, egress_ifindex):
+		return true;
+	default:
+		return false;
+	}
+}
+
static bool sock_addr_is_valid_access(int off, int size,
enum bpf_access_type type,
const struct bpf_prog *prog,
@@ -9835,6 +9855,28 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
return insn - insn_buf;
}
+/* Rewrite a program load from struct dequeue_ctx into loads on the
+ * kernel-side struct dequeue_data. Only egress_ifindex is handled; it
+ * expands to ctx->txq->dev->ifindex. For any other offset no instructions
+ * are emitted. Returns the number of instructions written to insn_buf.
+ */
+static u32 dequeue_convert_ctx_access(enum bpf_access_type type,
+				      const struct bpf_insn *si,
+				      struct bpf_insn *insn_buf,
+				      struct bpf_prog *prog, u32 *target_size)
+{
+	struct bpf_insn *out = insn_buf;
+
+	if (si->off != offsetof(struct dequeue_ctx, egress_ifindex))
+		return 0;
+
+	/* dst = ctx->txq */
+	*out++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct dequeue_data, txq),
+			     si->dst_reg, si->src_reg,
+			     offsetof(struct dequeue_data, txq));
+	/* dst = dst->dev */
+	*out++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_txq_info, dev),
+			     si->dst_reg, si->dst_reg,
+			     offsetof(struct xdp_txq_info, dev));
+	/* dst = dst->ifindex, loaded as a 32-bit word */
+	*out++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
+			     offsetof(struct net_device, ifindex));
+
+	return out - insn_buf;
+}
+
/* SOCK_ADDR_LOAD_NESTED_FIELD() loads Nested Field S.F.NF where S is type of
* context Structure, F is Field in context structure that contains a pointer
* to Nested Structure of type NS that has the field NF.
@@ -10687,6 +10729,17 @@ const struct bpf_prog_ops xdp_prog_ops = {
.test_run = bpf_prog_test_run_xdp,
};
+const struct bpf_verifier_ops dequeue_verifier_ops = { /* verifier callbacks for dequeue programs */
+ .get_func_proto = dequeue_func_proto, /* defers to bpf_base_func_proto() */
+ .is_valid_access = dequeue_is_valid_access, /* reads of dequeue_ctx.egress_ifindex only */
+ .convert_ctx_access = dequeue_convert_ctx_access, /* egress_ifindex -> txq->dev->ifindex */
+ .gen_prologue = bpf_noop_prologue,
+};
+
+const struct bpf_prog_ops dequeue_prog_ops = { /* runtime ops for dequeue programs */
+ .test_run = bpf_prog_test_run_dequeue, /* runs the program and reports any dequeued packet */
+};
+
const struct bpf_verifier_ops cg_skb_verifier_ops = {
.get_func_proto = cg_skb_func_proto,
.is_valid_access = cg_skb_is_valid_access,
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 623421377f6e..4dd8a563f85d 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -954,6 +954,7 @@ enum bpf_prog_type {
BPF_PROG_TYPE_LSM,
BPF_PROG_TYPE_SK_LOOKUP,
BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */
+ BPF_PROG_TYPE_DEQUEUE,
};
enum bpf_attach_type {
@@ -5961,6 +5962,10 @@ struct xdp_md {
__u32 egress_ifindex; /* txq->dev->ifindex */
};
+struct dequeue_ctx { /* context object seen by BPF_PROG_TYPE_DEQUEUE programs */
+ __u32 egress_ifindex; /* txq->dev->ifindex; read-only for the program */
+};
+
/* DEVMAP map-value layout
*
* The struct data-layout of map-value is a configuration interface.
--
2.37.0
Powered by blists - more mailing lists