[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20220713111430.134810-18-toke@redhat.com>
Date: Wed, 13 Jul 2022 13:14:25 +0200
From: Toke Høiland-Jørgensen <toke@...hat.com>
To: Alexei Starovoitov <ast@...nel.org>,
Daniel Borkmann <daniel@...earbox.net>,
"David S. Miller" <davem@...emloft.net>,
Jakub Kicinski <kuba@...nel.org>,
Jesper Dangaard Brouer <hawk@...nel.org>,
John Fastabend <john.fastabend@...il.com>,
Andrii Nakryiko <andrii@...nel.org>,
Martin KaFai Lau <martin.lau@...ux.dev>,
Song Liu <song@...nel.org>, Yonghong Song <yhs@...com>,
KP Singh <kpsingh@...nel.org>,
Stanislav Fomichev <sdf@...gle.com>,
Hao Luo <haoluo@...gle.com>, Jiri Olsa <jolsa@...nel.org>
Cc: Kumar Kartikeya Dwivedi <memxor@...il.com>, netdev@...r.kernel.org,
bpf@...r.kernel.org,
Freysteinn Alfredsson <freysteinn.alfredsson@....se>,
Cong Wang <xiyou.wangcong@...il.com>,
Toke Høiland-Jørgensen <toke@...hat.com>
Subject: [RFC PATCH 17/17] samples/bpf: Add queueing support to xdp_fwd sample
Add support to the xdp_fwd sample for queueing packets before forwarding
them. This is meant to serve as an example (for the RFC series) of how
one could add queueing to a forwarding application. It doesn't actually
implement any fancy queueing algorithms; it just uses the queue maps to do
simple FIFO queueing, instantiating one queue map per interface.
Signed-off-by: Toke Høiland-Jørgensen <toke@...hat.com>
---
samples/bpf/xdp_fwd_kern.c | 65 +++++++++++-
samples/bpf/xdp_fwd_user.c | 200 +++++++++++++++++++++++++++----------
2 files changed, 205 insertions(+), 60 deletions(-)
diff --git a/samples/bpf/xdp_fwd_kern.c b/samples/bpf/xdp_fwd_kern.c
index 54c099cbd639..125adb02c658 100644
--- a/samples/bpf/xdp_fwd_kern.c
+++ b/samples/bpf/xdp_fwd_kern.c
@@ -23,6 +23,14 @@
#define IPV6_FLOWINFO_MASK cpu_to_be32(0x0FFFFFFF)
+/* PIFO queue map definition. It is the inner-map type of pifo_maps below,
+ * which holds one such map per interface.
+ * NOTE(review): xdp_fwd_user.c creates the per-interface maps with
+ * max_entries 10240 while this definition says 1024 - confirm that the
+ * difference is intended.
+ */
+struct pifo_map {
+ __uint(type, BPF_MAP_TYPE_PIFO_XDP);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u32));
+ __uint(max_entries, 1024);
+ __uint(map_extra, 8192); /* range */
+} pmap SEC(".maps");
+
struct {
__uint(type, BPF_MAP_TYPE_DEVMAP);
__uint(key_size, sizeof(int));
@@ -30,6 +38,13 @@ struct {
__uint(max_entries, 64);
} xdp_tx_ports SEC(".maps");
+/* Outer map holding one PIFO map per interface, keyed by ifindex. The
+ * forwarding path looks up the egress ifindex here and drops the packet
+ * when no entry is found; user space (xdp_fwd_user.c) fills in the entries.
+ * NOTE(review): max_entries is 64, so presumably only ifindex values
+ * below 64 can be queued to - confirm.
+ */
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __uint(key_size, sizeof(__u32));
+ __uint(max_entries, 64);
+ __array(values, struct pifo_map);
+} pifo_maps SEC(".maps");
+
/* from include/net/ip.h */
static __always_inline int ip_decrease_ttl(struct iphdr *iph)
{
@@ -40,7 +55,7 @@ static __always_inline int ip_decrease_ttl(struct iphdr *iph)
return --iph->ttl;
}
-static __always_inline int xdp_fwd_flags(struct xdp_md *ctx, u32 flags)
+static __always_inline int xdp_fwd_flags(struct xdp_md *ctx, u32 flags, bool queue)
{
void *data_end = (void *)(long)ctx->data_end;
void *data = (void *)(long)ctx->data;
@@ -137,22 +152,62 @@ static __always_inline int xdp_fwd_flags(struct xdp_md *ctx, u32 flags)
memcpy(eth->h_dest, fib_params.dmac, ETH_ALEN);
memcpy(eth->h_source, fib_params.smac, ETH_ALEN);
+
+ if (queue) {
+ void *ptr;
+ int ret;
+
+ ptr = bpf_map_lookup_elem(&pifo_maps, &fib_params.ifindex);
+ if (!ptr)
+ return XDP_DROP;
+
+ ret = bpf_redirect_map(ptr, 0, 0);
+ if (ret == XDP_REDIRECT)
+ bpf_schedule_iface_dequeue(ctx, fib_params.ifindex, 0);
+ return ret;
+ }
+
return bpf_redirect_map(&xdp_tx_ports, fib_params.ifindex, 0);
}
return XDP_PASS;
}
-SEC("xdp_fwd")
+/* Plain forwarding entry point: lookup flags 0, no queueing. User space
+ * selects it by function name, since all entry points share SEC("xdp").
+ */
+SEC("xdp")
int xdp_fwd_prog(struct xdp_md *ctx)
{
- return xdp_fwd_flags(ctx, 0);
+ return xdp_fwd_flags(ctx, 0, false);
}
-SEC("xdp_fwd_direct")
+/* Forwarding entry point with BPF_FIB_LOOKUP_DIRECT set, no queueing. */
+SEC("xdp")
int xdp_fwd_direct_prog(struct xdp_md *ctx)
{
- return xdp_fwd_flags(ctx, BPF_FIB_LOOKUP_DIRECT);
+ return xdp_fwd_flags(ctx, BPF_FIB_LOOKUP_DIRECT, false);
+}
+
+/* Forwarding entry point with queueing enabled: lookup flags 0, and the
+ * queue argument of xdp_fwd_flags() set to true.
+ */
+SEC("xdp")
+int xdp_fwd_queue(struct xdp_md *ctx)
+{
+ return xdp_fwd_flags(ctx, 0, true);
+}
+
+/* Dequeue program: hand back one queued packet for the egress interface.
+ *
+ * Looks up the PIFO map for ctx->egress_ifindex in pifo_maps and dequeues
+ * from it with bpf_packet_dequeue(). Returns the dequeued packet, or NULL
+ * when the interface has no PIFO map or the dequeue yields nothing.
+ */
+SEC("dequeue")
+void *xdp_dequeue(struct dequeue_ctx *ctx)
+{
+ __u32 ifindex = ctx->egress_ifindex;
+ struct xdp_md *pkt;
+ __u64 prio = 0; /* passed by pointer to bpf_packet_dequeue(); not read afterwards */
+ void *pifo_ptr;
+
+ pifo_ptr = bpf_map_lookup_elem(&pifo_maps, &ifindex);
+ if (!pifo_ptr)
+ return NULL;
+
+ pkt = (void *)bpf_packet_dequeue(ctx, pifo_ptr, 0, &prio);
+ /* NOTE(review): returning pkt directly would be equivalent in plain C;
+ * presumably the explicit NULL check is there for the BPF verifier -
+ * confirm before simplifying.
+ */
+ if (!pkt)
+ return NULL;
+
+ return pkt;
}
char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/xdp_fwd_user.c b/samples/bpf/xdp_fwd_user.c
index 84f57f1209ce..ec3f29d0babe 100644
--- a/samples/bpf/xdp_fwd_user.c
+++ b/samples/bpf/xdp_fwd_user.c
@@ -11,6 +11,7 @@
* General Public License for more details.
*/
+#include "linux/if_link.h"
#include <linux/bpf.h>
#include <linux/if_link.h>
#include <linux/limits.h>
@@ -29,66 +30,122 @@
static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
-static int do_attach(int idx, int prog_fd, int map_fd, const char *name)
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+
+/* Names, as reported in bpf_prog_info, of the XDP forwarding programs this
+ * sample may have attached; do_detach() only removes a program whose name
+ * is listed here. File-local, hence static.
+ */
+static const char *redir_prog_names[] = {
+ "xdp_fwd_prog",
+ "xdp_fwd_direct_", /* name truncated to BPF_OBJ_NAME_LEN */
+ "xdp_fwd_queue",
+};
+
+/* Same as above, for the dequeue program. */
+static const char *dequeue_prog_names[] = {
+ "xdp_dequeue"
+};
+
+/* Set up forwarding (and, when requested, queueing) on one interface.
+ *
+ * idx is the ifindex of the device and name its name (used in messages
+ * only). redir_prog_fd is the XDP program to attach; redir_map_fd is the
+ * map in which idx gets registered as a TX port. dequeue_prog_fd and
+ * pifos_map_fd may each be -1 to skip, respectively, attaching the dequeue
+ * program and creating the per-interface PIFO map.
+ *
+ * Returns 0 on success, or the non-zero error of the first step that failed.
+ */
+static int do_attach(int idx, int redir_prog_fd, int dequeue_prog_fd,
+ int redir_map_fd, int pifos_map_fd, const char *name)
{
int err;
- err = bpf_xdp_attach(idx, prog_fd, xdp_flags, NULL);
+ if (pifos_map_fd > -1) {
+ LIBBPF_OPTS(bpf_map_create_opts, map_opts, .map_extra = 8192);
+ char map_name[BPF_OBJ_NAME_LEN];
+ int pifo_fd;
+
+ snprintf(map_name, sizeof(map_name), "pifo_%d", idx);
+ map_name[BPF_OBJ_NAME_LEN - 1] = '\0';
+
+ pifo_fd = bpf_map_create(BPF_MAP_TYPE_PIFO_XDP, map_name,
+ sizeof(__u32), sizeof(__u32), 10240, &map_opts);
+ if (pifo_fd < 0) {
+ err = -errno;
+ printf("ERROR: Couldn't create PIFO map: %s\n", strerror(-err));
+ return err;
+ }
+
+ err = bpf_map_update_elem(pifos_map_fd, &idx, &pifo_fd, 0);
+ /* On success the outer map holds its own reference to the inner
+ * map, and on failure the map is of no use, so the fd is closed
+ * either way rather than leaked once per interface.
+ */
+ close(pifo_fd);
+ if (err) {
+ printf("ERROR: failed adding PIFO map for device %s\n", name);
+ /* Without a PIFO map the queueing program drops every packet
+ * forwarded to this device, so do not go on to attach.
+ */
+ return err;
+ }
+ }
+
+ if (dequeue_prog_fd > -1) {
+ LIBBPF_OPTS(bpf_xdp_attach_opts, prog_opts, .old_prog_fd = -1);
+
+ err = bpf_xdp_attach(idx, dequeue_prog_fd,
+ (XDP_FLAGS_DEQUEUE_MODE | XDP_FLAGS_REPLACE),
+ &prog_opts);
+ if (err < 0) {
+ printf("ERROR: failed to attach dequeue program to %s\n", name);
+ return err;
+ }
+ }
+
+ err = bpf_xdp_attach(idx, redir_prog_fd, xdp_flags, NULL);
if (err < 0) {
- printf("ERROR: failed to attach program to %s\n", name);
+ printf("ERROR: failed to attach redir program to %s\n", name);
return err;
}
/* Adding ifindex as a possible egress TX port */
- err = bpf_map_update_elem(map_fd, &idx, &idx, 0);
+ err = bpf_map_update_elem(redir_map_fd, &idx, &idx, 0);
if (err)
printf("ERROR: failed using device %s as TX-port\n", name);
return err;
}
+/* Tell whether the program behind prog_fd is one this sample attached.
+ *
+ * Fetches the program's info and compares its name against the
+ * num_prog_names entries of prog_names. Returns true on the first exact
+ * match; returns false when nothing matches or when the info query fails
+ * (an error is printed in the latter case).
+ */
+static bool should_detach(__u32 prog_fd, const char **prog_names, int num_prog_names)
+{
+ struct bpf_prog_info info = {};
+ __u32 len = sizeof(info);
+ int n;
+
+ if (bpf_obj_get_info_by_fd(prog_fd, &info, &len)) {
+ printf("ERROR: bpf_obj_get_info_by_fd failed (%s)\n",
+ strerror(errno));
+ return false;
+ }
+
+ for (n = 0; n < num_prog_names; n++) {
+ if (strcmp(info.name, prog_names[n]) == 0)
+ return true;
+ }
+
+ return false;
+}
+
static int do_detach(int ifindex, const char *ifname, const char *app_name)
{
LIBBPF_OPTS(bpf_xdp_attach_opts, opts);
- struct bpf_prog_info prog_info = {};
- char prog_name[BPF_OBJ_NAME_LEN];
- __u32 info_len, curr_prog_id;
- int prog_fd;
- int err = 1;
+ LIBBPF_OPTS(bpf_xdp_query_opts, query_opts);
+ int prog_fd, err = 1;
+ __u32 curr_prog_id;
- if (bpf_xdp_query_id(ifindex, xdp_flags, &curr_prog_id)) {
+ if (bpf_xdp_query(ifindex, xdp_flags, &query_opts)) {
printf("ERROR: bpf_xdp_query_id failed (%s)\n",
strerror(errno));
return err;
}
+ curr_prog_id = (xdp_flags & XDP_FLAGS_SKB_MODE) ? query_opts.skb_prog_id
+ : query_opts.drv_prog_id;
if (!curr_prog_id) {
printf("ERROR: flags(0x%x) xdp prog is not attached to %s\n",
xdp_flags, ifname);
return err;
}
- info_len = sizeof(prog_info);
prog_fd = bpf_prog_get_fd_by_id(curr_prog_id);
if (prog_fd < 0) {
printf("ERROR: bpf_prog_get_fd_by_id failed (%s)\n",
strerror(errno));
- return prog_fd;
- }
-
- err = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &info_len);
- if (err) {
- printf("ERROR: bpf_obj_get_info_by_fd failed (%s)\n",
- strerror(errno));
- goto close_out;
+ return err;
}
- snprintf(prog_name, sizeof(prog_name), "%s_prog", app_name);
- prog_name[BPF_OBJ_NAME_LEN - 1] = '\0';
- if (strcmp(prog_info.name, prog_name)) {
+ if (!should_detach(prog_fd, redir_prog_names, ARRAY_SIZE(redir_prog_names))) {
printf("ERROR: %s isn't attached to %s\n", app_name, ifname);
- err = 1;
- goto close_out;
+ close(prog_fd);
+ return 1;
}
opts.old_prog_fd = prog_fd;
@@ -96,11 +153,34 @@ static int do_detach(int ifindex, const char *ifname, const char *app_name)
if (err < 0)
printf("ERROR: failed to detach program from %s (%s)\n",
ifname, strerror(errno));
- /* TODO: Remember to cleanup map, when adding use of shared map
+
+ close(prog_fd);
+
+ if (query_opts.dequeue_prog_id) {
+ prog_fd = bpf_prog_get_fd_by_id(query_opts.dequeue_prog_id);
+ if (prog_fd < 0) {
+ printf("ERROR: bpf_prog_get_fd_by_id failed (%s)\n",
+ strerror(errno));
+ return err;
+ }
+
+ if (!should_detach(prog_fd, dequeue_prog_names, ARRAY_SIZE(dequeue_prog_names))) {
+ close(prog_fd);
+ return err;
+ }
+
+ opts.old_prog_fd = prog_fd;
+ err = bpf_xdp_detach(ifindex,
+ (XDP_FLAGS_DEQUEUE_MODE | XDP_FLAGS_REPLACE),
+ &opts);
+ if (err < 0)
+ printf("ERROR: failed to detach dequeue program from %s (%s)\n",
+ ifname, strerror(errno));
+ }
+
+ /* TODO: Remember to cleanup map, when adding use of shared map
* bpf_map_delete_elem((map_fd, &idx);
*/
-close_out:
- close(prog_fd);
return err;
}
@@ -112,24 +192,23 @@ static void usage(const char *prog)
" -d detach program\n"
" -S use skb-mode\n"
" -F force loading prog\n"
- " -D direct table lookups (skip fib rules)\n",
+ " -D direct table lookups (skip fib rules)\n"
+ " -Q queue packets in a per-interface PIFO map before forwarding\n",
prog);
}
int main(int argc, char **argv)
{
- const char *prog_name = "xdp_fwd";
- struct bpf_program *prog = NULL;
- struct bpf_program *pos;
- const char *sec_name;
- int prog_fd = -1, map_fd = -1;
+ int redir_prog_fd = -1, dequeue_prog_fd = -1, redir_map_fd = -1, pifos_map_fd = -1;
+ const char *prog_name = "xdp_fwd_prog";
char filename[PATH_MAX];
struct bpf_object *obj;
int opt, i, idx, err;
+ bool queue = false;
int attach = 1;
int ret = 0;
- while ((opt = getopt(argc, argv, ":dDSF")) != -1) {
+ while ((opt = getopt(argc, argv, ":dDQSF")) != -1) {
switch (opt) {
case 'd':
attach = 0;
@@ -141,7 +220,11 @@ int main(int argc, char **argv)
xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
break;
case 'D':
- prog_name = "xdp_fwd_direct";
+ prog_name = "xdp_fwd_direct_prog";
+ break;
+ case 'Q':
+ prog_name = "xdp_fwd_queue";
+ queue = true;
break;
default:
usage(basename(argv[0]));
@@ -170,9 +253,6 @@ int main(int argc, char **argv)
if (libbpf_get_error(obj))
return 1;
- prog = bpf_object__next_program(obj, NULL);
- bpf_program__set_type(prog, BPF_PROG_TYPE_XDP);
-
err = bpf_object__load(obj);
if (err) {
printf("Does kernel support devmap lookup?\n");
@@ -181,25 +261,34 @@ int main(int argc, char **argv)
*/
return 1;
}
-
- bpf_object__for_each_program(pos, obj) {
- sec_name = bpf_program__section_name(pos);
- if (sec_name && !strcmp(sec_name, prog_name)) {
- prog = pos;
- break;
- }
- }
- prog_fd = bpf_program__fd(prog);
- if (prog_fd < 0) {
- printf("program not found: %s\n", strerror(prog_fd));
+ redir_prog_fd = bpf_program__fd(bpf_object__find_program_by_name(obj,
+ prog_name));
+ if (redir_prog_fd < 0) {
+ printf("program not found: %s\n", strerror(redir_prog_fd));
return 1;
}
- map_fd = bpf_map__fd(bpf_object__find_map_by_name(obj,
- "xdp_tx_ports"));
- if (map_fd < 0) {
- printf("map not found: %s\n", strerror(map_fd));
+
+ redir_map_fd = bpf_map__fd(bpf_object__find_map_by_name(obj,
+ "xdp_tx_ports"));
+ if (redir_map_fd < 0) {
+ printf("map not found: %s\n", strerror(redir_map_fd));
return 1;
}
+
+ if (queue) {
+ dequeue_prog_fd = bpf_program__fd(bpf_object__find_program_by_name(obj,
+ "xdp_dequeue"));
+ if (dequeue_prog_fd < 0) {
+ printf("dequeue program not found: %s\n",
+ strerror(-dequeue_prog_fd));
+ return 1;
+ }
+ pifos_map_fd = bpf_map__fd(bpf_object__find_map_by_name(obj, "pifo_maps"));
+ if (pifos_map_fd < 0) {
+ printf("map not found: %s\n", strerror(-pifos_map_fd));
+ return 1;
+ }
+ }
}
for (i = optind; i < argc; ++i) {
@@ -212,11 +301,12 @@ int main(int argc, char **argv)
return 1;
}
if (!attach) {
- err = do_detach(idx, argv[i], prog_name);
+ err = do_detach(idx, argv[i], argv[0]);
if (err)
ret = err;
} else {
- err = do_attach(idx, prog_fd, map_fd, argv[i]);
+ err = do_attach(idx, redir_prog_fd, dequeue_prog_fd,
+ redir_map_fd, pifos_map_fd, argv[i]);
if (err)
ret = err;
}
--
2.37.0
Powered by blists - more mailing lists