netdev - Re: [1/2 bpf-next] bpf: expose net_device from xdp for metadata

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <0697cf41-eaa0-0181-b5c0-7691cb316733@meta.com>
Date:   Wed, 9 Nov 2022 17:37:39 -0800
From:   Yonghong Song <yhs@...a.com>
To:     John Fastabend <john.fastabend@...il.com>, hawk@...nel.org,
        daniel@...earbox.net, kuba@...nel.org, davem@...emloft.net,
        ast@...nel.org
Cc:     netdev@...r.kernel.org, bpf@...r.kernel.org, sdf@...gle.com
Subject: Re: [1/2 bpf-next] bpf: expose net_device from xdp for metadata



On 11/9/22 1:52 PM, John Fastabend wrote:
> Allow xdp progs to read the net_device structure. Its useful to extract
> info from the dev itself. Currently, our tracing tooling uses kprobes
> to capture statistics and information about running net devices. We use
> kprobes instead of other hooks tc/xdp because we need to collect
> information about the interface not exposed through the xdp_md structures.
> This has some down sides that we want to avoid by moving these into the
> XDP hook itself. First, placing the kprobes in a generic function in
> the kernel is after XDP so we miss redirects and such done by the
> XDP networking program. And its needless overhead because we are
> already paying the cost for calling the XDP program, calling yet
> another prog is a waste. Better to do everything in one hook from
> performance side.
> 
> Of course we could one-off each one of these fields, but that would
> explode the xdp_md struct and then require writing convert_ctx_access
> writers for each field. By using BTF we avoid writing field specific
> convertion logic, BTF just knows how to read the fields, we don't
> have to add many fields to xdp_md, and I don't have to get every
> field we will use in the future correct.
> 
> For reference current examples in our code base use the ifindex,
> ifname, qdisc stats, net_ns fields, among others. With this
> patch we can now do the following,
> 
>          dev = ctx->rx_dev;
>          net = dev->nd_net.net;
> 
> 	uid.ifindex = dev->ifindex;
> 	memcpy(uid.ifname, dev->ifname, NAME);
>          if (net)
> 		uid.inum = net->ns.inum;
> 
> to report the name, index and ns.inum which identifies an
> interface in our system.

In
https://lore.kernel.org/bpf/ad15b398-9069-4a0e-48cb-4bb651ec3088@meta.com/
Namhyung Kim wanted to access new perf data with a helper.
I proposed a helper bpf_get_kern_ctx() which will get
the kernel ctx struct from which the actual perf data
can be retrieved. The interface looks like
	void *bpf_get_kern_ctx(void *)
the input parameter needs to be a PTR_TO_CTX and
the verifer is able to return the corresponding kernel
ctx struct based on program type.

The following is really hacked demonstration with
some of change coming from my bpf_rcu_read_lock()
patch set https://lore.kernel.org/bpf/20221109211944.3213817-1-yhs@fb.com/

I modified your test to utilize the
bpf_get_kern_ctx() helper in your test_xdp_md.c.

With this single helper, we can cover the above perf
data use case and your use case and maybe others
to avoid new UAPI changes.


diff --git a/include/linux/bpf.h b/include/linux/bpf.h 
 

index 798aec816970..da5e3f79f8a4 100644 
 

--- a/include/linux/bpf.h 
 

+++ b/include/linux/bpf.h 
 

@@ -2113,6 +2113,7 @@ bool bpf_prog_has_kfunc_call(const struct bpf_prog 
*prog); 

  const struct btf_func_model * 
 

  bpf_jit_find_kfunc_model(const struct bpf_prog *prog, 
 

                          const struct bpf_insn *insn); 
 

+void *bpf_get_kern_ctx(void *); 
 

  struct bpf_core_ctx { 
 

         struct bpf_verifier_log *log; 
 

         const struct btf *btf; 
 

diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c 
 

index 5579ff3a5b54..cd2832e73de3 100644 
 

--- a/kernel/bpf/btf.c 
 

+++ b/kernel/bpf/btf.c 
 

@@ -199,6 +199,7 @@ DEFINE_IDR(btf_idr); 
 

  DEFINE_SPINLOCK(btf_idr_lock); 
 


  enum btf_kfunc_hook {
+       BTF_KFUNC_HOOK_GENERIC,
         BTF_KFUNC_HOOK_XDP,
         BTF_KFUNC_HOOK_TC,
         BTF_KFUNC_HOOK_STRUCT_OPS,
@@ -6428,6 +6429,10 @@ static int btf_check_func_arg_match(struct 
bpf_verifier_env *env,
                         return -EINVAL;
                 }

+               /* HACK: pointer to void, skip the rest of processing */
+               if (t->type == 0)
+                       continue;
+
                 ref_t = btf_type_skip_modifiers(btf, t->type, &ref_id);
                 ref_tname = btf_name_by_offset(btf, ref_t->name_off);

@@ -7499,6 +7504,8 @@ static u32 *__btf_kfunc_id_set_contains(const 
struct btf *btf,
  static int bpf_prog_type_to_kfunc_hook(enum bpf_prog_type prog_type)
  {
         switch (prog_type) {
+       case BPF_PROG_TYPE_UNSPEC:
+               return BTF_KFUNC_HOOK_GENERIC;
         case BPF_PROG_TYPE_XDP:
                 return BTF_KFUNC_HOOK_XDP;
         case BPF_PROG_TYPE_SCHED_CLS:
@@ -7527,6 +7534,11 @@ u32 *btf_kfunc_id_set_contains(const struct btf *btf,
                                u32 kfunc_btf_id)
  {
         enum btf_kfunc_hook hook;
+       u32 *kfunc_flags;
+
+       kfunc_flags = __btf_kfunc_id_set_contains(btf, 
BTF_KFUNC_HOOK_GENERIC, kfunc_btf_id);
+       if (kfunc_flags)
+               return kfunc_flags;

         hook = bpf_prog_type_to_kfunc_hook(prog_type);
         return __btf_kfunc_id_set_contains(btf, hook, kfunc_btf_id);
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 283f55bbeb70..b18e3464acc3 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -1717,9 +1717,25 @@ static const struct btf_kfunc_id_set 
tracing_kfunc_set = {
         .set   = &tracing_btf_ids,
  };

+void *bpf_get_kern_ctx(void *ctx) {
+       return ctx;
+}
+
+BTF_SET8_START(generic_btf_ids)
+BTF_ID_FLAGS(func, bpf_get_kern_ctx)
+BTF_SET8_END(generic_btf_ids)
+
+static const struct btf_kfunc_id_set generic_kfunc_set = {
+       .owner = THIS_MODULE,
+       .set   = &generic_btf_ids,
+};
+
  static int __init kfunc_init(void)
  {
-       return register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, 
&tracing_kfunc_set);
+       int ret;
+
+       ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, 
&tracing_kfunc_set);
+       return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_UNSPEC, 
&generic_kfunc_set);
  }

  late_initcall(kfunc_init);
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index d3b75aa0c54d..d8303abdb377 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -7784,6 +7784,16 @@ static void mark_btf_func_reg_size(struct 
bpf_verifier_env *env, u32 regno,
         }
  }

+BTF_ID_LIST_SINGLE(bpf_get_kern_ctx_id, func, bpf_get_kern_ctx)
+BTF_ID_LIST_SINGLE(xdp_buff_btf_ids, struct, xdp_buff)
+
+static int get_kctx_btf_id(enum bpf_prog_type prog_type) {
+       if (prog_type == BPF_PROG_TYPE_XDP)
+               return *xdp_buff_btf_ids;
+       /* other program types */
+       return -EINVAL;
+}
+
  static int check_kfunc_call(struct bpf_verifier_env *env, struct 
bpf_insn *insn,
                             int *insn_idx_p)
  {
@@ -7853,7 +7863,20 @@ static int check_kfunc_call(struct 
bpf_verifier_env *env, struct bpf_insn *insn,
                 return -EINVAL;
         }

-       if (btf_type_is_scalar(t)) {
+       if (func_id == *bpf_get_kern_ctx_id) {
+               /* HACK: bpf_get_kern_ctx() kfunc */
+               int btf_id = get_kctx_btf_id(resolve_prog_type(env->prog));
+
+               if (btf_id > 0) {
+                       mark_reg_known_zero(env, regs, BPF_REG_0);
+                       regs[BPF_REG_0].btf = desc_btf;
+                       regs[BPF_REG_0].type = PTR_TO_BTF_ID;
+                       regs[BPF_REG_0].btf_id = btf_id;
+               } else {
+                       verbose(env, "Cannot get kctx\n");
+                       return -EINVAL;
+               }
+       } else if (btf_type_is_scalar(t)) {
                 mark_reg_unknown(env, regs, BPF_REG_0);
                 mark_btf_func_reg_size(env, BPF_REG_0, t->size);
         } else if (btf_type_is_ptr(t)) {
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_md.c 
b/tools/testing/selftests/bpf/prog_tests/xdp_md.c
new file mode 100644
index 000000000000..facf3f3ab86f
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_md.c
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <network_helpers.h>
+#include "test_xdp_md.skel.h"
+
+void test_xdp_md(void)
+{
+       struct test_xdp_md *skel;
+       int err, prog_fd;
+       char buf[128];
+
+       LIBBPF_OPTS(bpf_test_run_opts, topts,
+               .data_in = &pkt_v4,
+               .data_size_in = sizeof(pkt_v4),
+               .data_out = buf,
+               .data_size_out = sizeof(buf),
+               .repeat = 1,
+       );
+
+       skel = test_xdp_md__open_and_load();
+       if (!ASSERT_OK_PTR(skel, "skel_open"))
+               return;
+
+       prog_fd = bpf_program__fd(skel->progs.md_xdp);
+       err = bpf_prog_test_run_opts(prog_fd, &topts);
+       ASSERT_OK(err, "test_run");
+       ASSERT_EQ(topts.retval, XDP_PASS, "xdp_md test_run retval");
+
+       ASSERT_EQ(skel->bss->ifindex, 1, "xdp_md ifindex");
+       ASSERT_EQ(skel->bss->ifindex, skel->bss->ingress_ifindex, 
"xdp_md ingress_ifindex");
+       ASSERT_STREQ(skel->bss->name, "lo", "xdp_md name");
+       ASSERT_NEQ(skel->bss->inum, 0, "xdp_md inum");
+
+       test_xdp_md__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_md.c 
b/tools/testing/selftests/bpf/progs/test_xdp_md.c
new file mode 100644
index 000000000000..c6a7686ec9c6
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_xdp_md.c
@@ -0,0 +1,28 @@
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <string.h>
+
+#define        IFNAMSIZ 16
+
+int ifindex, ingress_ifindex;
+char name[IFNAMSIZ];
+unsigned int inum;
+
+extern void *bpf_get_kern_ctx(void *) __ksym;
+
+SEC("xdp")
+int md_xdp(struct xdp_md *ctx)
+{
+       struct xdp_buff *kctx = bpf_get_kern_ctx(ctx);
+       struct net_device *dev;
+
+       dev = kctx->rxq->dev;
+
+       inum = dev->nd_net.net->ns.inum;
+       memcpy(name, dev->name, IFNAMSIZ);
+       ingress_ifindex = dev->ifindex;
+       return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";

> 
> Signed-off-by: John Fastabend <john.fastabend@...il.com>
> ---
>   include/uapi/linux/bpf.h       |  1 +
>   net/core/filter.c              | 19 +++++++++++++++++++
>   tools/include/uapi/linux/bpf.h |  1 +
>   3 files changed, 21 insertions(+)
> 
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 94659f6b3395..50403eb3b6cf 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -6123,6 +6123,7 @@ struct xdp_md {
>   	__u32 rx_queue_index;  /* rxq->queue_index  */
>   
>   	__u32 egress_ifindex;  /* txq->dev->ifindex */
> +	__bpf_md_ptr(struct net_device *, rx_dev); /* rxq->dev */
>   };
>   
>   /* DEVMAP map-value layout
> diff --git a/net/core/filter.c b/net/core/filter.c
> index bb0136e7a8e4..d445ffbea8f1 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -8686,6 +8686,8 @@ static bool __is_valid_xdp_access(int off, int size)
>   	return true;
>   }
>   
> +BTF_ID_LIST_SINGLE(btf_xdp_get_netdev_id, struct, net_device)
> +
>   static bool xdp_is_valid_access(int off, int size,
>   				enum bpf_access_type type,
>   				const struct bpf_prog *prog,
> @@ -8718,6 +8720,15 @@ static bool xdp_is_valid_access(int off, int size,
>   	case offsetof(struct xdp_md, data_end):
>   		info->reg_type = PTR_TO_PACKET_END;
>   		break;
> +	case offsetof(struct xdp_md, rx_dev):
> +		info->reg_type = PTR_TO_BTF_ID;
> +		info->btf_id = btf_xdp_get_netdev_id[0];
> +		info->btf = bpf_get_btf_vmlinux();
> +	        if (IS_ERR_OR_NULL(info->btf))
> +			return false;
> +		if (size != sizeof(u64))
> +			return false;
> +		return true;
>   	}
>   
>   	return __is_valid_xdp_access(off, size);
> @@ -9808,6 +9819,14 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
>   		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
>   				      offsetof(struct net_device, ifindex));
>   		break;
> +	case offsetof(struct xdp_md, rx_dev):
> +		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, rxq),
> +				      si->dst_reg, si->src_reg,
> +				      offsetof(struct xdp_buff, rxq));
> +		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_rxq_info, dev),
> +				      si->dst_reg, si->dst_reg,
> +				      offsetof(struct xdp_rxq_info, dev));
> +		break;
>   	}
>   
>   	return insn - insn_buf;
> diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
> index 94659f6b3395..50403eb3b6cf 100644
> --- a/tools/include/uapi/linux/bpf.h
> +++ b/tools/include/uapi/linux/bpf.h
> @@ -6123,6 +6123,7 @@ struct xdp_md {
>   	__u32 rx_queue_index;  /* rxq->queue_index  */
>   
>   	__u32 egress_ifindex;  /* txq->dev->ifindex */
> +	__bpf_md_ptr(struct net_device *, rx_dev); /* rxq->dev */
>   };
>   
>   /* DEVMAP map-value layout