Message-ID: <20201006183302.337a9502@carbon>
Date: Tue, 6 Oct 2020 18:33:02 +0200
From: Jesper Dangaard Brouer <brouer@...hat.com>
To: bpf@...r.kernel.org
Cc: netdev@...r.kernel.org, Daniel Borkmann <borkmann@...earbox.net>,
Alexei Starovoitov <alexei.starovoitov@...il.com>,
maze@...gle.com, lmb@...udflare.com, shaun@...era.io,
Lorenzo Bianconi <lorenzo@...nel.org>, marek@...udflare.com,
John Fastabend <john.fastabend@...il.com>,
Jakub Kicinski <kuba@...nel.org>, brouer@...hat.com
Subject: Re: [PATCH bpf-next V1 3/6] bpf: add BPF-helper for reading MTU
from net_device via ifindex
On Tue, 06 Oct 2020 18:03:01 +0200
Jesper Dangaard Brouer <brouer@...hat.com> wrote:
> FIXME: add description.
Oops, I will obviously send a V2.
I still want feedback on whether I should implement another BPF-helper
as sketched below:
> FIXME: IMHO we can create a better BPF-helper named bpf_mtu_check()
> instead of bpf_mtu_lookup(), because a flag can be used for requesting
> GRO segment size checking. The return value of bpf_mtu_check() says
> whether the MTU was violated, but the MTU is also returned via a
> pointer arg, to allow BPF-progs to do their own logic.
>
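To make the feedback request concrete: from the BPF-prog side, the
proposed bpf_mtu_check() could be used roughly like below. Treat it as
a sketch only; the helper name, argument order, the BPF_MTU_CHK_GSO
flag name and the return convention are all made up here and not
implemented anywhere yet.

#include <linux/bpf.h>
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>

/* Sketched signature:
 *   long bpf_mtu_check(void *ctx, u32 ifindex, u32 *mtu, u64 flags)
 * returning <0 on lookup error, 0 if the packet fits, >0 on MTU violation.
 */
SEC("classifier")
int tc_mtu_enforce(struct __sk_buff *skb)
{
	__u32 mtu = 0;
	long ret;

	/* BPF_MTU_CHK_GSO: made-up flag for the GRO/GSO segment size
	 * check mentioned in the FIXME above.
	 */
	ret = bpf_mtu_check(skb, skb->ifindex, &mtu, BPF_MTU_CHK_GSO);
	if (ret < 0)
		return TC_ACT_SHOT;	/* lookup error, e.g. bad ifindex */
	if (ret > 0)
		return TC_ACT_SHOT;	/* packet or GSO segs exceed MTU */

	/* mtu now holds the device MTU for the prog's own logic */
	return TC_ACT_OK;
}
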
> Signed-off-by: Jesper Dangaard Brouer <brouer@...hat.com>
> ---
> include/uapi/linux/bpf.h | 13 +++++++++++
> net/core/filter.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++
> 2 files changed, 69 insertions(+)
>
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 50ce65e37b16..29b335cb96ef 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -3718,6 +3718,18 @@ union bpf_attr {
> * never return NULL.
> * Return
> * A pointer pointing to the kernel percpu variable on this cpu.
> + *
> + * int bpf_mtu_lookup(void *ctx, u32 ifindex, u64 flags)
> + * Description
> + * Lookup MTU of net device based on ifindex. The Linux kernel
> + * route table can configure MTUs on a more specific per route
> + * level, which is not provided by this helper. For route level
> + * MTU checks use the **bpf_fib_lookup**\ () helper.
> + *
> + * *ctx* is either **struct xdp_md** for XDP programs or
> + * **struct sk_buff** for tc cls_act programs.
> + * Return
> + * On success, the MTU size is returned. On error, a negative value.
> */
> #define __BPF_FUNC_MAPPER(FN) \
> FN(unspec), \
> @@ -3875,6 +3887,7 @@ union bpf_attr {
> FN(redirect_neigh), \
> FN(bpf_per_cpu_ptr), \
> FN(bpf_this_cpu_ptr), \
> + FN(mtu_lookup), \
> /* */
>
> /* integer value in 'imm' field of BPF_CALL instruction selects which helper
> diff --git a/net/core/filter.c b/net/core/filter.c
> index d84723f347c0..49ae3b80027b 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -5512,6 +5512,58 @@ static const struct bpf_func_proto bpf_skb_fib_lookup_proto = {
> .arg4_type = ARG_ANYTHING,
> };
>
> +static int bpf_mtu_lookup(struct net *netns, u32 ifindex, u64 flags)
> +{
> + struct net_device *dev;
> +
> + // XXX: Do we even need flags?
> + // Flag idea: get ctx dev->mtu for XDP_TX or redir out-same-dev
> + if (flags)
> + return -EINVAL;
> +
> + dev = dev_get_by_index_rcu(netns, ifindex);
> + if (!dev)
> + return -ENODEV;
> +
> + return dev->mtu;
> +}
> +
> +BPF_CALL_3(bpf_skb_mtu_lookup, struct sk_buff *, skb,
> + u32, ifindex, u64, flags)
> +{
> + struct net *netns = dev_net(skb->dev);
> +
> + return bpf_mtu_lookup(netns, ifindex, flags);
> +}
> +
> +BPF_CALL_3(bpf_xdp_mtu_lookup, struct xdp_buff *, xdp,
> + u32, ifindex, u64, flags)
> +{
> + struct net *netns = dev_net(xdp->rxq->dev);
> + // XXX: Handle if this runs in devmap prog (then is rxq invalid?)
> +
> + return bpf_mtu_lookup(netns, ifindex, flags);
> +}
> +
> +static const struct bpf_func_proto bpf_skb_mtu_lookup_proto = {
> + .func = bpf_skb_mtu_lookup,
> + .gpl_only = true,
> + .ret_type = RET_INTEGER,
> + .arg1_type = ARG_PTR_TO_CTX,
> + .arg2_type = ARG_ANYTHING,
> + .arg3_type = ARG_ANYTHING,
> +};
> +
> +static const struct bpf_func_proto bpf_xdp_mtu_lookup_proto = {
> + .func = bpf_xdp_mtu_lookup,
> + .gpl_only = true,
> + .ret_type = RET_INTEGER,
> + .arg1_type = ARG_PTR_TO_CTX,
> + .arg2_type = ARG_ANYTHING,
> + .arg3_type = ARG_ANYTHING,
> +};
> +
> +
> #if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
> static int bpf_push_seg6_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len)
> {
> @@ -7075,6 +7127,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
> return &bpf_get_socket_uid_proto;
> case BPF_FUNC_fib_lookup:
> return &bpf_skb_fib_lookup_proto;
> + case BPF_FUNC_mtu_lookup:
> + return &bpf_skb_mtu_lookup_proto;
> case BPF_FUNC_sk_fullsock:
> return &bpf_sk_fullsock_proto;
> case BPF_FUNC_sk_storage_get:
> @@ -7144,6 +7198,8 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
> return &bpf_xdp_adjust_tail_proto;
> case BPF_FUNC_fib_lookup:
> return &bpf_xdp_fib_lookup_proto;
> + case BPF_FUNC_mtu_lookup:
> + return &bpf_xdp_mtu_lookup_proto;
> #ifdef CONFIG_INET
> case BPF_FUNC_sk_lookup_udp:
> return &bpf_xdp_sk_lookup_udp_proto;
>
>
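For completeness, and referring back to the bpf_mtu_lookup() doc text in
the bpf.h hunk above: BPF-prog side usage of the helper as wired up in
this patch would look roughly like below. This is only a sketch; the
egress_ifindex value is a placeholder, and the bpf_mtu_lookup() stub of
course only becomes available once this patch (or a V2) lands and the
helper definitions are regenerated.

#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <bpf/bpf_helpers.h>

SEC("xdp")
int xdp_check_mtu(struct xdp_md *ctx)
{
	void *data     = (void *)(long)ctx->data;
	void *data_end = (void *)(long)ctx->data_end;
	const __u32 egress_ifindex = 2;	/* placeholder ifindex */
	int pkt_len, mtu;

	mtu = bpf_mtu_lookup(ctx, egress_ifindex, 0 /* no flags */);
	if (mtu < 0)		/* -ENODEV or -EINVAL */
		return XDP_ABORTED;

	/* dev->mtu is the L3 MTU, so compare against the frame length
	 * minus the Ethernet header.
	 */
	pkt_len = (data_end - data) - ETH_HLEN;
	if (pkt_len > mtu)
		return XDP_DROP;

	return XDP_PASS;
}
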
--
Best regards,
Jesper Dangaard Brouer
MSc.CS, Principal Kernel Engineer at Red Hat
LinkedIn: http://www.linkedin.com/in/brouer