[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <463c8ea7-08cf-412e-bb31-6fbb15b4df8b@linux.dev>
Date: Thu, 25 Apr 2024 16:27:22 -0700
From: Martin KaFai Lau <martin.lau@...ux.dev>
To: Brad Cowie <brad@...cet.nz>
Cc: lorenzo@...nel.org, memxor@...il.com, pablo@...filter.org,
davem@...emloft.net, kuba@...nel.org, pabeni@...hat.com, ast@...nel.org,
daniel@...earbox.net, andrii@...nel.org, song@...nel.org,
john.fastabend@...il.com, sdf@...gle.com, jolsa@...nel.org,
netfilter-devel@...r.kernel.org, coreteam@...filter.org,
bpf@...r.kernel.org, netdev@...r.kernel.org
Subject: Re: [PATCH bpf-next v2 1/2] net: netfilter: Make ct zone opts
configurable for bpf ct helpers
On 4/23/24 8:00 PM, Brad Cowie wrote:
> Add ct zone id/flags/dir to bpf_ct_opts so that arbitrary ct zones
> can be used for xdp/tc bpf ct helper functions bpf_{xdp,skb}_ct_alloc
> and bpf_{xdp,skb}_ct_lookup.
>
> Signed-off-by: Brad Cowie <brad@...cet.nz>
> ---
> v1 -> v2:
> - Make ct zone flags/dir configurable
> ---
> net/netfilter/nf_conntrack_bpf.c | 97 ++++++++++++++++++++------------
> 1 file changed, 61 insertions(+), 36 deletions(-)
>
> diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c
> index d2492d050fe6..67f73b57089b 100644
> --- a/net/netfilter/nf_conntrack_bpf.c
> +++ b/net/netfilter/nf_conntrack_bpf.c
> @@ -21,41 +21,44 @@
> /* bpf_ct_opts - Options for CT lookup helpers
> *
> * Members:
> - * @netns_id - Specify the network namespace for lookup
> - * Values:
> - * BPF_F_CURRENT_NETNS (-1)
> - * Use namespace associated with ctx (xdp_md, __sk_buff)
> - * [0, S32_MAX]
> - * Network Namespace ID
> - * @error - Out parameter, set for any errors encountered
> - * Values:
> - * -EINVAL - Passed NULL for bpf_tuple pointer
> - * -EINVAL - opts->reserved is not 0
> - * -EINVAL - netns_id is less than -1
> - * -EINVAL - opts__sz isn't NF_BPF_CT_OPTS_SZ (12)
> - * -EPROTO - l4proto isn't one of IPPROTO_TCP or IPPROTO_UDP
> - * -ENONET - No network namespace found for netns_id
> - * -ENOENT - Conntrack lookup could not find entry for tuple
> - * -EAFNOSUPPORT - tuple__sz isn't one of sizeof(tuple->ipv4)
> - * or sizeof(tuple->ipv6)
> - * @l4proto - Layer 4 protocol
> - * Values:
> - * IPPROTO_TCP, IPPROTO_UDP
> - * @dir: - connection tracking tuple direction.
> - * @reserved - Reserved member, will be reused for more options in future
> - * Values:
> - * 0
> + * @netns_id - Specify the network namespace for lookup
> + * Values:
> + * BPF_F_CURRENT_NETNS (-1)
> + * Use namespace associated with ctx (xdp_md, __sk_buff)
> + * [0, S32_MAX]
> + * Network Namespace ID
> + * @error - Out parameter, set for any errors encountered
> + * Values:
> + * -EINVAL - Passed NULL for bpf_tuple pointer
> + * -EINVAL - netns_id is less than -1
> + * -EINVAL - opts__sz isn't NF_BPF_CT_OPTS_SZ (16)
> + * or NF_BPF_CT_OPTS_OLD_SZ (12)
> + * -EPROTO - l4proto isn't one of IPPROTO_TCP or IPPROTO_UDP
> + * -ENONET - No network namespace found for netns_id
> + * -ENOENT - Conntrack lookup could not find entry for tuple
> + * -EAFNOSUPPORT - tuple__sz isn't one of sizeof(tuple->ipv4)
> + * or sizeof(tuple->ipv6)
> + * @l4proto - Layer 4 protocol
> + * Values:
> + * IPPROTO_TCP, IPPROTO_UDP
> + * @dir: - connection tracking tuple direction.
Please avoid whitespace changes. It is unnecessary code churns.
> + * @ct_zone_id - connection tracking zone id.
> + * @ct_zone_flags - connection tracking zone flags.
> + * @ct_zone_dir - connection tracking zone direction.
> */
> struct bpf_ct_opts {
> s32 netns_id;
> s32 error;
> u8 l4proto;
> u8 dir;
> - u8 reserved[2];
> + u16 ct_zone_id;
> + u8 ct_zone_flags;
> + u8 ct_zone_dir;
The size is 16 now with 2 tail paddings.
It needs a "u8 reserved[2];" at the end and need to check its 0.
> };
>
> enum {
> - NF_BPF_CT_OPTS_SZ = 12,
> + NF_BPF_CT_OPTS_SZ = 16,
> + NF_BPF_CT_OPTS_OLD_SZ = 12,
Avoid adding NF_BPF_CT_OPTS_OLD_SZ for now. I don't see how it may be useful.
> };
>
> static int bpf_nf_ct_tuple_parse(struct bpf_sock_tuple *bpf_tuple,
> @@ -104,11 +107,13 @@ __bpf_nf_ct_alloc_entry(struct net *net, struct bpf_sock_tuple *bpf_tuple,
> u32 timeout)
> {
> struct nf_conntrack_tuple otuple, rtuple;
> + struct nf_conntrack_zone ct_zone;
> struct nf_conn *ct;
> int err;
>
> - if (!opts || !bpf_tuple || opts->reserved[0] || opts->reserved[1] ||
> - opts_len != NF_BPF_CT_OPTS_SZ)
> + if (!opts || !bpf_tuple)
> + return ERR_PTR(-EINVAL);
> + if (!(opts_len == NF_BPF_CT_OPTS_SZ || opts_len == NF_BPF_CT_OPTS_OLD_SZ))
> return ERR_PTR(-EINVAL);
>
> if (unlikely(opts->netns_id < BPF_F_CURRENT_NETNS))
> @@ -130,7 +135,16 @@ __bpf_nf_ct_alloc_entry(struct net *net, struct bpf_sock_tuple *bpf_tuple,
> return ERR_PTR(-ENONET);
> }
>
> - ct = nf_conntrack_alloc(net, &nf_ct_zone_dflt, &otuple, &rtuple,
> + if (opts_len == NF_BPF_CT_OPTS_SZ) {
> + if (opts->ct_zone_dir == 0)
I don't know the details about the dir in ct_zone, so a question: a 0
ct_zone_dir is invalid and can be reused to mean NF_CT_DEFAULT_ZONE_DIR?
> + opts->ct_zone_dir = NF_CT_DEFAULT_ZONE_DIR;
> + nf_ct_zone_init(&ct_zone,
> + opts->ct_zone_id, opts->ct_zone_dir, opts->ct_zone_flags);
> + } else {
Better enforce "ct_zone_id == 0" also instead of ignoring it.
> + ct_zone = nf_ct_zone_dflt;
> + }
> +
> + ct = nf_conntrack_alloc(net, &ct_zone, &otuple, &rtuple,
> GFP_ATOMIC);
> if (IS_ERR(ct))
> goto out;
> @@ -152,11 +166,13 @@ static struct nf_conn *__bpf_nf_ct_lookup(struct net *net,
> {
> struct nf_conntrack_tuple_hash *hash;
> struct nf_conntrack_tuple tuple;
> + struct nf_conntrack_zone ct_zone;
> struct nf_conn *ct;
> int err;
>
> - if (!opts || !bpf_tuple || opts->reserved[0] || opts->reserved[1] ||
> - opts_len != NF_BPF_CT_OPTS_SZ)
> + if (!opts || !bpf_tuple)
> + return ERR_PTR(-EINVAL);
> + if (!(opts_len == NF_BPF_CT_OPTS_SZ || opts_len == NF_BPF_CT_OPTS_OLD_SZ))
> return ERR_PTR(-EINVAL);
> if (unlikely(opts->l4proto != IPPROTO_TCP && opts->l4proto != IPPROTO_UDP))
> return ERR_PTR(-EPROTO);
> @@ -174,7 +190,16 @@ static struct nf_conn *__bpf_nf_ct_lookup(struct net *net,
> return ERR_PTR(-ENONET);
> }
>
> - hash = nf_conntrack_find_get(net, &nf_ct_zone_dflt, &tuple);
> + if (opts_len == NF_BPF_CT_OPTS_SZ) {
> + if (opts->ct_zone_dir == 0)
> + opts->ct_zone_dir = NF_CT_DEFAULT_ZONE_DIR;
> + nf_ct_zone_init(&ct_zone,
> + opts->ct_zone_id, opts->ct_zone_dir, opts->ct_zone_flags);
> + } else {
> + ct_zone = nf_ct_zone_dflt;
> + }
> +
> + hash = nf_conntrack_find_get(net, &ct_zone, &tuple);
> if (opts->netns_id >= 0)
> put_net(net);
> if (!hash)
> @@ -245,7 +270,7 @@ __bpf_kfunc_start_defs();
> * @opts - Additional options for allocation (documented above)
> * Cannot be NULL
> * @opts__sz - Length of the bpf_ct_opts structure
> - * Must be NF_BPF_CT_OPTS_SZ (12)
> + * Must be NF_BPF_CT_OPTS_SZ (16)
> */
> __bpf_kfunc struct nf_conn___init *
> bpf_xdp_ct_alloc(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
> @@ -279,7 +304,7 @@ bpf_xdp_ct_alloc(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
> * @opts - Additional options for lookup (documented above)
> * Cannot be NULL
> * @opts__sz - Length of the bpf_ct_opts structure
> - * Must be NF_BPF_CT_OPTS_SZ (12)
> + * Must be NF_BPF_CT_OPTS_SZ (16)
Either 16 or 12. Same for below.
> */
> __bpf_kfunc struct nf_conn *
> bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
> @@ -312,7 +337,7 @@ bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
> * @opts - Additional options for allocation (documented above)
> * Cannot be NULL
> * @opts__sz - Length of the bpf_ct_opts structure
> - * Must be NF_BPF_CT_OPTS_SZ (12)
> + * Must be NF_BPF_CT_OPTS_SZ (16)
> */
> __bpf_kfunc struct nf_conn___init *
> bpf_skb_ct_alloc(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,
> @@ -347,7 +372,7 @@ bpf_skb_ct_alloc(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,
> * @opts - Additional options for lookup (documented above)
> * Cannot be NULL
> * @opts__sz - Length of the bpf_ct_opts structure
> - * Must be NF_BPF_CT_OPTS_SZ (12)
> + * Must be NF_BPF_CT_OPTS_SZ (16)
> */
> __bpf_kfunc struct nf_conn *
> bpf_skb_ct_lookup(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,
Powered by blists - more mailing lists