[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <793fd9a3-0562-1edd-e2b4-f88fa81d876d@iogearbox.net>
Date: Fri, 28 Jun 2024 23:19:15 +0200
From: Daniel Borkmann <daniel@...earbox.net>
To: Lorenzo Bianconi <lorenzo@...nel.org>, bpf@...r.kernel.org
Cc: pablo@...filter.org, kadlec@...filter.org, davem@...emloft.net,
edumazet@...gle.com, kuba@...nel.org, pabeni@...hat.com,
netfilter-devel@...r.kernel.org, netdev@...r.kernel.org, ast@...nel.org,
andrii@...nel.org, martin.lau@...ux.dev, eddyz87@...il.com,
lorenzo.bianconi@...hat.com, toke@...hat.com, fw@...len.de, hawk@...nel.org,
horms@...nel.org, donhunte@...hat.com, memxor@...il.com
Subject: Re: [PATCH v5 bpf-next 1/3] netfilter: nf_tables: add flowtable map
for xdp offload
On 6/14/24 5:40 PM, Lorenzo Bianconi wrote:
[...]
> diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
> index a010b25076ca0..d9b019c98694b 100644
> --- a/net/netfilter/nf_flow_table_offload.c
> +++ b/net/netfilter/nf_flow_table_offload.c
> @@ -1192,7 +1192,7 @@ int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
> int err;
>
> if (!nf_flowtable_hw_offload(flowtable))
> - return 0;
> + return nf_flow_offload_xdp_setup(flowtable, dev, cmd);
>
> if (dev->netdev_ops->ndo_setup_tc)
> err = nf_flow_table_offload_cmd(&bo, flowtable, dev, cmd,
> @@ -1200,8 +1200,10 @@ int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
> else
> err = nf_flow_table_indr_offload_cmd(&bo, flowtable, dev, cmd,
> &extack);
> - if (err < 0)
> + if (err < 0) {
> + nf_flow_offload_xdp_cancel(flowtable, dev, cmd);
> return err;
> + }
>
> return nf_flow_table_block_setup(flowtable, &bo, cmd);
> }
> diff --git a/net/netfilter/nf_flow_table_xdp.c b/net/netfilter/nf_flow_table_xdp.c
> new file mode 100644
> index 0000000000000..b9bdf27ba9bd3
> --- /dev/null
> +++ b/net/netfilter/nf_flow_table_xdp.c
> @@ -0,0 +1,163 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +#include <linux/kernel.h>
> +#include <linux/module.h>
> +#include <linux/netfilter.h>
> +#include <linux/rhashtable.h>
> +#include <linux/netdevice.h>
> +#include <net/flow_offload.h>
> +#include <net/netfilter/nf_flow_table.h>
> +
> +struct flow_offload_xdp_ft {
> + struct list_head head;
> + struct nf_flowtable *ft;
> + struct rcu_head rcuhead;
> +};
> +
> +struct flow_offload_xdp {
> + struct hlist_node hnode;
> + unsigned long net_device_addr;
> + struct list_head head;
> +};
> +
> +#define NF_XDP_HT_BITS 4
> +static DEFINE_HASHTABLE(nf_xdp_hashtable, NF_XDP_HT_BITS);
> +static DEFINE_MUTEX(nf_xdp_hashtable_lock);
> +
> +/* caller must hold rcu read lock */
> +struct nf_flowtable *nf_flowtable_by_dev(const struct net_device *dev)
> +{
> + unsigned long key = (unsigned long)dev;
> + struct flow_offload_xdp *iter;
> +
> + hash_for_each_possible_rcu(nf_xdp_hashtable, iter, hnode, key) {
> + if (key == iter->net_device_addr) {
> + struct flow_offload_xdp_ft *ft_elem;
> +
> + /* The user is supposed to insert a given net_device
> + * just into a single nf_flowtable so we always return
> + * the first element here.
> + */
> + ft_elem = list_first_or_null_rcu(&iter->head,
> + struct flow_offload_xdp_ft,
> + head);
> + return ft_elem ? ft_elem->ft : NULL;
> + }
> + }
> +
> + return NULL;
> +}
> +
> +static int nf_flowtable_by_dev_insert(struct nf_flowtable *ft,
> + const struct net_device *dev)
> +{
> + struct flow_offload_xdp *iter, *elem = NULL;
> + unsigned long key = (unsigned long)dev;
> + struct flow_offload_xdp_ft *ft_elem;
> +
> + ft_elem = kzalloc(sizeof(*ft_elem), GFP_KERNEL_ACCOUNT);
> + if (!ft_elem)
> + return -ENOMEM;
> +
> + ft_elem->ft = ft;
> +
> + mutex_lock(&nf_xdp_hashtable_lock);
> +
> + hash_for_each_possible(nf_xdp_hashtable, iter, hnode, key) {
> + if (key == iter->net_device_addr) {
> + elem = iter;
> + break;
> + }
> + }
> +
> + if (!elem) {
> + elem = kzalloc(sizeof(*elem), GFP_KERNEL_ACCOUNT);
> + if (!elem)
> + goto err_unlock;
> +
> + elem->net_device_addr = key;
Looks good. As I understand it (but just to double-check): if a device goes away, the
upper layers in the nf flowtable code will trigger nf_flowtable_by_dev_remove() based
on the device pointer to clean this up again from nf_xdp_hashtable, correct?
> + INIT_LIST_HEAD(&elem->head);
> + hash_add_rcu(nf_xdp_hashtable, &elem->hnode, key);
> + }
> + list_add_tail_rcu(&ft_elem->head, &elem->head);
> +
> + mutex_unlock(&nf_xdp_hashtable_lock);
> +
> + return 0;
> +
> +err_unlock:
> + mutex_unlock(&nf_xdp_hashtable_lock);
> + kfree(ft_elem);
> +
> + return -ENOMEM;
> +}
Powered by blists - more mailing lists