[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <ZoCFgFj7JFMSwlRQ@lore-desk>
Date: Sun, 30 Jun 2024 00:06:56 +0200
From: Lorenzo Bianconi <lorenzo@...nel.org>
To: Daniel Borkmann <daniel@...earbox.net>
Cc: bpf@...r.kernel.org, pablo@...filter.org, kadlec@...filter.org,
davem@...emloft.net, edumazet@...gle.com, kuba@...nel.org,
pabeni@...hat.com, netfilter-devel@...r.kernel.org,
netdev@...r.kernel.org, ast@...nel.org, andrii@...nel.org,
martin.lau@...ux.dev, eddyz87@...il.com,
lorenzo.bianconi@...hat.com, toke@...hat.com, fw@...len.de,
hawk@...nel.org, horms@...nel.org, donhunte@...hat.com,
memxor@...il.com
Subject: Re: [PATCH v5 bpf-next 1/3] netfilter: nf_tables: add flowtable map
for xdp offload
> On 6/14/24 5:40 PM, Lorenzo Bianconi wrote:
> [...]
> > diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
> > index a010b25076ca0..d9b019c98694b 100644
> > --- a/net/netfilter/nf_flow_table_offload.c
> > +++ b/net/netfilter/nf_flow_table_offload.c
> > @@ -1192,7 +1192,7 @@ int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
> > int err;
> > if (!nf_flowtable_hw_offload(flowtable))
> > - return 0;
> > + return nf_flow_offload_xdp_setup(flowtable, dev, cmd);
> > if (dev->netdev_ops->ndo_setup_tc)
> > err = nf_flow_table_offload_cmd(&bo, flowtable, dev, cmd,
> > @@ -1200,8 +1200,10 @@ int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
> > else
> > err = nf_flow_table_indr_offload_cmd(&bo, flowtable, dev, cmd,
> > &extack);
> > - if (err < 0)
> > + if (err < 0) {
> > + nf_flow_offload_xdp_cancel(flowtable, dev, cmd);
> > return err;
> > + }
> > return nf_flow_table_block_setup(flowtable, &bo, cmd);
> > }
> > diff --git a/net/netfilter/nf_flow_table_xdp.c b/net/netfilter/nf_flow_table_xdp.c
> > new file mode 100644
> > index 0000000000000..b9bdf27ba9bd3
> > --- /dev/null
> > +++ b/net/netfilter/nf_flow_table_xdp.c
> > @@ -0,0 +1,163 @@
> > +// SPDX-License-Identifier: GPL-2.0-only
> > +#include <linux/kernel.h>
> > +#include <linux/module.h>
> > +#include <linux/netfilter.h>
> > +#include <linux/rhashtable.h>
> > +#include <linux/netdevice.h>
> > +#include <net/flow_offload.h>
> > +#include <net/netfilter/nf_flow_table.h>
> > +
> > +struct flow_offload_xdp_ft {
> > + struct list_head head;
> > + struct nf_flowtable *ft;
> > + struct rcu_head rcuhead;
> > +};
> > +
> > +struct flow_offload_xdp {
> > + struct hlist_node hnode;
> > + unsigned long net_device_addr;
> > + struct list_head head;
> > +};
> > +
> > +#define NF_XDP_HT_BITS 4
> > +static DEFINE_HASHTABLE(nf_xdp_hashtable, NF_XDP_HT_BITS);
> > +static DEFINE_MUTEX(nf_xdp_hashtable_lock);
> > +
> > +/* caller must hold rcu read lock */
> > +struct nf_flowtable *nf_flowtable_by_dev(const struct net_device *dev)
> > +{
> > + unsigned long key = (unsigned long)dev;
> > + struct flow_offload_xdp *iter;
> > +
> > + hash_for_each_possible_rcu(nf_xdp_hashtable, iter, hnode, key) {
> > + if (key == iter->net_device_addr) {
> > + struct flow_offload_xdp_ft *ft_elem;
> > +
> > + /* The user is supposed to insert a given net_device
> > + * just into a single nf_flowtable so we always return
> > + * the first element here.
> > + */
> > + ft_elem = list_first_or_null_rcu(&iter->head,
> > + struct flow_offload_xdp_ft,
> > + head);
> > + return ft_elem ? ft_elem->ft : NULL;
> > + }
> > + }
> > +
> > + return NULL;
> > +}
> > +
> > +static int nf_flowtable_by_dev_insert(struct nf_flowtable *ft,
> > + const struct net_device *dev)
> > +{
> > + struct flow_offload_xdp *iter, *elem = NULL;
> > + unsigned long key = (unsigned long)dev;
> > + struct flow_offload_xdp_ft *ft_elem;
> > +
> > + ft_elem = kzalloc(sizeof(*ft_elem), GFP_KERNEL_ACCOUNT);
> > + if (!ft_elem)
> > + return -ENOMEM;
> > +
> > + ft_elem->ft = ft;
> > +
> > + mutex_lock(&nf_xdp_hashtable_lock);
> > +
> > + hash_for_each_possible(nf_xdp_hashtable, iter, hnode, key) {
> > + if (key == iter->net_device_addr) {
> > + elem = iter;
> > + break;
> > + }
> > + }
> > +
> > + if (!elem) {
> > + elem = kzalloc(sizeof(*elem), GFP_KERNEL_ACCOUNT);
> > + if (!elem)
> > + goto err_unlock;
> > +
> > + elem->net_device_addr = key;
>
> Looks good. As I understand it (but just to double check), if a device goes away then
> the upper layers in the nf flowtable code will trigger nf_flowtable_by_dev_remove()
> based on the device pointer to clean this up again from nf_xdp_hashtable.
Yep, correct. The core nft infrastructure runs nf_flow_offload_xdp_setup() with cmd set
to FLOW_BLOCK_UNBIND (so we run nf_flowtable_by_dev_remove()) when the net_device
is removed.
Regards,
Lorenzo
>
> > + INIT_LIST_HEAD(&elem->head);
> > + hash_add_rcu(nf_xdp_hashtable, &elem->hnode, key);
> > + }
> > + list_add_tail_rcu(&ft_elem->head, &elem->head);
> > +
> > + mutex_unlock(&nf_xdp_hashtable_lock);
> > +
> > + return 0;
> > +
> > +err_unlock:
> > + mutex_unlock(&nf_xdp_hashtable_lock);
> > + kfree(ft_elem);
> > +
> > + return -ENOMEM;
> > +}
Download attachment "signature.asc" of type "application/pgp-signature" (229 bytes)
Powered by blists - more mailing lists