[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <ZVy99gga2EnjnTWP@lore-desk>
Date: Tue, 21 Nov 2023 15:25:58 +0100
From: Lorenzo Bianconi <lorenzo@...nel.org>
To: Florian Westphal <fw@...len.de>
Cc: netfilter-devel@...r.kernel.org, netdev@...r.kernel.org
Subject: Re: [PATCH nf-next 7/8] netfilter: nf_tables: add flowtable map for
xdp offload
> This adds a small internal mapping table so that a new bpf (xdp) kfunc
> can perform lookups in a flowtable.
>
> As-is, an xdp program has access to the device pointer, but no way to do a
> lookup in a flowtable -- there is no way to obtain the needed struct
> without questionable stunts.
>
> This allows obtaining an nf_flowtable pointer given a net_device
> structure.
>
> A device cannot be added to multiple flowtables; the mapping needs
> to be unique. This is enforced when a flowtable with the
> NF_FLOWTABLE_XDP_OFFLOAD flag is added.
>
> Exposure of this NF_FLOWTABLE_XDP_OFFLOAD in UAPI could be avoided,
> iff the 'net_device maps to 0 or 1 flowtable' paradigm is enforced
> regardless of offload-or-not flag.
>
> HOWEVER, that does break existing behaviour.
>
> An alternative would be to repurpose the hw offload flag by allowing
> XDP fallback when hw offload cannot be done due to lack of ndo
> callbacks.
>
> Signed-off-by: Florian Westphal <fw@...len.de>
Tested-by: Lorenzo Bianconi <lorenzo@...nel.org>
> ---
> include/net/netfilter/nf_flow_table.h | 7 ++
> net/netfilter/nf_flow_table_offload.c | 131 +++++++++++++++++++++++++-
> net/netfilter/nf_tables_api.c | 3 +-
> 3 files changed, 139 insertions(+), 2 deletions(-)
>
> diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
> index 11985d9b8370..b8b7fcb98732 100644
> --- a/include/net/netfilter/nf_flow_table.h
> +++ b/include/net/netfilter/nf_flow_table.h
> @@ -93,6 +93,11 @@ static inline bool nf_flowtable_hw_offload(struct nf_flowtable *flowtable)
> return flowtable->flags & NF_FLOWTABLE_HW_OFFLOAD;
> }
>
> +static inline bool nf_flowtable_xdp_offload(struct nf_flowtable *flowtable)
> +{
> + return flowtable->flags & NF_FLOWTABLE_XDP_OFFLOAD;
> +}
> +
> enum flow_offload_tuple_dir {
> FLOW_OFFLOAD_DIR_ORIGINAL = IP_CT_DIR_ORIGINAL,
> FLOW_OFFLOAD_DIR_REPLY = IP_CT_DIR_REPLY,
> @@ -299,6 +304,8 @@ struct flow_ports {
> __be16 source, dest;
> };
>
> +struct nf_flowtable *nf_flowtable_by_dev(const struct net_device *dev);
> +
> unsigned int nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
> const struct nf_hook_state *state);
> unsigned int nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
> diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
> index a010b25076ca..9ec7aa4ad2e5 100644
> --- a/net/netfilter/nf_flow_table_offload.c
> +++ b/net/netfilter/nf_flow_table_offload.c
> @@ -17,6 +17,92 @@ static struct workqueue_struct *nf_flow_offload_add_wq;
> static struct workqueue_struct *nf_flow_offload_del_wq;
> static struct workqueue_struct *nf_flow_offload_stats_wq;
>
> +struct flow_offload_xdp {
> + struct hlist_node hnode;
> +
> + unsigned long net_device_addr;
> + struct nf_flowtable *ft;
> +
> + struct rcu_head rcuhead;
> +};
> +
> +#define NF_XDP_HT_BITS 4
> +static DEFINE_HASHTABLE(nf_xdp_hashtable, NF_XDP_HT_BITS);
> +static DEFINE_MUTEX(nf_xdp_hashtable_lock);
> +
> +/* caller must hold rcu read lock */
> +struct nf_flowtable *nf_flowtable_by_dev(const struct net_device *dev)
> +{
> + unsigned long key = (unsigned long)dev;
> + const struct flow_offload_xdp *cur;
> +
> + hash_for_each_possible_rcu(nf_xdp_hashtable, cur, hnode, key) {
> + if (key == cur->net_device_addr)
> + return cur->ft;
> + }
> +
> + return NULL;
> +}
> +
> +static int nf_flowtable_by_dev_insert(struct nf_flowtable *ft,
> + const struct net_device *dev)
> +{
> + unsigned long key = (unsigned long)dev;
> + struct flow_offload_xdp *cur;
> + int err = 0;
> +
> + mutex_lock(&nf_xdp_hashtable_lock);
> + hash_for_each_possible(nf_xdp_hashtable, cur, hnode, key) {
> + if (key != cur->net_device_addr)
> + continue;
> + err = -EEXIST;
> + break;
> + }
> +
> + if (err == 0) {
> + struct flow_offload_xdp *new;
> +
> + new = kzalloc(sizeof(*new), GFP_KERNEL_ACCOUNT);
> + if (new) {
> + new->net_device_addr = key;
> + new->ft = ft;
> +
> + hash_add_rcu(nf_xdp_hashtable, &new->hnode, key);
> + } else {
> + err = -ENOMEM;
> + }
> + }
> +
> + mutex_unlock(&nf_xdp_hashtable_lock);
> +
> + DEBUG_NET_WARN_ON_ONCE(err == 0 && nf_flowtable_by_dev(dev) != ft);
> +
> + return err;
> +}
> +
> +static void nf_flowtable_by_dev_remove(const struct net_device *dev)
> +{
> + unsigned long key = (unsigned long)dev;
> + struct flow_offload_xdp *cur;
> + bool found = false;
> +
> + mutex_lock(&nf_xdp_hashtable_lock);
> +
> + hash_for_each_possible(nf_xdp_hashtable, cur, hnode, key) {
> + if (key != cur->net_device_addr)
> + continue;
> +
> + hash_del_rcu(&cur->hnode);
> + kfree_rcu(cur, rcuhead);
> + found = true;
> + break;
> + }
> +
> + mutex_unlock(&nf_xdp_hashtable_lock);
> +
> + WARN_ON_ONCE(!found);
> +}
> +
> struct flow_offload_work {
> struct list_head list;
> enum flow_cls_command cmd;
> @@ -1183,6 +1269,44 @@ static int nf_flow_table_offload_cmd(struct flow_block_offload *bo,
> return 0;
> }
>
> +static int nf_flow_offload_xdp_setup(struct nf_flowtable *flowtable,
> + struct net_device *dev,
> + enum flow_block_command cmd)
> +{
> + if (!nf_flowtable_xdp_offload(flowtable))
> + return 0;
> +
> + switch (cmd) {
> + case FLOW_BLOCK_BIND:
> + return nf_flowtable_by_dev_insert(flowtable, dev);
> + case FLOW_BLOCK_UNBIND:
> + nf_flowtable_by_dev_remove(dev);
> + return 0;
> + }
> +
> + WARN_ON_ONCE(1);
> + return 0;
> +}
> +
> +static void nf_flow_offload_xdp_cancel(struct nf_flowtable *flowtable,
> + struct net_device *dev,
> + enum flow_block_command cmd)
> +{
> + if (!nf_flowtable_xdp_offload(flowtable))
> + return;
> +
> + switch (cmd) {
> + case FLOW_BLOCK_BIND:
> + nf_flowtable_by_dev_remove(dev);
> + return;
> + case FLOW_BLOCK_UNBIND:
> + /* We do not re-bind in case hw offload would report error
> + * on *unregister*.
> + */
> + break;
> + }
> +}
> +
> int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
> struct net_device *dev,
> enum flow_block_command cmd)
> @@ -1191,6 +1315,9 @@ int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
> struct flow_block_offload bo;
> int err;
>
> + if (nf_flow_offload_xdp_setup(flowtable, dev, cmd))
> + return -EBUSY;
> +
> if (!nf_flowtable_hw_offload(flowtable))
> return 0;
>
> @@ -1200,8 +1327,10 @@ int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
> else
> err = nf_flow_table_indr_offload_cmd(&bo, flowtable, dev, cmd,
> &extack);
> - if (err < 0)
> + if (err < 0) {
> + nf_flow_offload_xdp_cancel(flowtable, dev, cmd);
> return err;
> + }
>
> return nf_flow_table_block_setup(flowtable, &bo, cmd);
> }
> diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
> index 4e21311ec768..223ca4d0e2a5 100644
> --- a/net/netfilter/nf_tables_api.c
> +++ b/net/netfilter/nf_tables_api.c
> @@ -8198,7 +8198,8 @@ static bool nft_flowtable_offload_clash(struct net *net,
> const struct nft_table *table;
>
> /* No offload requested, no need to validate */
> - if (!nf_flowtable_hw_offload(flowtable->ft))
> + if (!nf_flowtable_hw_offload(flowtable->ft) &&
> + !nf_flowtable_xdp_offload(flowtable->ft))
> return false;
>
> nft_net = nft_pernet(net);
> --
> 2.41.0
>
Download attachment "signature.asc" of type "application/pgp-signature" (229 bytes)
Powered by blists - more mailing lists