Message-ID: <7f26d180d40e6c575360aff4706a4e2957975cba.1716026761.git.lorenzo@kernel.org>
Date: Sat, 18 May 2024 12:12:35 +0200
From: Lorenzo Bianconi <lorenzo@...nel.org>
To: bpf@...r.kernel.org
Cc: pablo@...filter.org,
kadlec@...filter.org,
davem@...emloft.net,
edumazet@...gle.com,
kuba@...nel.org,
pabeni@...hat.com,
netfilter-devel@...r.kernel.org,
netdev@...r.kernel.org,
ast@...nel.org,
daniel@...earbox.net,
andrii@...nel.org,
lorenzo.bianconi@...hat.com,
toke@...hat.com,
fw@...len.de,
hawk@...nel.org,
horms@...nel.org,
donhunte@...hat.com,
memxor@...il.com
Subject: [PATCH bpf-next v2 1/4] netfilter: nf_tables: add flowtable map for xdp offload
From: Florian Westphal <fw@...len.de>
This adds a small internal mapping table so that a new bpf (xdp) kfunc
can perform lookups in a flowtable.
As-is, an xdp program has access to the device pointer, but there is no
way to do a lookup in a flowtable -- the needed struct cannot be
obtained without questionable stunts.
This allows obtaining an nf_flowtable pointer given a net_device
structure.
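For illustration only (not part of this patch), a rough sketch of how a
caller, e.g. the kfunc added later in this series, is expected to use
the helper; 'dev' and 'tuple' stand in for data derived from the
received packet and its context:

    struct flow_offload_tuple_rhash *tuplehash;
    struct nf_flowtable *ft;

    rcu_read_lock();
    ft = nf_flowtable_by_dev(dev);          /* dev: receiving net_device */
    if (ft)                                 /* tuple: built from the packet */
        tuplehash = flow_offload_lookup(ft, &tuple);
    rcu_read_unlock();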
In order to keep backward compatibility, the infrastructure allows the
user to add a given device to multiple flowtables, but lookups will
always return the first added mapping, since the correct configuration
is assumed to be a 1:1 mapping between flowtables and net_devices.
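Schematically (placeholder names ft_a, ft_b and dev, not meant as real
code): if a device is (mis)configured into two flowtables, lookups keep
returning the flowtable that was bound first:

    nf_flowtable_by_dev_insert(ft_a, dev);  /* first binding wins */
    nf_flowtable_by_dev_insert(ft_b, dev);  /* allowed, but not returned */

    rcu_read_lock();
    ft = nf_flowtable_by_dev(dev);          /* ft == ft_a */
    rcu_read_unlock();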
Signed-off-by: Florian Westphal <fw@...len.de>
Co-developed-by: Lorenzo Bianconi <lorenzo@...nel.org>
Signed-off-by: Lorenzo Bianconi <lorenzo@...nel.org>
---
include/net/netfilter/nf_flow_table.h | 2 +
net/netfilter/nf_flow_table_offload.c | 161 +++++++++++++++++++++++++-
2 files changed, 161 insertions(+), 2 deletions(-)
diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index 9abb7ee40d72f..0bbe6ea8e0651 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -305,6 +305,8 @@ struct flow_ports {
__be16 source, dest;
};
+struct nf_flowtable *nf_flowtable_by_dev(const struct net_device *dev);
+
unsigned int nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state);
unsigned int nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
index a010b25076ca0..1acfcdbee42e8 100644
--- a/net/netfilter/nf_flow_table_offload.c
+++ b/net/netfilter/nf_flow_table_offload.c
@@ -17,6 +17,129 @@ static struct workqueue_struct *nf_flow_offload_add_wq;
static struct workqueue_struct *nf_flow_offload_del_wq;
static struct workqueue_struct *nf_flow_offload_stats_wq;
+struct flow_offload_xdp_ft {
+ struct list_head head;
+ struct nf_flowtable *ft;
+ struct rcu_head rcuhead;
+};
+
+struct flow_offload_xdp {
+ struct hlist_node hnode;
+ unsigned long net_device_addr;
+ struct list_head head;
+};
+
+#define NF_XDP_HT_BITS 4
+static DEFINE_HASHTABLE(nf_xdp_hashtable, NF_XDP_HT_BITS);
+static DEFINE_MUTEX(nf_xdp_hashtable_lock);
+
+/* caller must hold rcu read lock */
+struct nf_flowtable *nf_flowtable_by_dev(const struct net_device *dev)
+{
+ unsigned long key = (unsigned long)dev;
+ struct flow_offload_xdp *iter;
+
+ hash_for_each_possible_rcu(nf_xdp_hashtable, iter, hnode, key) {
+ if (key == iter->net_device_addr) {
+ struct flow_offload_xdp_ft *ft_elem;
+
+ /* The user is supposed to insert a given net_device
+ * just into a single nf_flowtable so we always return
+ * the first element here.
+ */
+ ft_elem = list_first_or_null_rcu(&iter->head,
+ struct flow_offload_xdp_ft,
+ head);
+ return ft_elem ? ft_elem->ft : NULL;
+ }
+ }
+
+ return NULL;
+}
+
+static int nf_flowtable_by_dev_insert(struct nf_flowtable *ft,
+ const struct net_device *dev)
+{
+ struct flow_offload_xdp *iter, *elem = NULL;
+ unsigned long key = (unsigned long)dev;
+ struct flow_offload_xdp_ft *ft_elem;
+
+ ft_elem = kzalloc(sizeof(*ft_elem), GFP_KERNEL_ACCOUNT);
+ if (!ft_elem)
+ return -ENOMEM;
+
+ ft_elem->ft = ft;
+
+ mutex_lock(&nf_xdp_hashtable_lock);
+
+ hash_for_each_possible(nf_xdp_hashtable, iter, hnode, key) {
+ if (key == iter->net_device_addr) {
+ elem = iter;
+ break;
+ }
+ }
+
+ if (!elem) {
+ elem = kzalloc(sizeof(*elem), GFP_KERNEL_ACCOUNT);
+ if (!elem)
+ goto err_unlock;
+
+ elem->net_device_addr = key;
+ INIT_LIST_HEAD(&elem->head);
+ hash_add_rcu(nf_xdp_hashtable, &elem->hnode, key);
+ }
+ list_add_tail_rcu(&ft_elem->head, &elem->head);
+
+ mutex_unlock(&nf_xdp_hashtable_lock);
+
+ return 0;
+
+err_unlock:
+ mutex_unlock(&nf_xdp_hashtable_lock);
+ kfree(ft_elem);
+
+ return -ENOMEM;
+}
+
+static void nf_flowtable_by_dev_remove(struct nf_flowtable *ft,
+ const struct net_device *dev)
+{
+ struct flow_offload_xdp *iter, *elem = NULL;
+ unsigned long key = (unsigned long)dev;
+
+ mutex_lock(&nf_xdp_hashtable_lock);
+
+ hash_for_each_possible(nf_xdp_hashtable, iter, hnode, key) {
+ if (key == iter->net_device_addr) {
+ elem = iter;
+ break;
+ }
+ }
+
+ if (elem) {
+ struct flow_offload_xdp_ft *ft_elem, *ft_next;
+
+ list_for_each_entry_safe(ft_elem, ft_next, &elem->head, head) {
+ if (ft_elem->ft == ft) {
+ list_del_rcu(&ft_elem->head);
+ kfree_rcu(ft_elem, rcuhead);
+ }
+ }
+
+ if (list_empty(&elem->head))
+ hash_del_rcu(&elem->hnode);
+ else
+ elem = NULL;
+ }
+
+ mutex_unlock(&nf_xdp_hashtable_lock);
+
+ if (elem) {
+ synchronize_rcu();
+ kfree(elem);
+ }
+}
+
struct flow_offload_work {
struct list_head list;
enum flow_cls_command cmd;
@@ -1183,6 +1306,38 @@ static int nf_flow_table_offload_cmd(struct flow_block_offload *bo,
return 0;
}
+static int nf_flow_offload_xdp_setup(struct nf_flowtable *flowtable,
+ struct net_device *dev,
+ enum flow_block_command cmd)
+{
+ switch (cmd) {
+ case FLOW_BLOCK_BIND:
+ return nf_flowtable_by_dev_insert(flowtable, dev);
+ case FLOW_BLOCK_UNBIND:
+ nf_flowtable_by_dev_remove(flowtable, dev);
+ return 0;
+ }
+
+ WARN_ON_ONCE(1);
+ return 0;
+}
+
+static void nf_flow_offload_xdp_cancel(struct nf_flowtable *flowtable,
+ struct net_device *dev,
+ enum flow_block_command cmd)
+{
+ switch (cmd) {
+ case FLOW_BLOCK_BIND:
+ nf_flowtable_by_dev_remove(flowtable, dev);
+ return;
+ case FLOW_BLOCK_UNBIND:
+ /* We do not re-bind in case hw offload would report error
+ * on *unregister*.
+ */
+ break;
+ }
+}
+
int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
struct net_device *dev,
enum flow_block_command cmd)
@@ -1192,7 +1347,7 @@ int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
int err;
if (!nf_flowtable_hw_offload(flowtable))
- return 0;
+ return nf_flow_offload_xdp_setup(flowtable, dev, cmd);
if (dev->netdev_ops->ndo_setup_tc)
err = nf_flow_table_offload_cmd(&bo, flowtable, dev, cmd,
@@ -1200,8 +1355,10 @@ int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
else
err = nf_flow_table_indr_offload_cmd(&bo, flowtable, dev, cmd,
&extack);
- if (err < 0)
+ if (err < 0) {
+ nf_flow_offload_xdp_cancel(flowtable, dev, cmd);
return err;
+ }
return nf_flow_table_block_setup(flowtable, &bo, cmd);
}
--
2.45.1