lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <61553c87-a3d3-07ae-8c2f-93cf0cb52263@nvidia.com>
Date:   Mon, 24 Jan 2022 20:32:07 +0200
From:   Nikolay Aleksandrov <nikolay@...dia.com>
To:     Lorenzo Bianconi <lorenzo@...nel.org>, <bpf@...r.kernel.org>,
        <netdev@...r.kernel.org>
CC:     <lorenzo.bianconi@...hat.com>, <davem@...emloft.net>,
        <kuba@...nel.org>, <ast@...nel.org>, <daniel@...earbox.net>,
        <dsahern@...nel.org>, <komachi.yoshiki@...il.com>,
        <brouer@...hat.com>, <toke@...hat.com>, <memxor@...il.com>,
        <andrii.nakryiko@...il.com>, Roopa Prabhu <roopa@...dia.com>,
        "bridge@...ts.linux-foundation.org" 
        <bridge@...ts.linux-foundation.org>,
        Ido Schimmel <idosch@...sch.org>
Subject: Re: [RFC bpf-next 1/2] net: bridge: add unstable
 br_fdb_find_port_from_ifindex helper

On 24/01/2022 19:20, Lorenzo Bianconi wrote:
> Similar to bpf_xdp_ct_lookup routine, introduce
> br_fdb_find_port_from_ifindex unstable helper in order to accelerate
> linux bridge with XDP. br_fdb_find_port_from_ifindex will perform a
> lookup in the associated bridge fdb table and it will return the
> output ifindex if the destination address is associated to a bridge
> port or -ENODEV for BOM traffic or if lookup fails.
> 
> Signed-off-by: Lorenzo Bianconi <lorenzo@...nel.org>
> ---
>  net/bridge/br.c         | 21 +++++++++++++
>  net/bridge/br_fdb.c     | 67 +++++++++++++++++++++++++++++++++++------
>  net/bridge/br_private.h | 12 ++++++++
>  3 files changed, 91 insertions(+), 9 deletions(-)
> 

Hi Lorenzo,
Please CC bridge maintainers for bridge-related patches, I've added Roopa and the
bridge mailing list as well. Aside from that, the change is certainly interesting, I've been
thinking about a similar helper for some time now, few comments below.

Have you thought about the egress path and if by the current bridge state the packet would
be allowed to egress through the found port from the lookup? I'd guess you have to keep updating
the active ports list based on netlink events, but there's a lot of egress bridge logic that
either have to be duplicated or somehow synced. Check should_deliver() (br_forward.c) and later
egress stages, but I see how this is a good first step and perhaps we can build upon it.
There are a few possible solutions, but I haven't tried anything yet, most obvious being
yet another helper. :)

> diff --git a/net/bridge/br.c b/net/bridge/br.c
> index 1fac72cc617f..d2d1c2341d9c 100644
> --- a/net/bridge/br.c
> +++ b/net/bridge/br.c
> @@ -16,6 +16,8 @@
>  #include <net/llc.h>
>  #include <net/stp.h>
>  #include <net/switchdev.h>
> +#include <linux/btf.h>
> +#include <linux/btf_ids.h>
>  
>  #include "br_private.h"
>  
> @@ -365,6 +367,17 @@ static const struct stp_proto br_stp_proto = {
>  	.rcv	= br_stp_rcv,
>  };
>  
> +#if (IS_ENABLED(CONFIG_DEBUG_INFO_BTF) || IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES))
> +BTF_SET_START(br_xdp_fdb_check_kfunc_ids)
> +BTF_ID(func, br_fdb_find_port_from_ifindex)
> +BTF_SET_END(br_xdp_fdb_check_kfunc_ids)
> +
> +static const struct btf_kfunc_id_set br_xdp_fdb_kfunc_set = {
> +	.owner     = THIS_MODULE,
> +	.check_set = &br_xdp_fdb_check_kfunc_ids,
> +};
> +#endif
> +
>  static int __init br_init(void)
>  {
>  	int err;
> @@ -417,6 +430,14 @@ static int __init br_init(void)
>  		"need this.\n");
>  #endif
>  
> +#if (IS_ENABLED(CONFIG_DEBUG_INFO_BTF) || IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES))
> +	err = register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &br_xdp_fdb_kfunc_set);
> +	if (err < 0) {
> +		br_netlink_fini();
> +		goto err_out6;

Add err_out7 and handle it there please. Let's keep it consistent.
Also I cannot find register_btf_kfunc_id_set() in net-next or Linus' master, but
should it be paired with an unregister on unload (br_deinit) ?

> +	}
> +#endif
> +
>  	return 0;
>  
>  err_out6:
> diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
> index 6ccda68bd473..cd3afa240298 100644
> --- a/net/bridge/br_fdb.c
> +++ b/net/bridge/br_fdb.c
> @@ -235,30 +235,79 @@ static struct net_bridge_fdb_entry *br_fdb_find(struct net_bridge *br,
>  	return fdb;
>  }
>  
> -struct net_device *br_fdb_find_port(const struct net_device *br_dev,
> -				    const unsigned char *addr,
> -				    __u16 vid)
> +static struct net_device *
> +__br_fdb_find_port(const struct net_device *br_dev,
> +		   const unsigned char *addr,
> +		   __u16 vid, bool ts_update)
>  {
>  	struct net_bridge_fdb_entry *f;
> -	struct net_device *dev = NULL;
>  	struct net_bridge *br;
>  
> -	ASSERT_RTNL();
> -
>  	if (!netif_is_bridge_master(br_dev))
>  		return NULL;
>  
>  	br = netdev_priv(br_dev);
> -	rcu_read_lock();
>  	f = br_fdb_find_rcu(br, addr, vid);
> -	if (f && f->dst)
> -		dev = f->dst->dev;
> +
> +	if (f && f->dst) {
> +		f->updated = jiffies;
> +		f->used = f->updated;

This is wrong, f->updated should be set only if anything changed for the fdb.
Also you can optimize f->used a little bit if you check if jiffies != current value
before setting, you can have millions of packets per sec dirtying that cache line.

Aside from the above, it will change expected behaviour for br_fdb_find_port users
(mlxsw, added Ido to CC as well) because it will mark the fdb as active and refresh it
which should be done only for the ebpf helper, or might be exported through another helper
so ebpf users can decide if they want it updated. There are 2 different use cases and it is
not ok for both as we'll start refreshing fdbs that have been inactive for a while
and would've expired otherwise.

> +		return f->dst->dev;

This is wrong as well, f->dst can become NULL (fdb switched to point to the bridge itself).
You should make sure to read f->dst only once and work with the result. I know it's
been like that, but it was ok when accessed with rtnl held.

> +	}
> +	return NULL;
> +}
> +
> +struct net_device *br_fdb_find_port(const struct net_device *br_dev,
> +				    const unsigned char *addr,
> +				    __u16 vid)
> +{
> +	struct net_device *dev;
> +
> +	ASSERT_RTNL();
> +
> +	rcu_read_lock();
> +	dev = __br_fdb_find_port(br_dev, addr, vid, false);
>  	rcu_read_unlock();
>  
>  	return dev;
>  }
>  EXPORT_SYMBOL_GPL(br_fdb_find_port);
>  
> +int br_fdb_find_port_from_ifindex(struct xdp_md *xdp_ctx,
> +				  struct bpf_fdb_lookup *opt,
> +				  u32 opt__sz)
> +{
> +	struct xdp_buff *ctx = (struct xdp_buff *)xdp_ctx;
> +	struct net_bridge_port *port;
> +	struct net_device *dev;
> +	int ret = -ENODEV;
> +
> +	BUILD_BUG_ON(sizeof(struct bpf_fdb_lookup) != NF_BPF_FDB_OPTS_SZ);
> +	if (!opt || opt__sz != sizeof(struct bpf_fdb_lookup))
> +		return -ENODEV;
> +
> +	rcu_read_lock();
> +
> +	dev = dev_get_by_index_rcu(dev_net(ctx->rxq->dev), opt->ifindex);
> +	if (!dev)
> +		goto out;
> +
> +	if (unlikely(!netif_is_bridge_port(dev)))
> +		goto out;

This check shouldn't be needed if the port checks below succeed.

> +
> +	port = br_port_get_check_rcu(dev);
> +	if (unlikely(!port || !port->br))
> +		goto out;
> +
> +	dev = __br_fdb_find_port(port->br->dev, opt->addr, opt->vid, true);
> +	if (dev)
> +		ret = dev->ifindex;
> +out:
> +	rcu_read_unlock();
> +
> +	return ret;
> +}
> +
>  struct net_bridge_fdb_entry *br_fdb_find_rcu(struct net_bridge *br,
>  					     const unsigned char *addr,
>  					     __u16 vid)
> diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
> index 2661dda1a92b..64d4f1727da2 100644
> --- a/net/bridge/br_private.h
> +++ b/net/bridge/br_private.h
> @@ -18,6 +18,7 @@
>  #include <linux/if_vlan.h>
>  #include <linux/rhashtable.h>
>  #include <linux/refcount.h>
> +#include <linux/bpf.h>
>  
>  #define BR_HASH_BITS 8
>  #define BR_HASH_SIZE (1 << BR_HASH_BITS)
> @@ -2094,4 +2095,15 @@ void br_do_proxy_suppress_arp(struct sk_buff *skb, struct net_bridge *br,
>  void br_do_suppress_nd(struct sk_buff *skb, struct net_bridge *br,
>  		       u16 vid, struct net_bridge_port *p, struct nd_msg *msg);
>  struct nd_msg *br_is_nd_neigh_msg(struct sk_buff *skb, struct nd_msg *m);
> +
> +#define NF_BPF_FDB_OPTS_SZ	12
> +struct bpf_fdb_lookup {
> +	u8	addr[ETH_ALEN]; /* ETH_ALEN */
> +	u16	vid;
> +	u32	ifindex;
> +};
> +
> +int br_fdb_find_port_from_ifindex(struct xdp_md *xdp_ctx,
> +				  struct bpf_fdb_lookup *opt,
> +				  u32 opt__sz);
>  #endif

Thanks,
 Nik

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ