lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <ef77b43d-0b5a-9e5f-640a-5e3c981bd642@blackwall.org>
Date: Thu, 13 Jul 2023 12:33:19 +0300
From: Nikolay Aleksandrov <razor@...ckwall.org>
To: Ido Schimmel <idosch@...dia.com>, netdev@...r.kernel.org,
 bridge@...ts.linux-foundation.org
Cc: davem@...emloft.net, kuba@...nel.org, pabeni@...hat.com,
 edumazet@...gle.com, roopa@...dia.com, dsahern@...il.com, petrm@...dia.com,
 taspelund@...dia.com
Subject: Re: [RFC PATCH net-next 3/4] bridge: Add backup nexthop ID support

On 13/07/2023 10:09, Ido Schimmel wrote:
> Add a new bridge port attribute that allows attaching a nexthop object
> ID to an skb that is redirected to a backup bridge port with VLAN
> tunneling enabled.
> 
> Specifically, when redirecting a known unicast packet, read the backup
> nexthop ID from the bridge port that lost its carrier and set it in the
> bridge control block of the skb before forwarding it via the backup
> port. Note that reading the ID from the bridge port should not result in
> a cache miss as the ID is added next to the 'backup_port' field that was
> already accessed. After this change, the 'state' field still stays on
> the first cache line, together with other data path related fields such
> as 'flags and 'vlgrp':
> 
> struct net_bridge_port {
>          struct net_bridge *        br;                   /*     0     8 */
>          struct net_device *        dev;                  /*     8     8 */
>          netdevice_tracker          dev_tracker;          /*    16     0 */
>          struct list_head           list;                 /*    16    16 */
>          long unsigned int          flags;                /*    32     8 */
>          struct net_bridge_vlan_group * vlgrp;            /*    40     8 */
>          struct net_bridge_port *   backup_port;          /*    48     8 */
>          u32                        backup_nhid;          /*    56     4 */
>          u8                         priority;             /*    60     1 */
>          u8                         state;                /*    61     1 */
>          u16                        port_no;              /*    62     2 */
>          /* --- cacheline 1 boundary (64 bytes) --- */
> [...]
> } __attribute__((__aligned__(8)));
> 
> When forwarding an skb via a bridge port that has VLAN tunneling
> enabled, check if the backup nexthop ID stored in the bridge control
> block is valid (i.e., not zero). If so, instead of attaching the
> pre-allocated metadata (that only has the tunnel key set), allocate a
> new metadata, set both the tunnel key and the nexthop object ID and
> attach it to the skb.
> 
> By default, do not dump the new attribute to user space as a value of
> zero is an invalid nexthop object ID.
> 
> The above is useful for EVPN multihoming. When one of the links
> composing an Ethernet Segment (ES) fails, traffic needs to be redirected
> towards the host via one of the other ES peers. For example, if a host
> is multihomed to three different VTEPs, the backup port of each ES link
> needs to be set to the VXLAN device and the backup nexthop ID needs to
> point to an FDB nexthop group that includes the IP addresses of the
> other two VTEPs. The VXLAN driver will extract the ID from the metadata
> of the redirected skb, calculate its flow hash and forward it towards
> one of the other VTEPs. If the ID does not exist, or represents an
> invalid nexthop object, the VXLAN driver will drop the skb. This
> relieves the bridge driver from the need to validate the ID.
> 
> Signed-off-by: Ido Schimmel <idosch@...dia.com>
> ---
>   include/uapi/linux/if_link.h |  1 +
>   net/bridge/br_forward.c      |  1 +
>   net/bridge/br_netlink.c      | 12 ++++++++++++
>   net/bridge/br_private.h      |  3 +++
>   net/bridge/br_vlan_tunnel.c  | 15 +++++++++++++++
>   net/core/rtnetlink.c         |  2 +-
>   6 files changed, 33 insertions(+), 1 deletion(-)
> 

One comment below, with that fixed you can add
Acked-by: Nikolay Aleksandrov <razor@...ckwall.org>

> diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
> index 0f6a0fe09bdb..ce3117df9cec 100644
> --- a/include/uapi/linux/if_link.h
> +++ b/include/uapi/linux/if_link.h
> @@ -570,6 +570,7 @@ enum {
>   	IFLA_BRPORT_MCAST_N_GROUPS,
>   	IFLA_BRPORT_MCAST_MAX_GROUPS,
>   	IFLA_BRPORT_NEIGH_VLAN_SUPPRESS,
> +	IFLA_BRPORT_BACKUP_NHID,
>   	__IFLA_BRPORT_MAX
>   };
>   #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1)
> diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
> index 6116eba1bd89..9d7bc8b96b53 100644
> --- a/net/bridge/br_forward.c
> +++ b/net/bridge/br_forward.c
> @@ -154,6 +154,7 @@ void br_forward(const struct net_bridge_port *to,
>   		backup_port = rcu_dereference(to->backup_port);
>   		if (unlikely(!backup_port))
>   			goto out;
> +		BR_INPUT_SKB_CB(skb)->backup_nhid = READ_ONCE(to->backup_nhid);
>   		to = backup_port;
>   	}
>   
> diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
> index 05c5863d2e20..10f0d33d8ccf 100644
> --- a/net/bridge/br_netlink.c
> +++ b/net/bridge/br_netlink.c
> @@ -211,6 +211,7 @@ static inline size_t br_port_info_size(void)
>   		+ nla_total_size(sizeof(u8))	/* IFLA_BRPORT_MRP_IN_OPEN */
>   		+ nla_total_size(sizeof(u32))	/* IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT */
>   		+ nla_total_size(sizeof(u32))	/* IFLA_BRPORT_MCAST_EHT_HOSTS_CNT */
> +		+ nla_total_size(sizeof(u32))	/* IFLA_BRPORT_BACKUP_NHID */
>   		+ 0;
>   }
>   
> @@ -319,6 +320,10 @@ static int br_port_fill_attrs(struct sk_buff *skb,
>   			    backup_p->dev->ifindex);
>   	rcu_read_unlock();
>   
> +	if (p->backup_nhid &&
> +	    nla_put_u32(skb, IFLA_BRPORT_BACKUP_NHID, p->backup_nhid))
> +		return -EMSGSIZE;
> +

READ_ONCE(), see the comment above backup_port :)

>   	return 0;
>   }
>   
> @@ -895,6 +900,7 @@ static const struct nla_policy br_port_policy[IFLA_BRPORT_MAX + 1] = {
>   	[IFLA_BRPORT_MCAST_N_GROUPS] = { .type = NLA_REJECT },
>   	[IFLA_BRPORT_MCAST_MAX_GROUPS] = { .type = NLA_U32 },
>   	[IFLA_BRPORT_NEIGH_VLAN_SUPPRESS] = NLA_POLICY_MAX(NLA_U8, 1),
> +	[IFLA_BRPORT_BACKUP_NHID] = { .type = NLA_U32 },
>   };
>   
>   /* Change the state of the port and notify spanning tree */
> @@ -1065,6 +1071,12 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[],
>   			return err;
>   	}
>   
> +	if (tb[IFLA_BRPORT_BACKUP_NHID]) {
> +		u32 backup_nhid = nla_get_u32(tb[IFLA_BRPORT_BACKUP_NHID]);
> +
> +		WRITE_ONCE(p->backup_nhid, backup_nhid);
> +	}
> +
>   	return 0;
>   }
>   
> diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
> index a63b32c1638e..05a965ef76f1 100644
> --- a/net/bridge/br_private.h
> +++ b/net/bridge/br_private.h
> @@ -387,6 +387,7 @@ struct net_bridge_port {
>   	struct net_bridge_vlan_group	__rcu *vlgrp;
>   #endif
>   	struct net_bridge_port		__rcu *backup_port;
> +	u32				backup_nhid;
>   
>   	/* STP */
>   	u8				priority;
> @@ -605,6 +606,8 @@ struct br_input_skb_cb {
>   	 */
>   	unsigned long fwd_hwdoms;
>   #endif
> +
> +	u32 backup_nhid;
>   };
>   
>   #define BR_INPUT_SKB_CB(__skb)	((struct br_input_skb_cb *)(__skb)->cb)
> diff --git a/net/bridge/br_vlan_tunnel.c b/net/bridge/br_vlan_tunnel.c
> index 6399a8a69d07..81833ca7a2c7 100644
> --- a/net/bridge/br_vlan_tunnel.c
> +++ b/net/bridge/br_vlan_tunnel.c
> @@ -201,6 +201,21 @@ int br_handle_egress_vlan_tunnel(struct sk_buff *skb,
>   	if (err)
>   		return err;
>   
> +	if (BR_INPUT_SKB_CB(skb)->backup_nhid) {
> +		tunnel_dst = __ip_tun_set_dst(0, 0, 0, 0, 0, TUNNEL_KEY,
> +					      tunnel_id, 0);
> +		if (!tunnel_dst)
> +			return -ENOMEM;
> +
> +		tunnel_dst->u.tun_info.mode |= IP_TUNNEL_INFO_TX |
> +					       IP_TUNNEL_INFO_BRIDGE;
> +		tunnel_dst->u.tun_info.key.nhid =
> +			BR_INPUT_SKB_CB(skb)->backup_nhid;
> +		skb_dst_set(skb, &tunnel_dst->dst);
> +
> +		return 0;
> +	}
> +
>   	tunnel_dst = rcu_dereference(vlan->tinfo.tunnel_dst);
>   	if (tunnel_dst && dst_hold_safe(&tunnel_dst->dst))
>   		skb_dst_set(skb, &tunnel_dst->dst);
> diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
> index 3ad4e030846d..9e7e3377ec10 100644
> --- a/net/core/rtnetlink.c
> +++ b/net/core/rtnetlink.c
> @@ -61,7 +61,7 @@
>   #include "dev.h"
>   
>   #define RTNL_MAX_TYPE		50
> -#define RTNL_SLAVE_MAX_TYPE	43
> +#define RTNL_SLAVE_MAX_TYPE	44
>   
>   struct rtnl_link {
>   	rtnl_doit_func		doit;


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ