[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <ZSZKjDA2ZBAHn5EH@shredder>
Date: Wed, 11 Oct 2023 10:11:08 +0300
From: Ido Schimmel <idosch@...dia.com>
To: Alce Lafranque <alce@...ranque.net>
Cc: "David S. Miller" <davem@...emloft.net>,
Eric Dumazet <edumazet@...gle.com>,
Jakub Kicinski <kuba@...nel.org>, Paolo Abeni <pabeni@...hat.com>,
David Ahern <dsahern@...nel.org>, netdev@...r.kernel.org,
Vincent Bernat <vincent@...nat.ch>
Subject: Re: [PATCH net-next v2] vxlan: add support for flowlabel inherit
Process note: Please post new versions as a separate thread with a
changelog:
https://www.kernel.org/doc/html/latest/process/maintainer-netdev.html#resending-after-review
On Sat, Oct 07, 2023 at 09:26:24AM -0500, Alce Lafranque wrote:
> By default, VXLAN encapsulation over IPv6 sets the flow label to 0, with
> an option for a fixed value. This commit adds the ability to inherit the
> flow label from the inner packet, like for other tunnel implementations.
> This enables devices using only L3 headers for ECMP to correctly balance
> VXLAN-encapsulated IPv6 packets.
>
> ```
> $ ./ip/ip link add dummy1 type dummy
> $ ./ip/ip addr add 2001:db8::2/64 dev dummy1
> $ ./ip/ip link set up dev dummy1
> $ ./ip/ip link add vxlan1 type vxlan id 100 flowlabel inherit remote 2001:db8::1 local 2001:db8::2
> $ ./ip/ip link set up dev vxlan1
> $ ./ip/ip addr add 2001:db8:1::2/64 dev vxlan1
> $ ./ip/ip link set arp off dev vxlan1
> $ ping -q 2001:db8:1::1 &
> $ tshark -d udp.port==8472,vxlan -Vpni dummy1 -c1
> [...]
> Internet Protocol Version 6, Src: 2001:db8::2, Dst: 2001:db8::1
> 0110 .... = Version: 6
> .... 0000 0000 .... .... .... .... .... = Traffic Class: 0x00 (DSCP: CS0, ECN: Not-ECT)
> .... 0000 00.. .... .... .... .... .... = Differentiated Services Codepoint: Default (0)
> .... .... ..00 .... .... .... .... .... = Explicit Congestion Notification: Not ECN-Capable Transport (0)
> .... 1011 0001 1010 1111 1011 = Flow Label: 0xb1afb
> [...]
> Virtual eXtensible Local Area Network
> Flags: 0x0800, VXLAN Network ID (VNI)
> Group Policy ID: 0
> VXLAN Network Identifier (VNI): 100
> [...]
> Internet Protocol Version 6, Src: 2001:db8:1::2, Dst: 2001:db8:1::1
> 0110 .... = Version: 6
> .... 0000 0000 .... .... .... .... .... = Traffic Class: 0x00 (DSCP: CS0, ECN: Not-ECT)
> .... 0000 00.. .... .... .... .... .... = Differentiated Services Codepoint: Default (0)
> .... .... ..00 .... .... .... .... .... = Explicit Congestion Notification: Not ECN-Capable Transport (0)
> .... 1011 0001 1010 1111 1011 = Flow Label: 0xb1afb
> ```
>
> Signed-off-by: Alce Lafranque <alce@...ranque.net>
> Co-developed-by: Vincent Bernat <vincent@...nat.ch>
> Signed-off-by: Vincent Bernat <vincent@...nat.ch>
> ---
> drivers/net/vxlan/vxlan_core.c | 15 ++++++++++++++-
> include/net/ip_tunnels.h | 11 +++++++++++
> include/net/vxlan.h | 33 +++++++++++++++++----------------
> include/uapi/linux/if_link.h | 8 ++++++++
> 4 files changed, 50 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c
> index 5b5597073b00..d1f2376c0c73 100644
> --- a/drivers/net/vxlan/vxlan_core.c
> +++ b/drivers/net/vxlan/vxlan_core.c
> @@ -2475,7 +2475,14 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
> else
> udp_sum = !(flags & VXLAN_F_UDP_ZERO_CSUM6_TX);
> #if IS_ENABLED(CONFIG_IPV6)
> - label = vxlan->cfg.label;
> + switch (vxlan->cfg.label_behavior) {
> + case VXLAN_LABEL_FIXED:
> + label = vxlan->cfg.label;
> + break;
> + case VXLAN_LABEL_INHERIT:
> + label = ip_tunnel_get_flowlabel(old_iph, skb);
> + break;
I saw the kbuild robot complaining about this. You can add a default case:
	default:
		DEBUG_NET_WARN_ON_ONCE(1);
		goto drop;
> + }
> #endif
> } else {
> if (!info) {
> @@ -3286,6 +3293,7 @@ static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {
> [IFLA_VXLAN_DF] = { .type = NLA_U8 },
> [IFLA_VXLAN_VNIFILTER] = { .type = NLA_U8 },
> [IFLA_VXLAN_LOCALBYPASS] = NLA_POLICY_MAX(NLA_U8, 1),
> + [IFLA_VXLAN_LABEL_BEHAVIOR] = NLA_POLICY_MAX(NLA_U8, VXLAN_LABEL_MAX),
My preference would be IFLA_VXLAN_LABEL_POLICY.
> };
>
> static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[],
> @@ -4003,6 +4011,9 @@ static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[],
> conf->label = nla_get_be32(data[IFLA_VXLAN_LABEL]) &
> IPV6_FLOWLABEL_MASK;
>
> + if (data[IFLA_VXLAN_LABEL_BEHAVIOR])
> + conf->label_behavior = nla_get_u8(data[IFLA_VXLAN_LABEL_BEHAVIOR]);
There is a check in vxlan_config_validate() that prevents setting a
non-zero flow label when the VXLAN device encapsulates using IPv4. For
consistency it would be good to include a similar check regarding the
flow label policy.
> +
> if (data[IFLA_VXLAN_LEARNING]) {
> err = vxlan_nl2flag(conf, data, IFLA_VXLAN_LEARNING,
> VXLAN_F_LEARN, changelink, true,
> @@ -4315,6 +4326,7 @@ static size_t vxlan_get_size(const struct net_device *dev)
> nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_TOS */
> nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_DF */
> nla_total_size(sizeof(__be32)) + /* IFLA_VXLAN_LABEL */
> + nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_LABEL_BEHAVIOR */
> nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_LEARNING */
> nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_PROXY */
> nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_RSC */
> @@ -4387,6 +4399,7 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
> nla_put_u8(skb, IFLA_VXLAN_TOS, vxlan->cfg.tos) ||
> nla_put_u8(skb, IFLA_VXLAN_DF, vxlan->cfg.df) ||
> nla_put_be32(skb, IFLA_VXLAN_LABEL, vxlan->cfg.label) ||
> + nla_put_u8(skb, IFLA_VXLAN_LABEL_BEHAVIOR, vxlan->cfg.label_behavior) ||
> nla_put_u8(skb, IFLA_VXLAN_LEARNING,
> !!(vxlan->cfg.flags & VXLAN_F_LEARN)) ||
> nla_put_u8(skb, IFLA_VXLAN_PROXY,
> diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
> index f346b4efbc30..2d746f4c9a0a 100644
> --- a/include/net/ip_tunnels.h
> +++ b/include/net/ip_tunnels.h
> @@ -416,6 +416,17 @@ static inline u8 ip_tunnel_get_dsfield(const struct iphdr *iph,
> return 0;
> }
>
> +static inline __be32 ip_tunnel_get_flowlabel(const struct iphdr *iph,
> + const struct sk_buff *skb)
> +{
> + __be16 payload_protocol = skb_protocol(skb, true);
> +
> + if (payload_protocol == htons(ETH_P_IPV6))
> + return ip6_flowlabel((const struct ipv6hdr *)iph);
> + else
> + return 0;
> +}
> +
> static inline u8 ip_tunnel_get_ttl(const struct iphdr *iph,
> const struct sk_buff *skb)
> {
> diff --git a/include/net/vxlan.h b/include/net/vxlan.h
> index 6a9f8a5f387c..9ccbc8b7b8f9 100644
> --- a/include/net/vxlan.h
> +++ b/include/net/vxlan.h
> @@ -210,22 +210,23 @@ struct vxlan_rdst {
> };
>
> struct vxlan_config {
> - union vxlan_addr remote_ip;
> - union vxlan_addr saddr;
> - __be32 vni;
> - int remote_ifindex;
> - int mtu;
> - __be16 dst_port;
> - u16 port_min;
> - u16 port_max;
> - u8 tos;
> - u8 ttl;
> - __be32 label;
> - u32 flags;
> - unsigned long age_interval;
> - unsigned int addrmax;
> - bool no_share;
> - enum ifla_vxlan_df df;
> + union vxlan_addr remote_ip;
> + union vxlan_addr saddr;
> + __be32 vni;
> + int remote_ifindex;
> + int mtu;
> + __be16 dst_port;
> + u16 port_min;
> + u16 port_max;
> + u8 tos;
> + u8 ttl;
> + __be32 label;
> + enum ifla_vxlan_label_behavior label_behavior;
> + u32 flags;
> + unsigned long age_interval;
> + unsigned int addrmax;
> + bool no_share;
> + enum ifla_vxlan_df df;
> };
>
> enum {
> diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
> index fac351a93aed..13afc4afcc76 100644
> --- a/include/uapi/linux/if_link.h
> +++ b/include/uapi/linux/if_link.h
> @@ -830,6 +830,7 @@ enum {
> IFLA_VXLAN_DF,
> IFLA_VXLAN_VNIFILTER, /* only applicable with COLLECT_METADATA mode */
> IFLA_VXLAN_LOCALBYPASS,
> + IFLA_VXLAN_LABEL_BEHAVIOR,
> __IFLA_VXLAN_MAX
> };
> #define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1)
> @@ -847,6 +848,13 @@ enum ifla_vxlan_df {
> VXLAN_DF_MAX = __VXLAN_DF_END - 1,
> };
>
> +enum ifla_vxlan_label_behavior {
> + VXLAN_LABEL_FIXED = 0,
> + VXLAN_LABEL_INHERIT = 1,
> + __VXLAN_LABEL_END,
> + VXLAN_LABEL_MAX = __VXLAN_LABEL_END - 1,
> +};
> +
> /* GENEVE section */
> enum {
> IFLA_GENEVE_UNSPEC,
> --
> 2.39.2
>
Powered by blists - more mailing lists