[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <18391efa-3328-4e3f-ad9c-d278ea128095@kernel.org>
Date: Wed, 25 Oct 2023 18:19:15 -0600
From: David Ahern <dsahern@...nel.org>
To: Alce Lafranque <alce@...ranque.net>, "David S. Miller"
<davem@...emloft.net>, Eric Dumazet <edumazet@...gle.com>,
Jakub Kicinski <kuba@...nel.org>, Paolo Abeni <pabeni@...hat.com>,
Ido Schimmel <idosch@...dia.com>, netdev@...r.kernel.org
Cc: Vincent Bernat <vincent@...nat.ch>
Subject: Re: [PATCH net-next v7] vxlan: add support for flowlabel inherit
On 10/24/23 10:50 AM, Alce Lafranque wrote:
> By default, VXLAN encapsulation over IPv6 sets the flow label to 0, with
> an option for a fixed value. This commits add the ability to inherit the
> flow label from the inner packet, like for other tunnel implementations.
> This enables devices using only L3 headers for ECMP to correctly balance
> VXLAN-encapsulated IPv6 packets.
>
> ```
> $ ./ip/ip link add dummy1 type dummy
> $ ./ip/ip addr add 2001:db8::2/64 dev dummy1
> $ ./ip/ip link set up dev dummy1
> $ ./ip/ip link add vxlan1 type vxlan id 100 flowlabel inherit remote 2001:db8::1 local 2001:db8::2
> $ ./ip/ip link set up dev vxlan1
> $ ./ip/ip addr add 2001:db8:1::2/64 dev vxlan1
> $ ./ip/ip link set arp off dev vxlan1
> $ ping -q 2001:db8:1::1 &
> $ tshark -d udp.port==8472,vxlan -Vpni dummy1 -c1
> [...]
> Internet Protocol Version 6, Src: 2001:db8::2, Dst: 2001:db8::1
> 0110 .... = Version: 6
> .... 0000 0000 .... .... .... .... .... = Traffic Class: 0x00 (DSCP: CS0, ECN: Not-ECT)
> .... 0000 00.. .... .... .... .... .... = Differentiated Services Codepoint: Default (0)
> .... .... ..00 .... .... .... .... .... = Explicit Congestion Notification: Not ECN-Capable Transport (0)
> .... 1011 0001 1010 1111 1011 = Flow Label: 0xb1afb
> [...]
> Virtual eXtensible Local Area Network
> Flags: 0x0800, VXLAN Network ID (VNI)
> Group Policy ID: 0
> VXLAN Network Identifier (VNI): 100
> [...]
> Internet Protocol Version 6, Src: 2001:db8:1::2, Dst: 2001:db8:1::1
> 0110 .... = Version: 6
> .... 0000 0000 .... .... .... .... .... = Traffic Class: 0x00 (DSCP: CS0, ECN: Not-ECT)
> .... 0000 00.. .... .... .... .... .... = Differentiated Services Codepoint: Default (0)
> .... .... ..00 .... .... .... .... .... = Explicit Congestion Notification: Not ECN-Capable Transport (0)
> .... 1011 0001 1010 1111 1011 = Flow Label: 0xb1afb
> ```
>
> Signed-off-by: Alce Lafranque <alce@...ranque.net>
> Co-developed-by: Vincent Bernat <vincent@...nat.ch>
> Signed-off-by: Vincent Bernat <vincent@...nat.ch>
> Reviewed-by: Ido Schimmel <idosch@...dia.com>
>
> ---
> v7:
> - Rebase patch
> v6:
> - Rebase patch
> v5: https://lore.kernel.org/netdev/20231019180417.210523-1-alce@lafranque.net/
> - Rollback policy label to fixed by default
> v4: https://lore.kernel.org/all/20231014132102.54051-1-alce@lafranque.net/
> - Fix tabs
> v3: https://lore.kernel.org/all/20231014131320.51810-1-alce@lafranque.net/
> - Adopt policy label inherit by default
> - Set policy to label fixed when flowlabel is set
> - Rename IFLA_VXLAN_LABEL_BEHAVIOR to IFLA_VXLAN_LABEL_POLICY
> v2: https://lore.kernel.org/all/20231007142624.739192-1-alce@lafranque.net/
> - Use an enum instead of flag to define label behavior
> v1: https://lore.kernel.org/all/4444C5AE-FA5A-49A4-9700-7DD9D7916C0F.1@mail.lac-coloc.fr/
> ---
> drivers/net/vxlan/vxlan_core.c | 23 ++++++++++++++++++++++-
> include/net/ip_tunnels.h | 11 +++++++++++
> include/net/vxlan.h | 33 +++++++++++++++++----------------
> include/uapi/linux/if_link.h | 8 ++++++++
> 4 files changed, 58 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c
> index 7b526ae16ed0..821f8c4de784 100644
> --- a/drivers/net/vxlan/vxlan_core.c
> +++ b/drivers/net/vxlan/vxlan_core.c
> @@ -2379,7 +2379,17 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
> else
> udp_sum = !(flags & VXLAN_F_UDP_ZERO_CSUM6_TX);
> #if IS_ENABLED(CONFIG_IPV6)
> - key.label = vxlan->cfg.label;
> + switch (vxlan->cfg.label_policy) {
> + case VXLAN_LABEL_FIXED:
> + key.label = vxlan->cfg.label;
> + break;
> + case VXLAN_LABEL_INHERIT:
> + key.label = ip_tunnel_get_flowlabel(old_iph, skb);
> + break;
> + default:
> + DEBUG_NET_WARN_ON_ONCE(1);
> + goto drop;
> + }
> #endif
> } else {
> if (!info) {
> @@ -3365,6 +3375,7 @@ static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {
> [IFLA_VXLAN_DF] = { .type = NLA_U8 },
> [IFLA_VXLAN_VNIFILTER] = { .type = NLA_U8 },
> [IFLA_VXLAN_LOCALBYPASS] = NLA_POLICY_MAX(NLA_U8, 1),
> + [IFLA_VXLAN_LABEL_POLICY] = NLA_POLICY_MAX(NLA_U8, VXLAN_LABEL_MAX),
> };
>
> static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[],
> @@ -3739,6 +3750,12 @@ static int vxlan_config_validate(struct net *src_net, struct vxlan_config *conf,
> return -EINVAL;
> }
>
> + if (conf->label_policy && !use_ipv6) {
> + NL_SET_ERR_MSG(extack,
> + "Label policy only applies to IPv6 VXLAN devices");
> + return -EINVAL;
> + }
> +
> if (conf->remote_ifindex) {
> struct net_device *lowerdev;
>
> @@ -4081,6 +4098,8 @@ static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[],
> if (data[IFLA_VXLAN_LABEL])
> conf->label = nla_get_be32(data[IFLA_VXLAN_LABEL]) &
> IPV6_FLOWLABEL_MASK;
> + if (data[IFLA_VXLAN_LABEL_POLICY])
> + conf->label_policy = nla_get_u8(data[IFLA_VXLAN_LABEL_POLICY]);
>
> if (data[IFLA_VXLAN_LEARNING]) {
> err = vxlan_nl2flag(conf, data, IFLA_VXLAN_LEARNING,
> @@ -4398,6 +4417,7 @@ static size_t vxlan_get_size(const struct net_device *dev)
> nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_TOS */
> nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_DF */
> nla_total_size(sizeof(__be32)) + /* IFLA_VXLAN_LABEL */
> + nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_LABEL_POLICY */
> nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_LEARNING */
> nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_PROXY */
> nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_RSC */
> @@ -4470,6 +4490,7 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
> nla_put_u8(skb, IFLA_VXLAN_TOS, vxlan->cfg.tos) ||
> nla_put_u8(skb, IFLA_VXLAN_DF, vxlan->cfg.df) ||
> nla_put_be32(skb, IFLA_VXLAN_LABEL, vxlan->cfg.label) ||
> + nla_put_u8(skb, IFLA_VXLAN_LABEL_POLICY, vxlan->cfg.label_policy) ||
> nla_put_u8(skb, IFLA_VXLAN_LEARNING,
> !!(vxlan->cfg.flags & VXLAN_F_LEARN)) ||
> nla_put_u8(skb, IFLA_VXLAN_PROXY,
> diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
> index f346b4efbc30..2d746f4c9a0a 100644
> --- a/include/net/ip_tunnels.h
> +++ b/include/net/ip_tunnels.h
> @@ -416,6 +416,17 @@ static inline u8 ip_tunnel_get_dsfield(const struct iphdr *iph,
> return 0;
> }
>
> +static inline __be32 ip_tunnel_get_flowlabel(const struct iphdr *iph,
> + const struct sk_buff *skb)
> +{
> + __be16 payload_protocol = skb_protocol(skb, true);
> +
> + if (payload_protocol == htons(ETH_P_IPV6))
> + return ip6_flowlabel((const struct ipv6hdr *)iph);
> + else
> + return 0;
> +}
> +
> static inline u8 ip_tunnel_get_ttl(const struct iphdr *iph,
> const struct sk_buff *skb)
> {
> diff --git a/include/net/vxlan.h b/include/net/vxlan.h
> index 6a9f8a5f387c..33ba6fc151cf 100644
> --- a/include/net/vxlan.h
> +++ b/include/net/vxlan.h
> @@ -210,22 +210,23 @@ struct vxlan_rdst {
> };
>
> struct vxlan_config {
> - union vxlan_addr remote_ip;
> - union vxlan_addr saddr;
> - __be32 vni;
> - int remote_ifindex;
> - int mtu;
> - __be16 dst_port;
> - u16 port_min;
> - u16 port_max;
> - u8 tos;
> - u8 ttl;
> - __be32 label;
> - u32 flags;
> - unsigned long age_interval;
> - unsigned int addrmax;
> - bool no_share;
> - enum ifla_vxlan_df df;
> + union vxlan_addr remote_ip;
> + union vxlan_addr saddr;
> + __be32 vni;
> + int remote_ifindex;
> + int mtu;
> + __be16 dst_port;
> + u16 port_min;
> + u16 port_max;
> + u8 tos;
> + u8 ttl;
> + __be32 label;
> + enum ifla_vxlan_label_policy label_policy;
> + u32 flags;
> + unsigned long age_interval;
> + unsigned int addrmax;
> + bool no_share;
> + enum ifla_vxlan_df df;
> };
>
> enum {
> diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
> index 9f8a3da0f14f..f71c195b7abf 100644
> --- a/include/uapi/linux/if_link.h
> +++ b/include/uapi/linux/if_link.h
> @@ -832,6 +832,7 @@ enum {
> IFLA_VXLAN_DF,
> IFLA_VXLAN_VNIFILTER, /* only applicable with COLLECT_METADATA mode */
> IFLA_VXLAN_LOCALBYPASS,
> + IFLA_VXLAN_LABEL_POLICY,
If you do a respin, a comment here would be good. e.g.,
IFLA_VXLAN_LABEL_POLICY, /* IPv6 flow label policy; see
ifla_vxlan_label_policy */
> __IFLA_VXLAN_MAX
> };
> #define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1)
> @@ -849,6 +850,13 @@ enum ifla_vxlan_df {
> VXLAN_DF_MAX = __VXLAN_DF_END - 1,
> };
>
> +enum ifla_vxlan_label_policy {
> + VXLAN_LABEL_FIXED = 0,
> + VXLAN_LABEL_INHERIT = 1,
> + __VXLAN_LABEL_END,
> + VXLAN_LABEL_MAX = __VXLAN_LABEL_END - 1,
> +};
> +
> /* GENEVE section */
> enum {
> IFLA_GENEVE_UNSPEC,
Reviewed-by: David Ahern <dsahern@...nel.org>
Powered by blists - more mailing lists