netdev - Re: [net-next v4 1/4] seg6: add support for SRv6 H.Encaps.Red behavior

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <cd046e93a9783be5944cf15974afa534c94fb15e.camel@redhat.com>
Date:   Tue, 05 Jul 2022 09:33:16 +0200
From:   Paolo Abeni <pabeni@...hat.com>
To:     Andrea Mayer <andrea.mayer@...roma2.it>,
        "David S. Miller" <davem@...emloft.net>,
        Hideaki YOSHIFUJI <yoshfuji@...ux-ipv6.org>,
        David Ahern <dsahern@...nel.org>,
        Eric Dumazet <edumazet@...gle.com>,
        Jakub Kicinski <kuba@...nel.org>,
        Shuah Khan <shuah@...nel.org>,
        Anton Makarov <anton.makarov11235@...il.com>,
        linux-kernel@...r.kernel.org, netdev@...r.kernel.org,
        linux-kselftest@...r.kernel.org
Cc:     Stefano Salsano <stefano.salsano@...roma2.it>,
        Paolo Lungaroni <paolo.lungaroni@...roma2.it>,
        Ahmed Abdelsalam <ahabdels.dev@...il.com>
Subject: Re: [net-next v4 1/4] seg6: add support for SRv6 H.Encaps.Red
 behavior

On Fri, 2022-07-01 at 17:01 +0200, Andrea Mayer wrote:
> The SRv6 H.Encaps.Red behavior described in [1] is an optimization of
> the SRv6 H.Encaps behavior [2].
> 
> H.Encaps.Red reduces the length of the SRH by excluding the first
> segment (SID) in the SRH of the pushed IPv6 header. The first SID is
> only placed in the IPv6 Destination Address field of the pushed IPv6
> header.
> When the SRv6 Policy only contains one SID the SRH is omitted, unless
> there is an HMAC TLV to be carried.
> 
> [1] - https://datatracker.ietf.org/doc/html/rfc8986#section-5.2
> [2] - https://datatracker.ietf.org/doc/html/rfc8986#section-5.1
> 
> Signed-off-by: Andrea Mayer <andrea.mayer@...roma2.it>
> Signed-off-by: Anton Makarov <anton.makarov11235@...il.com>
> ---
>  include/uapi/linux/seg6_iptunnel.h |   1 +
>  net/ipv6/seg6_iptunnel.c           | 126 ++++++++++++++++++++++++++++-
>  2 files changed, 126 insertions(+), 1 deletion(-)
> 
> diff --git a/include/uapi/linux/seg6_iptunnel.h b/include/uapi/linux/seg6_iptunnel.h
> index eb815e0d0ac3..538152a7b2c3 100644
> --- a/include/uapi/linux/seg6_iptunnel.h
> +++ b/include/uapi/linux/seg6_iptunnel.h
> @@ -35,6 +35,7 @@ enum {
>  	SEG6_IPTUN_MODE_INLINE,
>  	SEG6_IPTUN_MODE_ENCAP,
>  	SEG6_IPTUN_MODE_L2ENCAP,
> +	SEG6_IPTUN_MODE_ENCAP_RED,
>  };
>  
>  #endif
> diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
> index d64855010948..4942073650d3 100644
> --- a/net/ipv6/seg6_iptunnel.c
> +++ b/net/ipv6/seg6_iptunnel.c
> @@ -36,6 +36,7 @@ static size_t seg6_lwt_headroom(struct seg6_iptunnel_encap *tuninfo)
>  	case SEG6_IPTUN_MODE_INLINE:
>  		break;
>  	case SEG6_IPTUN_MODE_ENCAP:
> +	case SEG6_IPTUN_MODE_ENCAP_RED:
>  		head = sizeof(struct ipv6hdr);
>  		break;
>  	case SEG6_IPTUN_MODE_L2ENCAP:
> @@ -195,6 +196,122 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
>  }
>  EXPORT_SYMBOL_GPL(seg6_do_srh_encap);
>  
> +/* encapsulate an IPv6 packet within an outer IPv6 header with reduced SRH */
> +static int seg6_do_srh_encap_red(struct sk_buff *skb,
> +				 struct ipv6_sr_hdr *osrh, int proto)
> +{
> +	__u8 first_seg = osrh->first_segment;
> +	struct dst_entry *dst = skb_dst(skb);
> +	struct net *net = dev_net(dst->dev);
> +	struct ipv6hdr *hdr, *inner_hdr;
> +	int hdrlen = ipv6_optlen(osrh);
> +	int red_tlv_offset, tlv_offset;
> +	struct ipv6_sr_hdr *isrh;
> +	bool skip_srh = false;
> +	__be32 flowlabel;
> +	int tot_len, err;
> +	int red_hdrlen;
> +	int tlvs_len;
> +
> +	if (first_seg > 0) {
> +		red_hdrlen = hdrlen - sizeof(struct in6_addr);
> +	} else {
> +		/* NOTE: if tag/flags and/or other TLVs are introduced in the
> +		 * seg6_iptunnel infrastructure, they should be considered when
> +		 * deciding to skip the SRH.
> +		 */
> +		skip_srh = !sr_has_hmac(osrh);
> +
> +		red_hdrlen = skip_srh ? 0 : hdrlen;
> +	}
> +
> +	tot_len = red_hdrlen + sizeof(struct ipv6hdr);
> +
> +	err = skb_cow_head(skb, tot_len + skb->mac_len);
> +	if (unlikely(err))
> +		return err;
> +
> +	inner_hdr = ipv6_hdr(skb);
> +	flowlabel = seg6_make_flowlabel(net, skb, inner_hdr);
> +
> +	skb_push(skb, tot_len);
> +	skb_reset_network_header(skb);
> +	skb_mac_header_rebuild(skb);
> +	hdr = ipv6_hdr(skb);
> +
> +	/* based on seg6_do_srh_encap() */
> +	if (skb->protocol == htons(ETH_P_IPV6)) {
> +		ip6_flow_hdr(hdr, ip6_tclass(ip6_flowinfo(inner_hdr)),
> +			     flowlabel);
> +		hdr->hop_limit = inner_hdr->hop_limit;
> +	} else {
> +		ip6_flow_hdr(hdr, 0, flowlabel);
> +		hdr->hop_limit = ip6_dst_hoplimit(skb_dst(skb));
> +
> +		memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
> +		IP6CB(skb)->iif = skb->skb_iif;
> +	}
> +
> +	/* no matter if we have to skip the SRH or not, the first segment
> +	 * always comes in the pushed IPv6 header.
> +	 */
> +	hdr->daddr = osrh->segments[first_seg];
> +
> +	if (skip_srh) {
> +		hdr->nexthdr = proto;
> +
> +		set_tun_src(net, dst->dev, &hdr->daddr, &hdr->saddr);
> +		goto out;
> +	}
> +
> +	/* we cannot skip the SRH, slow path */
> +
> +	hdr->nexthdr = NEXTHDR_ROUTING;
> +	isrh = (void *)hdr + sizeof(struct ipv6hdr);
> +
> +	if (unlikely(!first_seg)) {
> +		/* this is a very rare case; we have only one SID but
> +		 * we cannot skip the SRH since we are carrying some
> +		 * other info.
> +		 */
> +		memcpy(isrh, osrh, hdrlen);
> +		goto srcaddr;
> +	}
> +
> +	tlv_offset = sizeof(*osrh) + (first_seg + 1) * sizeof(struct in6_addr);
> +	red_tlv_offset = tlv_offset - sizeof(struct in6_addr);
> +
> +	memcpy(isrh, osrh, red_tlv_offset);
> +
> +	tlvs_len = hdrlen - tlv_offset;
> +	if (unlikely(tlvs_len > 0)) {
> +		const void *s = (const void *)osrh + tlv_offset;
> +		void *d = (void *)isrh + red_tlv_offset;
> +
> +		memcpy(d, s, tlvs_len);
> +	}
> +
> +	--isrh->first_segment;
> +	isrh->hdrlen -= 2;
> +
> +srcaddr:
> +	isrh->nexthdr = proto;
> +	set_tun_src(net, dst->dev, &hdr->daddr, &hdr->saddr);
> +
> +#ifdef CONFIG_IPV6_SEG6_HMAC
> +	if (unlikely(!skip_srh && sr_has_hmac(isrh))) {
> +		err = seg6_push_hmac(net, &hdr->saddr, isrh);
> +		if (unlikely(err))
> +			return err;
> +	}
> +#endif
> +
> +out:
> +	skb_postpush_rcsum(skb, hdr, tot_len);

It looks like, at this point hdr->payload_len is not initialized yet -
it will be set later by the caller. So the above will corrupt the
checksum complete.

I think the solution is moving 'payload_len' initialization before the
csum update. Note that 'seg6_do_srh_encap' has a similar issue.

Paolo