[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <cd046e93a9783be5944cf15974afa534c94fb15e.camel@redhat.com>
Date: Tue, 05 Jul 2022 09:33:16 +0200
From: Paolo Abeni <pabeni@...hat.com>
To: Andrea Mayer <andrea.mayer@...roma2.it>,
"David S. Miller" <davem@...emloft.net>,
Hideaki YOSHIFUJI <yoshfuji@...ux-ipv6.org>,
David Ahern <dsahern@...nel.org>,
Eric Dumazet <edumazet@...gle.com>,
Jakub Kicinski <kuba@...nel.org>,
Shuah Khan <shuah@...nel.org>,
Anton Makarov <anton.makarov11235@...il.com>,
linux-kernel@...r.kernel.org, netdev@...r.kernel.org,
linux-kselftest@...r.kernel.org
Cc: Stefano Salsano <stefano.salsano@...roma2.it>,
Paolo Lungaroni <paolo.lungaroni@...roma2.it>,
Ahmed Abdelsalam <ahabdels.dev@...il.com>
Subject: Re: [net-next v4 1/4] seg6: add support for SRv6 H.Encaps.Red
behavior
On Fri, 2022-07-01 at 17:01 +0200, Andrea Mayer wrote:
> The SRv6 H.Encaps.Red behavior described in [1] is an optimization of
> the SRv6 H.Encaps behavior [2].
>
> H.Encaps.Red reduces the length of the SRH by excluding the first
> segment (SID) in the SRH of the pushed IPv6 header. The first SID is
> only placed in the IPv6 Destination Address field of the pushed IPv6
> header.
> When the SRv6 Policy only contains one SID the SRH is omitted, unless
> there is an HMAC TLV to be carried.
>
> [1] - https://datatracker.ietf.org/doc/html/rfc8986#section-5.2
> [2] - https://datatracker.ietf.org/doc/html/rfc8986#section-5.1
>
> Signed-off-by: Andrea Mayer <andrea.mayer@...roma2.it>
> Signed-off-by: Anton Makarov <anton.makarov11235@...il.com>
> ---
> include/uapi/linux/seg6_iptunnel.h | 1 +
> net/ipv6/seg6_iptunnel.c | 126 ++++++++++++++++++++++++++++-
> 2 files changed, 126 insertions(+), 1 deletion(-)
>
> diff --git a/include/uapi/linux/seg6_iptunnel.h b/include/uapi/linux/seg6_iptunnel.h
> index eb815e0d0ac3..538152a7b2c3 100644
> --- a/include/uapi/linux/seg6_iptunnel.h
> +++ b/include/uapi/linux/seg6_iptunnel.h
> @@ -35,6 +35,7 @@ enum {
> SEG6_IPTUN_MODE_INLINE,
> SEG6_IPTUN_MODE_ENCAP,
> SEG6_IPTUN_MODE_L2ENCAP,
> + SEG6_IPTUN_MODE_ENCAP_RED,
> };
>
> #endif
> diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
> index d64855010948..4942073650d3 100644
> --- a/net/ipv6/seg6_iptunnel.c
> +++ b/net/ipv6/seg6_iptunnel.c
> @@ -36,6 +36,7 @@ static size_t seg6_lwt_headroom(struct seg6_iptunnel_encap *tuninfo)
> case SEG6_IPTUN_MODE_INLINE:
> break;
> case SEG6_IPTUN_MODE_ENCAP:
> + case SEG6_IPTUN_MODE_ENCAP_RED:
> head = sizeof(struct ipv6hdr);
> break;
> case SEG6_IPTUN_MODE_L2ENCAP:
> @@ -195,6 +196,122 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
> }
> EXPORT_SYMBOL_GPL(seg6_do_srh_encap);
>
> +/* encapsulate an IPv6 packet within an outer IPv6 header with reduced SRH */
> +static int seg6_do_srh_encap_red(struct sk_buff *skb,
> + struct ipv6_sr_hdr *osrh, int proto)
> +{
> + __u8 first_seg = osrh->first_segment;
> + struct dst_entry *dst = skb_dst(skb);
> + struct net *net = dev_net(dst->dev);
> + struct ipv6hdr *hdr, *inner_hdr;
> + int hdrlen = ipv6_optlen(osrh);
> + int red_tlv_offset, tlv_offset;
> + struct ipv6_sr_hdr *isrh;
> + bool skip_srh = false;
> + __be32 flowlabel;
> + int tot_len, err;
> + int red_hdrlen;
> + int tlvs_len;
> +
> + if (first_seg > 0) {
> + red_hdrlen = hdrlen - sizeof(struct in6_addr);
> + } else {
> + /* NOTE: if tag/flags and/or other TLVs are introduced in the
> + * seg6_iptunnel infrastructure, they should be considered when
> + * deciding to skip the SRH.
> + */
> + skip_srh = !sr_has_hmac(osrh);
> +
> + red_hdrlen = skip_srh ? 0 : hdrlen;
> + }
> +
> + tot_len = red_hdrlen + sizeof(struct ipv6hdr);
> +
> + err = skb_cow_head(skb, tot_len + skb->mac_len);
> + if (unlikely(err))
> + return err;
> +
> + inner_hdr = ipv6_hdr(skb);
> + flowlabel = seg6_make_flowlabel(net, skb, inner_hdr);
> +
> + skb_push(skb, tot_len);
> + skb_reset_network_header(skb);
> + skb_mac_header_rebuild(skb);
> + hdr = ipv6_hdr(skb);
> +
> + /* based on seg6_do_srh_encap() */
> + if (skb->protocol == htons(ETH_P_IPV6)) {
> + ip6_flow_hdr(hdr, ip6_tclass(ip6_flowinfo(inner_hdr)),
> + flowlabel);
> + hdr->hop_limit = inner_hdr->hop_limit;
> + } else {
> + ip6_flow_hdr(hdr, 0, flowlabel);
> + hdr->hop_limit = ip6_dst_hoplimit(skb_dst(skb));
> +
> + memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
> + IP6CB(skb)->iif = skb->skb_iif;
> + }
> +
> + /* no matter if we have to skip the SRH or not, the first segment
> + * always comes in the pushed IPv6 header.
> + */
> + hdr->daddr = osrh->segments[first_seg];
> +
> + if (skip_srh) {
> + hdr->nexthdr = proto;
> +
> + set_tun_src(net, dst->dev, &hdr->daddr, &hdr->saddr);
> + goto out;
> + }
> +
> + /* we cannot skip the SRH, slow path */
> +
> + hdr->nexthdr = NEXTHDR_ROUTING;
> + isrh = (void *)hdr + sizeof(struct ipv6hdr);
> +
> + if (unlikely(!first_seg)) {
> + /* this is a very rare case; we have only one SID but
> + * we cannot skip the SRH since we are carrying some
> + * other info.
> + */
> + memcpy(isrh, osrh, hdrlen);
> + goto srcaddr;
> + }
> +
> + tlv_offset = sizeof(*osrh) + (first_seg + 1) * sizeof(struct in6_addr);
> + red_tlv_offset = tlv_offset - sizeof(struct in6_addr);
> +
> + memcpy(isrh, osrh, red_tlv_offset);
> +
> + tlvs_len = hdrlen - tlv_offset;
> + if (unlikely(tlvs_len > 0)) {
> + const void *s = (const void *)osrh + tlv_offset;
> + void *d = (void *)isrh + red_tlv_offset;
> +
> + memcpy(d, s, tlvs_len);
> + }
> +
> + --isrh->first_segment;
> + isrh->hdrlen -= 2;
> +
> +srcaddr:
> + isrh->nexthdr = proto;
> + set_tun_src(net, dst->dev, &hdr->daddr, &hdr->saddr);
> +
> +#ifdef CONFIG_IPV6_SEG6_HMAC
> + if (unlikely(!skip_srh && sr_has_hmac(isrh))) {
> + err = seg6_push_hmac(net, &hdr->saddr, isrh);
> + if (unlikely(err))
> + return err;
> + }
> +#endif
> +
> +out:
> + skb_postpush_rcsum(skb, hdr, tot_len);
It looks like, at this point hdr->payload_len is not initialized yet -
it will be set later by the caller. So the above will corrupt the
checksum complete.
I think the solution is moving 'payload_len' initialization before the
csum update. Note that 'seg6_do_srh_encap' has a similar issue.
Paolo
Powered by blists - more mailing lists