[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CACGkMEsx-pcwC=_-cMMMdGZ=E5P-5W=4YoivMQiy=FB1W7GKog@mail.gmail.com>
Date: Wed, 18 Jun 2025 12:08:22 +0800
From: Jason Wang <jasowang@...hat.com>
To: Paolo Abeni <pabeni@...hat.com>
Cc: netdev@...r.kernel.org, Willem de Bruijn <willemdebruijn.kernel@...il.com>,
Andrew Lunn <andrew+netdev@...n.ch>, "David S. Miller" <davem@...emloft.net>,
Eric Dumazet <edumazet@...gle.com>, Jakub Kicinski <kuba@...nel.org>,
"Michael S. Tsirkin" <mst@...hat.com>, Xuan Zhuo <xuanzhuo@...ux.alibaba.com>,
Eugenio Pérez <eperezma@...hat.com>,
Yuri Benditovich <yuri.benditovich@...nix.com>, Akihiko Odaki <akihiko.odaki@...nix.com>
Subject: Re: [PATCH v4 net-next 5/8] net: implement virtio helpers to handle
UDP GSO tunneling.
On Wed, Jun 18, 2025 at 12:13 AM Paolo Abeni <pabeni@...hat.com> wrote:
>
> The virtio specification is introducing support for GSO over UDP
> tunnel.
>
> This patch brings in the needed defines and the additional virtio hdr
> parsing/building helpers.
>
> The UDP tunnel support uses additional fields in the virtio hdr, and such
> fields location can change depending on other negotiated features -
> specifically VIRTIO_NET_F_HASH_REPORT.
>
> Try to be as conservative as possible with the new field validation.
>
> Existing implementations for plain GSO offloads allow for invalid/
> self-contradictory values of such fields. With GSO over UDP tunnel we can
> be more strict, with no need to deal with legacy implementation.
>
> Since the checksum-related field validation is asymmetric in the driver
> and in the device, introduce a separate helper to implement the new checks
> (to be used only on the driver side).
>
> Note that while the feature space exceeds the 64-bit boundaries, the
> guest offload space is fixed by the specification of the
> VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET command to a 64-bit size.
>
> Prior to the UDP tunnel GSO support, each guest offload bit corresponded
> to the feature bit with the same value and vice versa.
>
> Due to the limited 'guest offload' space, relevant features in the high
> 64 bits are 'mapped' to free bits in the lower range. That is simpler
> than defining a new command (and associated features) to exchange an
> extended guest offloads set.
>
> As a consequence, the uAPIs also specify the mapped guest offload value
> corresponding to the UDP tunnel GSO features.
>
> Signed-off-by: Paolo Abeni <pabeni@...hat.com>
> --
> v3 -> v4:
> - fixed offset for UDP GSO tunnel, update accordingly the helpers
> - tried to clarify the vlan_hlen semantics
> - virtio_net_chk_data_valid() -> virtio_net_handle_csum_offload()
>
> v2 -> v3:
> - add definitions for possible vnet hdr layouts with tunnel support
>
> v1 -> v2:
> - 'relay' -> 'rely' typo
> - clearer comment WRT the enforced inner GSO checks
> - inner header fields are allowed only with 'modern' virtio,
> thus are always le
> - clarified in the commit message the need for 'mapped features'
> defines
> - assume little_endian is true when UDP GSO is enabled.
> - fix inner proto type value
> ---
> include/linux/virtio_net.h | 196 ++++++++++++++++++++++++++++++--
> include/uapi/linux/virtio_net.h | 33 ++++++
> 2 files changed, 221 insertions(+), 8 deletions(-)
>
> diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
> index 02a9f4dc594d..449487c914a8 100644
> --- a/include/linux/virtio_net.h
> +++ b/include/linux/virtio_net.h
> @@ -47,9 +47,9 @@ static inline int virtio_net_hdr_set_proto(struct sk_buff *skb,
> return 0;
> }
>
> -static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
> - const struct virtio_net_hdr *hdr,
> - bool little_endian)
> +static inline int __virtio_net_hdr_to_skb(struct sk_buff *skb,
> + const struct virtio_net_hdr *hdr,
> + bool little_endian, u8 hdr_gso_type)
> {
> unsigned int nh_min_len = sizeof(struct iphdr);
> unsigned int gso_type = 0;
> @@ -57,8 +57,8 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
> unsigned int p_off = 0;
> unsigned int ip_proto;
>
> - if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
> - switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
> + if (hdr_gso_type != VIRTIO_NET_HDR_GSO_NONE) {
> + switch (hdr_gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
> case VIRTIO_NET_HDR_GSO_TCPV4:
> gso_type = SKB_GSO_TCPV4;
> ip_proto = IPPROTO_TCP;
> @@ -84,7 +84,7 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
> return -EINVAL;
> }
>
> - if (hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN)
> + if (hdr_gso_type & VIRTIO_NET_HDR_GSO_ECN)
> gso_type |= SKB_GSO_TCP_ECN;
>
> if (hdr->gso_size == 0)
> @@ -122,7 +122,7 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
>
> if (!protocol)
> virtio_net_hdr_set_proto(skb, hdr);
> - else if (!virtio_net_hdr_match_proto(protocol, hdr->gso_type))
> + else if (!virtio_net_hdr_match_proto(protocol, hdr_gso_type))
> return -EINVAL;
> else
> skb->protocol = protocol;
> @@ -153,7 +153,7 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
> }
> }
>
> - if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
> + if (hdr_gso_type != VIRTIO_NET_HDR_GSO_NONE) {
> u16 gso_size = __virtio16_to_cpu(little_endian, hdr->gso_size);
> unsigned int nh_off = p_off;
> struct skb_shared_info *shinfo = skb_shinfo(skb);
> @@ -199,6 +199,13 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
> return 0;
> }
>
> +static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
> + const struct virtio_net_hdr *hdr,
> + bool little_endian)
> +{
> + return __virtio_net_hdr_to_skb(skb, hdr, little_endian, hdr->gso_type);
> +}
> +
> static inline int virtio_net_hdr_from_skb(const struct sk_buff *skb,
> struct virtio_net_hdr *hdr,
> bool little_endian,
> @@ -242,4 +249,177 @@ static inline int virtio_net_hdr_from_skb(const struct sk_buff *skb,
> return 0;
> }
>
> +static inline unsigned int virtio_l3min(bool is_ipv6)
> +{
> + return is_ipv6 ? sizeof(struct ipv6hdr) : sizeof(struct iphdr);
> +}
> +
> +static inline int
> +virtio_net_hdr_tnl_to_skb(struct sk_buff *skb,
> + const struct virtio_net_hdr_v1_hash_tunnel *vhdr,
> + bool tnl_hdr_negotiated,
> + bool tnl_csum_negotiated,
> + bool little_endian)
> +{
> + const struct virtio_net_hdr *hdr = (const struct virtio_net_hdr *)vhdr;
> + unsigned int inner_nh, outer_th, inner_th;
> + unsigned int inner_l3min, outer_l3min;
> + u8 gso_inner_type, gso_tunnel_type;
> + bool outer_isv6, inner_isv6;
> + int ret;
> +
> + gso_tunnel_type = hdr->gso_type & VIRTIO_NET_HDR_GSO_UDP_TUNNEL;
> + if (!gso_tunnel_type)
> + return virtio_net_hdr_to_skb(skb, hdr, little_endian);
> +
> + /* Tunnel not supported/negotiated, but the hdr asks for it. */
> + if (!tnl_hdr_negotiated)
> + return -EINVAL;
> +
> + /* Either ipv4 or ipv6. */
> + if (gso_tunnel_type == VIRTIO_NET_HDR_GSO_UDP_TUNNEL)
> + return -EINVAL;
> +
> + /* The UDP tunnel must carry a GSO packet, but no UFO. */
> + gso_inner_type = hdr->gso_type & ~(VIRTIO_NET_HDR_GSO_ECN |
> + VIRTIO_NET_HDR_GSO_UDP_TUNNEL);
> + if (!gso_inner_type || gso_inner_type == VIRTIO_NET_HDR_GSO_UDP)
> + return -EINVAL;
> +
> + /* Rely on csum being present. */
> + if (!(hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM))
> + return -EINVAL;
> +
> + /* Validate offsets. */
> + outer_isv6 = gso_tunnel_type & VIRTIO_NET_HDR_GSO_UDP_TUNNEL_IPV6;
> + inner_isv6 = gso_inner_type == VIRTIO_NET_HDR_GSO_TCPV6;
> + inner_l3min = virtio_l3min(inner_isv6);
> + outer_l3min = ETH_HLEN + virtio_l3min(outer_isv6);
> +
> + inner_th = __virtio16_to_cpu(little_endian, hdr->csum_start);
> + inner_nh = le16_to_cpu(vhdr->inner_nh_offset);
> + outer_th = le16_to_cpu(vhdr->outer_th_offset);
> + if (outer_th < outer_l3min ||
> + inner_nh < outer_th + sizeof(struct udphdr) ||
> + inner_th < inner_nh + inner_l3min)
> + return -EINVAL;
> +
> + /* Let the basic parsing deal with plain GSO features. */
> + ret = __virtio_net_hdr_to_skb(skb, hdr, true,
> + hdr->gso_type & ~gso_tunnel_type);
> + if (ret)
> + return ret;
> +
> + /* In case of USO, the inner protocol is still unknown and
> + * `inner_isv6` is just a guess, additional parsing is needed.
> + * The previous validation ensures that accessing an ipv4 inner
> + * network header is safe.
> + */
> + if (gso_inner_type == VIRTIO_NET_HDR_GSO_UDP_L4) {
> + struct iphdr *iphdr = (struct iphdr *)(skb->data + inner_nh);
> +
> + inner_isv6 = iphdr->version == 6;
> + inner_l3min = virtio_l3min(inner_isv6);
> + if (inner_th < inner_nh + inner_l3min)
> + return -EINVAL;
> + }
> +
> + skb_set_inner_protocol(skb, inner_isv6 ? htons(ETH_P_IPV6) :
> + htons(ETH_P_IP));
> + if (hdr->flags & VIRTIO_NET_HDR_F_UDP_TUNNEL_CSUM) {
> + if (!tnl_csum_negotiated)
> + return -EINVAL;
> +
> + skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM;
> + } else {
> + skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL;
> + }
> +
> + skb->inner_transport_header = inner_th + skb_headroom(skb);
> + skb->inner_network_header = inner_nh + skb_headroom(skb);
> + skb->inner_mac_header = inner_nh + skb_headroom(skb);
> + skb->transport_header = outer_th + skb_headroom(skb);
> + skb->encapsulation = 1;
> + return 0;
> +}
> +
> +/* Checksum-related fields validation for the driver */
> +static inline int virtio_net_handle_csum_offload(struct sk_buff *skb,
> + struct virtio_net_hdr *hdr,
> + bool tnl_csum_negotiated)
> +{
> + if (!(hdr->gso_type & VIRTIO_NET_HDR_GSO_UDP_TUNNEL)) {
> + if (!(hdr->flags & VIRTIO_NET_HDR_F_DATA_VALID))
> + return 0;
> +
> + skb->ip_summed = CHECKSUM_UNNECESSARY;
> + if (!(hdr->flags & VIRTIO_NET_HDR_F_UDP_TUNNEL_CSUM))
> + return 0;
> +
> + /* tunnel csum packets are invalid when the related
> + * feature has not been negotiated
> + */
> + if (!tnl_csum_negotiated)
> + return -EINVAL;
> + skb->csum_level = 1;
> + return 0;
> + }
> +
> + /* DATA_VALID is mutually exclusive with NEEDS_CSUM, and GSO
> + * over UDP tunnel requires the latter
> + */
> + if (hdr->flags & VIRTIO_NET_HDR_F_DATA_VALID)
> + return -EINVAL;
> + return 0;
> +}
> +
> +/*
> + * vlan_hlen always refers to the outermost MAC header. That also
> + * means it refers to the only MAC header, if the packet does not carry
> + * any encapsulation.
> + */
> +static inline int
> +virtio_net_hdr_tnl_from_skb(const struct sk_buff *skb,
> + struct virtio_net_hdr_v1_hash_tunnel *vhdr,
> + bool tnl_hdr_negotiated,
> + bool little_endian,
Nit: it looks to me that we could just accept netdev_features_t here.
Others look good.
Acked-by: Jason Wang <jasowang@...hat.com>
Thanks
Powered by blists - more mailing lists