[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <63fcdaf7e3e9d_1684422084b@willemb.c.googlers.com.notmuch>
Date: Mon, 27 Feb 2023 11:31:51 -0500
From: Willem de Bruijn <willemdebruijn.kernel@...il.com>
To: 沈安琪(凛玥) <amy.saq@...group.com>,
netdev@...r.kernel.org
Cc: willemdebruijn.kernel@...il.com, mst@...hat.com,
davem@...emloft.net, jasowang@...hat.com,
谈鉴锋 <henry.tjf@...group.com>,
沈安琪(凛玥) <amy.saq@...group.com>
Subject: RE: [PATCH v2] net/packet: support mergeable feature of virtio
沈安琪(凛玥) wrote:
> From: Jianfeng Tan <henry.tjf@...group.com>
>
> Packet sockets, like tap, can be used as the backend for kernel vhost.
> In packet sockets, virtio net header size is currently hardcoded to be
> the size of struct virtio_net_hdr, which is 10 bytes; however, it is not
> always the case: some virtio features, such as mrg_rxbuf, need virtio
> net header to be 12-byte long.
>
> Mergeable buffers, as a virtio feature, are worth supporting: packets
> that are larger than one mbuf will be dropped in the vhost worker's
> handle_rx if the mrg_rxbuf feature is not used, but large packets
> cannot be avoided and increasing the mbuf size is not economical.
>
> With this virtio feature enabled, packet sockets with a hardcoded 10-byte
> virtio net header will parse the mac header incorrectly in packet_snd by
> taking the last two bytes of the virtio net header as part of the mac
> header as well. This incorrect mac header parsing will cause packets to
> be dropped by the ethernet header validity check performed later, when
> the underlying device receives them.
>
> This patch adds an extra field, vnet_hdr_sz, placed in an existing hole
> in struct packet_sock, to record the virtio net header size currently in
> use, and supports an extra sockopt, PACKET_VNET_HDR_SZ, to set that
> vnet_hdr_sz. With these, packet sockets know the exact length of the
> virtio net header that the virtio user provides.
> In packet_snd, tpacket_snd and packet_recvmsg, instead of using a
> hardcoded virtio net header size, the code gets the exact vnet_hdr_sz from
> the corresponding packet_sock and parses the mac header correctly based on
> this information, so that packets are no longer mistakenly dropped.
>
> Signed-off-by: Jianfeng Tan <henry.tjf@...group.com>
> Co-developed-by: Anqi Shen <amy.saq@...group.com>
> Signed-off-by: Anqi Shen <amy.saq@...group.com>
net-next is closed
> @@ -2311,7 +2312,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
> (maclen < 16 ? 16 : maclen)) +
> po->tp_reserve;
> if (po->has_vnet_hdr) {
> - netoff += sizeof(struct virtio_net_hdr);
> + netoff += po->vnet_hdr_sz;
> do_vnet = true;
> }
> macoff = netoff - maclen;
> @@ -2552,16 +2553,23 @@ static int __packet_snd_vnet_parse(struct virtio_net_hdr *vnet_hdr, size_t len)
> }
>
> static int packet_snd_vnet_parse(struct msghdr *msg, size_t *len,
> - struct virtio_net_hdr *vnet_hdr)
> + struct virtio_net_hdr *vnet_hdr, int vnet_hdr_sz)
> {
> - if (*len < sizeof(*vnet_hdr))
> + int ret;
> +
> + if (*len < vnet_hdr_sz)
> return -EINVAL;
> - *len -= sizeof(*vnet_hdr);
> + *len -= vnet_hdr_sz;
>
> if (!copy_from_iter_full(vnet_hdr, sizeof(*vnet_hdr), &msg->msg_iter))
> return -EFAULT;
>
> - return __packet_snd_vnet_parse(vnet_hdr, *len);
> + ret = __packet_snd_vnet_parse(vnet_hdr, *len);
> +
> + /* move iter to point to the start of mac header */
> + if (ret == 0)
> + iov_iter_advance(&msg->msg_iter, vnet_hdr_sz - sizeof(struct virtio_net_hdr));
> + return ret;
Let's make the error path the exception
if (ret)
return ret;
And maybe avoid calling iov_iter_advance if vnet_hdr_sz == sizeof(*vnet_hdr)
> case PACKET_VNET_HDR:
> + case PACKET_VNET_HDR_SZ:
> {
> int val;
> + int hdr_len = 0;
>
> if (sock->type != SOCK_RAW)
> return -EINVAL;
> @@ -3931,11 +3945,23 @@ static void packet_flush_mclist(struct sock *sk)
> if (copy_from_sockptr(&val, optval, sizeof(val)))
> return -EFAULT;
>
> + if (optname == PACKET_VNET_HDR_SZ) {
> + if (val != sizeof(struct virtio_net_hdr) &&
> + val != sizeof(struct virtio_net_hdr_mrg_rxbuf))
> + return -EINVAL;
> + hdr_len = val;
> + }
> +
} else {
hdr_len = sizeof(struct virtio_net_hdr);
}
> lock_sock(sk);
> if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
> ret = -EBUSY;
> } else {
> - po->has_vnet_hdr = !!val;
> + if (optname == PACKET_VNET_HDR) {
> + po->has_vnet_hdr = !!val;
> + if (po->has_vnet_hdr)
> + hdr_len = sizeof(struct virtio_net_hdr);
> + }
> + po->vnet_hdr_sz = hdr_len;
then this is not needed
> ret = 0;
> }
> release_sock(sk);
> @@ -4070,6 +4096,9 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
> case PACKET_VNET_HDR:
> val = po->has_vnet_hdr;
> break;
> + case PACKET_VNET_HDR_SZ:
> + val = po->vnet_hdr_sz;
> + break;
> case PACKET_VERSION:
> val = po->tp_version;
> break;
> diff --git a/net/packet/internal.h b/net/packet/internal.h
> index 48af35b..e27b47d 100644
> --- a/net/packet/internal.h
> +++ b/net/packet/internal.h
> @@ -121,7 +121,8 @@ struct packet_sock {
> origdev:1,
> has_vnet_hdr:1,
> tp_loss:1,
> - tp_tx_has_off:1;
> + tp_tx_has_off:1,
> + vnet_hdr_sz:8; /* vnet header size should use */
has_vnet_hdr is no longer needed once vnet_hdr_sz is added. Removing it simplifies the code.
Drop the comment. Its meaning is self-explanatory from the variable name.
> int pressure;
> int ifindex; /* bound device */
> __be16 num;
> --
> 1.8.3.1
>
Powered by blists - more mailing lists