Message-ID: <63e664707f792_1b03a820836@willemb.c.googlers.com.notmuch>
Date: Fri, 10 Feb 2023 10:36:16 -0500
From: Willem de Bruijn <willemdebruijn.kernel@...il.com>
To: "Michael S. Tsirkin" <mst@...hat.com>,
沈安琪(凛玥) <amy.saq@...group.com>
Cc: netdev@...r.kernel.org, willemdebruijn.kernel@...il.com,
davem@...emloft.net, jasowang@...hat.com,
谈鉴锋 <henry.tjf@...group.com>
Subject: Re: [PATCH 2/2] net/packet: send and receive pkt with given
vnet_hdr_sz
Michael S. Tsirkin wrote:
> On Fri, Feb 10, 2023 at 12:01:03PM +0800, 沈安琪(凛玥) wrote:
> >
> > On 2023/2/9 9:07 PM, Michael S. Tsirkin wrote:
> > > On Thu, Feb 09, 2023 at 08:43:15PM +0800, 沈安琪(凛玥) wrote:
> > > > From: "Jianfeng Tan" <henry.tjf@...group.com>
> > > >
> > > > When a raw socket is used as the backend for kernel vhost, it
> > > > currently assumes a 10-byte virtio net header. That is not always
> > > > the case: some virtio features, such as mrg_rxbuf and VERSION_1,
> > > > require a 12-byte virtio net header.
> > > >
> > > > Instead of hardcoding the virtio net header length to 10 bytes,
> > > > tpacket_snd, tpacket_rcv, packet_snd and packet_recvmsg now read the
> > > > virtio net header size recorded in packet_sock, which indicates the
> > > > exact header size that the virtio user actually prepares in the
> > > > packets. This fixes incorrect mac header parsing when virtio
> > > > features that require a virtio net header larger than 10 bytes are
> > > > enabled.
> > > >
> > > > Signed-off-by: Jianfeng Tan <henry.tjf@...group.com>
> > > > Co-developed-by: Anqi Shen <amy.saq@...group.com>
> > > > Signed-off-by: Anqi Shen <amy.saq@...group.com>
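For reference, the two header layouts in question, as defined in
include/uapi/linux/virtio_net.h (the 12-byte variant simply appends a
num_buffers field to the legacy 10-byte header):

	struct virtio_net_hdr {
		__u8 flags;
		__u8 gso_type;
		__virtio16 hdr_len;	/* Ethernet + IP + tcp/udp hdrs */
		__virtio16 gso_size;	/* Bytes to append to hdr_len per frame */
		__virtio16 csum_start;	/* Position to start checksumming from */
		__virtio16 csum_offset;	/* Offset after that to place checksum */
	};

	struct virtio_net_hdr_mrg_rxbuf {
		struct virtio_net_hdr hdr;
		__virtio16 num_buffers;	/* Number of merged rx buffers */
	};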
> > > Does it handle VERSION_1 though? That one is also LE.
> > > Would it be better to pass a features bitmap instead?
> >
> >
> > Thanks for quick reply!
> >
> > I am a little confused about what "LE" represents here?
>
> LE == little_endian.
> Little endian format.
>
> > For passing a features bitmap to af_packet here, our concern is whether
> > it would be too complicated for af_packet to have to understand the
> > virtio features bitmap just to derive the vnet header size. For now, all
> > the virtio feature handling is done by the vhost worker, and af_packet
> > does not need to know much about virtio features. Would it be better to
> > keep the virtio feature handling in userspace and have userspace tell
> > af_packet how much space it should reserve?
>
> Presumably, we'd add an API in include/linux/virtio_net.h ?
If packet sockets do not act on the contents of these extended fields,
it's probably better to leave them opaque.
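In that model userspace only declares how many bytes it will prepend, e.g.
(a rough sketch; PACKET_VNET_HDR_SZ is my guess at the option name from
patch 1, not confirmed here):

	/* tell af_packet the exact vnet header size to expect; the kernel
	 * needs only the size, not the virtio feature bits behind it */
	int hdr_sz = sizeof(struct virtio_net_hdr_mrg_rxbuf);	/* 12 */

	if (setsockopt(fd, SOL_PACKET, PACKET_VNET_HDR_SZ,
		       &hdr_sz, sizeof(hdr_sz)) < 0)
		perror("setsockopt PACKET_VNET_HDR_SZ");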
This patch series should probably be a single patch. The new option in the
first patch modifies the data path, so as split there is one SHA1 at which
the option exists but its behavior does not yet work.
>
> > >
> > >
> > > > ---
> > > > net/packet/af_packet.c | 48 +++++++++++++++++++++++++++++++++---------------
> > > > 1 file changed, 33 insertions(+), 15 deletions(-)
> > > >
> > > > diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
> > > > index ab37baf..4f49939 100644
> > > > --- a/net/packet/af_packet.c
> > > > +++ b/net/packet/af_packet.c
> > > > @@ -2092,18 +2092,25 @@ static unsigned int run_filter(struct sk_buff *skb,
> > > > }
> > > > static int packet_rcv_vnet(struct msghdr *msg, const struct sk_buff *skb,
> > > > - size_t *len)
> > > > + size_t *len, int vnet_hdr_sz)
> > > > {
> > > > struct virtio_net_hdr vnet_hdr;
> > > > + int ret;
> > > > - if (*len < sizeof(vnet_hdr))
> > > > + if (*len < vnet_hdr_sz)
> > > > return -EINVAL;
> > > > - *len -= sizeof(vnet_hdr);
> > > > + *len -= vnet_hdr_sz;
> > > > if (virtio_net_hdr_from_skb(skb, &vnet_hdr, vio_le(), true, 0))
> > > > return -EINVAL;
> > > > - return memcpy_to_msg(msg, (void *)&vnet_hdr, sizeof(vnet_hdr));
> > > > + ret = memcpy_to_msg(msg, (void *)&vnet_hdr, sizeof(vnet_hdr));
> > > > +
> > > > + /* reserve space for extra info in vnet_hdr if needed */
> > > > + if (ret == 0)
> > > > + iov_iter_advance(&msg->msg_iter, vnet_hdr_sz - sizeof(vnet_hdr));
> > > > +
How about
struct virtio_net_hdr_mrg_rxbuf vnet_hdr = { .num_buffers = 0 };
..
ret = memcpy_to_msg(msg, (void *)&vnet_hdr, vnet_hdr_sz);
To avoid the iov_iter_advance and properly initialize those bytes.
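I.e., roughly (an untested sketch, assuming vnet_hdr_sz never exceeds
sizeof(struct virtio_net_hdr_mrg_rxbuf)):

	static int packet_rcv_vnet(struct msghdr *msg, const struct sk_buff *skb,
				   size_t *len, int vnet_hdr_sz)
	{
		struct virtio_net_hdr_mrg_rxbuf vnet_hdr = { .num_buffers = 0 };

		if (*len < vnet_hdr_sz)
			return -EINVAL;
		*len -= vnet_hdr_sz;

		if (virtio_net_hdr_from_skb(skb, &vnet_hdr.hdr, vio_le(), true, 0))
			return -EINVAL;

		/* copy the full vnet_hdr_sz bytes; the bytes beyond the legacy
		 * header (num_buffers) are zero-initialized above, so no
		 * iov_iter_advance is needed */
		return memcpy_to_msg(msg, (void *)&vnet_hdr, vnet_hdr_sz);
	}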
> > > > + return ret;
> > > > }
> > > > /*
> > > > @@ -2311,7 +2318,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
> > > > (maclen < 16 ? 16 : maclen)) +
> > > > po->tp_reserve;
> > > > if (po->has_vnet_hdr) {
> > > > - netoff += sizeof(struct virtio_net_hdr);
> > > > + netoff += po->vnet_hdr_sz;
> > > > do_vnet = true;
> > > > }
> > > > macoff = netoff - maclen;
> > > > @@ -2552,16 +2559,23 @@ static int __packet_snd_vnet_parse(struct virtio_net_hdr *vnet_hdr, size_t len)
> > > > }
> > > > static int packet_snd_vnet_parse(struct msghdr *msg, size_t *len,
> > > > - struct virtio_net_hdr *vnet_hdr)
> > > > + struct virtio_net_hdr *vnet_hdr, int vnet_hdr_sz)
> > > > {
> > > > - if (*len < sizeof(*vnet_hdr))
> > > > + int ret;
> > > > +
> > > > + if (*len < vnet_hdr_sz)
> > > > return -EINVAL;
> > > > - *len -= sizeof(*vnet_hdr);
> > > > + *len -= vnet_hdr_sz;
> > > > if (!copy_from_iter_full(vnet_hdr, sizeof(*vnet_hdr), &msg->msg_iter))
> > > > return -EFAULT;
> > > > - return __packet_snd_vnet_parse(vnet_hdr, *len);
> > > > + ret = __packet_snd_vnet_parse(vnet_hdr, *len);
> > > > +
> > > > + /* move iter to point to the start of mac header */
> > > > + if (ret == 0)
> > > > + iov_iter_advance(&msg->msg_iter, vnet_hdr_sz - sizeof(struct virtio_net_hdr));
> > > > + return ret;
> > > > }
> > > > static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
> > > > @@ -2730,6 +2744,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
> > > > int status = TP_STATUS_AVAILABLE;
> > > > int hlen, tlen, copylen = 0;
> > > > long timeo = 0;
> > > > + int vnet_hdr_sz;
> > > > mutex_lock(&po->pg_vec_lock);
> > > > @@ -2811,8 +2826,9 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
> > > > tlen = dev->needed_tailroom;
> > > > if (po->has_vnet_hdr) {
> > > > vnet_hdr = data;
> > > > - data += sizeof(*vnet_hdr);
> > > > - tp_len -= sizeof(*vnet_hdr);
> > > > + vnet_hdr_sz = po->vnet_hdr_sz;
> > > > + data += vnet_hdr_sz;
> > > > + tp_len -= vnet_hdr_sz;
> > > > if (tp_len < 0 ||
> > > > __packet_snd_vnet_parse(vnet_hdr, tp_len)) {
> > > > tp_len = -EINVAL;
> > > > @@ -2947,6 +2963,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
> > > > int offset = 0;
> > > > struct packet_sock *po = pkt_sk(sk);
> > > > bool has_vnet_hdr = false;
> > > > + int vnet_hdr_sz;
> > > > int hlen, tlen, linear;
> > > > int extra_len = 0;
> > > > @@ -2991,7 +3008,8 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
> > > > if (sock->type == SOCK_RAW)
> > > > reserve = dev->hard_header_len;
> > > > if (po->has_vnet_hdr) {
> > > > - err = packet_snd_vnet_parse(msg, &len, &vnet_hdr);
> > > > + vnet_hdr_sz = po->vnet_hdr_sz;
> > > > + err = packet_snd_vnet_parse(msg, &len, &vnet_hdr, vnet_hdr_sz);
> > > > if (err)
> > > > goto out_unlock;
> > > > has_vnet_hdr = true;
> > > > @@ -3068,7 +3086,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
> > > > err = virtio_net_hdr_to_skb(skb, &vnet_hdr, vio_le());
> > > > if (err)
> > > > goto out_free;
> > > > - len += sizeof(vnet_hdr);
> > > > + len += vnet_hdr_sz;
> > > > virtio_net_hdr_set_proto(skb, &vnet_hdr);
> > > > }
> > > > @@ -3452,10 +3470,10 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
> > > > packet_rcv_try_clear_pressure(pkt_sk(sk));
> > > > if (pkt_sk(sk)->has_vnet_hdr) {
> > > > - err = packet_rcv_vnet(msg, skb, &len);
> > > > + vnet_hdr_len = pkt_sk(sk)->vnet_hdr_sz;
> > > > + err = packet_rcv_vnet(msg, skb, &len, vnet_hdr_len);
> > > > if (err)
> > > > goto out_free;
> > > > - vnet_hdr_len = sizeof(struct virtio_net_hdr);
> > > > }
> > > > /* You lose any data beyond the buffer you gave. If it worries
> > > > --
> > > > 1.8.3.1
>