netdev - Re: [PATCH net-next v4 3/4] net: vhost: factor out busy polling logic to vhost_net_busy

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <CAMDZJNViR1X7QBOn9kjGHa8WaaGe4skqa+K9q_4GMYZVSTE59w@mail.gmail.com>
Date:   Wed, 4 Jul 2018 09:40:31 +0800
From:   Tonghao Zhang <xiangxia.m.yue@...il.com>
To:     jasowang@...hat.com
Cc:     mst@...hat.com, makita.toshiaki@....ntt.co.jp,
        virtualization@...ts.linux-foundation.org,
        Linux Kernel Network Developers <netdev@...r.kernel.org>,
        Tonghao Zhang <zhangtonghao@...ichuxing.com>
Subject: Re: [PATCH net-next v4 3/4] net: vhost: factor out busy polling logic
 to vhost_net_busy_poll()

On Tue, Jul 3, 2018 at 1:55 PM Jason Wang <jasowang@...hat.com> wrote:
>
>
>
> On 2018年07月03日 13:48, Tonghao Zhang wrote:
> > On Tue, Jul 3, 2018 at 10:12 AM Jason Wang <jasowang@...hat.com> wrote:
> >>
> >>
> >> On 2018年07月02日 20:57, xiangxia.m.yue@...il.com wrote:
> >>> From: Tonghao Zhang <xiangxia.m.yue@...il.com>
> >>>
> >>> Factor out generic busy polling logic and will be
> >>> used for in tx path in the next patch. And with the patch,
> >>> qemu can set differently the busyloop_timeout for rx queue.
> >>>
> >>> Signed-off-by: Tonghao Zhang <zhangtonghao@...ichuxing.com>
> >>> ---
> >>>    drivers/vhost/net.c | 94 +++++++++++++++++++++++++++++++----------------------
> >>>    1 file changed, 55 insertions(+), 39 deletions(-)
> >>>
> >>> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
> >>> index 62bb8e8..2790959 100644
> >>> --- a/drivers/vhost/net.c
> >>> +++ b/drivers/vhost/net.c
> >>> @@ -429,6 +429,52 @@ static int vhost_net_enable_vq(struct vhost_net *n,
> >>>        return vhost_poll_start(poll, sock->file);
> >>>    }
> >>>
> >>> +static int sk_has_rx_data(struct sock *sk)
> >>> +{
> >>> +     struct socket *sock = sk->sk_socket;
> >>> +
> >>> +     if (sock->ops->peek_len)
> >>> +             return sock->ops->peek_len(sock);
> >>> +
> >>> +     return skb_queue_empty(&sk->sk_receive_queue);
> >>> +}
> >>> +
> >>> +static void vhost_net_busy_poll(struct vhost_net *net,
> >>> +                             struct vhost_virtqueue *rvq,
> >>> +                             struct vhost_virtqueue *tvq,
> >>> +                             bool rx)
> >>> +{
> >>> +     unsigned long uninitialized_var(endtime);
> >>> +     unsigned long busyloop_timeout;
> >>> +     struct socket *sock;
> >>> +     struct vhost_virtqueue *vq = rx ? tvq : rvq;
> >>> +
> >>> +     mutex_lock_nested(&vq->mutex, rx ? VHOST_NET_VQ_TX: VHOST_NET_VQ_RX);
> >>> +
> >>> +     vhost_disable_notify(&net->dev, vq);
> >>> +     sock = rvq->private_data;
> >>> +     busyloop_timeout = rx ? rvq->busyloop_timeout : tvq->busyloop_timeout;
> >>> +
> >>> +     preempt_disable();
> >>> +     endtime = busy_clock() + busyloop_timeout;
> >>> +     while (vhost_can_busy_poll(tvq->dev, endtime) &&
> >>> +            !(sock && sk_has_rx_data(sock->sk)) &&
> >>> +            vhost_vq_avail_empty(tvq->dev, tvq))
> >>> +             cpu_relax();
> >>> +     preempt_enable();
> >>> +
> >>> +     if ((rx && !vhost_vq_avail_empty(&net->dev, vq)) ||
> >>> +         (!rx && (sock && sk_has_rx_data(sock->sk)))) {
> >>> +             vhost_poll_queue(&vq->poll);
> >>> +     } else if (unlikely(vhost_enable_notify(&net->dev, vq))) {
> >> One last question, do we need this for rx? This check will be always
> >> true under light or medium load.
> > will remove the 'unlikely'
>
> Not only the unlikely. We only want rx kick when packet is pending at
> socket but we're out of available buffers. This is not the case here
> (you have polled the vq above).
>
> So we probably want to do this only when rx == true.
got it. change it to:
+       } else if (vhost_enable_notify(&net->dev, vq) && rx) {
+               vhost_disable_notify(&net->dev, vq);
+               vhost_poll_queue(&vq->poll);
+       }

> Thanks
>
> >
> >> Thanks
> >>
> >>> +             vhost_disable_notify(&net->dev, vq);
> >>> +             vhost_poll_queue(&vq->poll);
> >>> +     }
> >>> +
> >>> +     mutex_unlock(&vq->mutex);
> >>> +}
> >>> +
> >>> +
> >>>    static int vhost_net_tx_get_vq_desc(struct vhost_net *net,
> >>>                                    struct vhost_virtqueue *vq,
> >>>                                    struct iovec iov[], unsigned int iov_size,
> >>> @@ -621,16 +667,6 @@ static int peek_head_len(struct vhost_net_virtqueue *rvq, struct sock *sk)
> >>>        return len;
> >>>    }
> >>>
> >>> -static int sk_has_rx_data(struct sock *sk)
> >>> -{
> >>> -     struct socket *sock = sk->sk_socket;
> >>> -
> >>> -     if (sock->ops->peek_len)
> >>> -             return sock->ops->peek_len(sock);
> >>> -
> >>> -     return skb_queue_empty(&sk->sk_receive_queue);
> >>> -}
> >>> -
> >>>    static void vhost_rx_signal_used(struct vhost_net_virtqueue *nvq)
> >>>    {
> >>>        struct vhost_virtqueue *vq = &nvq->vq;
> >>> @@ -645,39 +681,19 @@ static void vhost_rx_signal_used(struct vhost_net_virtqueue *nvq)
> >>>
> >>>    static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk)
> >>>    {
> >>> -     struct vhost_net_virtqueue *rvq = &net->vqs[VHOST_NET_VQ_RX];
> >>> -     struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX];
> >>> -     struct vhost_virtqueue *vq = &nvq->vq;
> >>> -     unsigned long uninitialized_var(endtime);
> >>> -     int len = peek_head_len(rvq, sk);
> >>> +     struct vhost_net_virtqueue *rnvq = &net->vqs[VHOST_NET_VQ_RX];
> >>> +     struct vhost_net_virtqueue *tnvq = &net->vqs[VHOST_NET_VQ_TX];
> >>>
> >>> -     if (!len && vq->busyloop_timeout) {
> >>> -             /* Flush batched heads first */
> >>> -             vhost_rx_signal_used(rvq);
> >>> -             /* Both tx vq and rx socket were polled here */
> >>> -             mutex_lock_nested(&vq->mutex, VHOST_NET_VQ_TX);
> >>> -             vhost_disable_notify(&net->dev, vq);
> >>> +     int len = peek_head_len(rnvq, sk);
> >>>
> >>> -             preempt_disable();
> >>> -             endtime = busy_clock() + vq->busyloop_timeout;
> >>> -
> >>> -             while (vhost_can_busy_poll(&net->dev, endtime) &&
> >>> -                    !sk_has_rx_data(sk) &&
> >>> -                    vhost_vq_avail_empty(&net->dev, vq))
> >>> -                     cpu_relax();
> >>> -
> >>> -             preempt_enable();
> >>> -
> >>> -             if (!vhost_vq_avail_empty(&net->dev, vq))
> >>> -                     vhost_poll_queue(&vq->poll);
> >>> -             else if (unlikely(vhost_enable_notify(&net->dev, vq))) {
> >>> -                     vhost_disable_notify(&net->dev, vq);
> >>> -                     vhost_poll_queue(&vq->poll);
> >>> -             }
> >>> +     if (!len && rnvq->vq.busyloop_timeout) {
> >>> +             /* Flush batched heads first */
> >>> +             vhost_rx_signal_used(rnvq);
> >>>
> >>> -             mutex_unlock(&vq->mutex);
> >>> +             /* Both tx vq and rx socket were polled here */
> >>> +             vhost_net_busy_poll(net, &rnvq->vq, &tnvq->vq, true);
> >>>
> >>> -             len = peek_head_len(rvq, sk);
> >>> +             len = peek_head_len(rnvq, sk);
> >>>        }
> >>>
> >>>        return len;
>