lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Date:	Tue, 2 Mar 2010 17:20:26 -0700
From:	David Stevens <dlstevens@...ibm.com>
To:	mst@...hat.com, rusty@...tcorp.com.au
Cc:	netdev@...r.kernel.org, kvm@...r.kernel.org,
	virtualization@...ts.osdl.org
Subject: [RFC][PATCH 2/3] vhost-net: handle vnet_hdr processing for MRG_RX_BUF

This patch adds vnet_hdr processing for mergeable buffer
support to vhost-net.

Signed-off-by: David L Stevens <dlstevens@...ibm.com>

diff -ruN net-next-p1/drivers/vhost/net.c net-next-p2/drivers/vhost/net.c
--- net-next-p1/drivers/vhost/net.c     2010-03-01 11:44:22.000000000 -0800
+++ net-next-p2/drivers/vhost/net.c     2010-03-02 13:01:34.000000000 -0800
@@ -109,7 +109,6 @@
        };
        size_t len, total_len = 0;
        int err, wmem;
-       size_t hdr_size;
        struct socket *sock = rcu_dereference(vq->private_data);
        if (!sock)
                return;
@@ -124,7 +123,6 @@
 
        if (wmem < sock->sk->sk_sndbuf * 2)
                tx_poll_stop(net);
-       hdr_size = vq->hdr_size;
 
        for (;;) {
                head.iov_base = (void *)vhost_get_vq_desc(&net->dev, vq,
@@ -148,25 +146,45 @@
                               "out %d, int %d\n", out, in);
                        break;
                }
+               if (vq->guest_hlen > vq->sock_hlen) {
+                       if (msg.msg_iov[0].iov_len == vq->guest_hlen)
+                               msg.msg_iov[0].iov_len = vq->sock_hlen;
+                       else if (out == ARRAY_SIZE(vq->iov))
+                               vq_err(vq, "handle_tx iov overflow!");
+                       else {
+                               int i;
+
+                               /* give header its own iov */
+                               for (i = out - 1; i >= 0; --i)
+                                       msg.msg_iov[i+1] = msg.msg_iov[i];
+                               msg.msg_iov[0].iov_len = vq->sock_hlen;
+                               msg.msg_iov[1].iov_base += vq->guest_hlen;
+                               msg.msg_iov[1].iov_len -= vq->guest_hlen;
+                               out++;
+                       }
+               }
                /* Skip header. TODO: support TSO. */
-               s = move_iovec_hdr(vq->iov, vq->hdr, hdr_size, out);
                msg.msg_iovlen = out;
                head.iov_len = len = iov_length(vq->iov, out);
                /* Sanity check */
                if (!len) {
                        vq_err(vq, "Unexpected header len for TX: "
                               "%zd expected %zd\n",
-                              iov_length(vq->hdr, s), hdr_size);
+                              len, vq->guest_hlen);
                        break;
                }
                /* TODO: Check specific error and bomb out unless ENOBUFS? */
                err = sock->ops->sendmsg(NULL, sock, &msg, len);
                if (unlikely(err < 0)) {
-                       vhost_discard(vq, 1);
-                       tx_poll_start(net, sock);
+                       if (err == -EAGAIN) {
+                               tx_poll_start(net, sock);
+                       } else {
+                               vq_err(vq, "sendmsg: errno %d\n", -err);
+                               /* drop packet; do not discard/resend */
+                               vhost_add_used_and_signal(&net->dev, vq, &head, 1);
+                       }
                        break;
-               }
-               if (err != len)
+               } else if (err != len)
                        pr_err("Truncated TX packet: "
                               " len %d != %zd\n", err, len);
                vhost_add_used_and_signal(&net->dev, vq, &head, 1);
@@ -207,14 +225,8 @@
                .msg_flags = MSG_DONTWAIT,
        };
 
-       struct virtio_net_hdr hdr = {
-               .flags = 0,
-               .gso_type = VIRTIO_NET_HDR_GSO_NONE
-       };
-
        size_t len, total_len = 0;
        int err, headcount, datalen;
-       size_t hdr_size;
        struct socket *sock = rcu_dereference(vq->private_data);
 
        if (!sock || !skb_head_len(&sock->sk->sk_receive_queue))
@@ -223,7 +235,6 @@
        use_mm(net->dev.mm);
        mutex_lock(&vq->mutex);
        vhost_disable_notify(vq);
-       hdr_size = vq->hdr_size;
 
        vq_log = unlikely(vhost_has_feature(&net->dev, VHOST_F_LOG_ALL)) ?
                vq->log : NULL;
@@ -232,25 +243,18 @@
                headcount = vhost_get_heads(vq, datalen, &in, vq_log, &log);
                /* OK, now we need to know about added descriptors. */
                if (!headcount) {
-                       if (unlikely(vhost_enable_notify(vq))) {
-                               /* They have slipped one in as we were
-                                * doing that: check again. */
-                               vhost_disable_notify(vq);
-                               continue;
-                       }
-                       /* Nothing new?  Wait for eventfd to tell us
-                        * they refilled. */
+                       vhost_enable_notify(vq);
                        break;
                }
                /* Skip header. TODO: support TSO/mergeable rx buffers. */
-               s = move_iovec_hdr(vq->iov, vq->hdr, hdr_size, in);
                msg.msg_iovlen = in;
                len = iov_length(vq->iov, in);
+
                /* Sanity check */
                if (!len) {
                        vq_err(vq, "Unexpected header len for RX: "
                               "%zd expected %zd\n",
-                              iov_length(vq->hdr, s), hdr_size);
+                              len, vq->guest_hlen);
                        break;
                }
                err = sock->ops->recvmsg(NULL, sock, &msg,
@@ -268,13 +272,7 @@
                        continue;
                }
                len = err;
-               err = memcpy_toiovec(vq->hdr, (unsigned char *)&hdr, hdr_size);
-               if (err) {
-                       vq_err(vq, "Unable to write vnet_hdr at addr %p: %d\n",
-                              vq->iov->iov_base, err);
-                       break;
-               }
-               len += hdr_size;
+               len += vq->guest_hlen - vq->sock_hlen;
                vhost_add_used_and_signal(&net->dev, vq, vq->heads, headcount);
                if (unlikely(vq_log))
                        vhost_log_write(vq, vq_log, log, len);
@@ -483,6 +481,13 @@
        return ERR_PTR(-ENOTSOCK);
 }
 
+static int vhost_sock_is_raw(struct socket *sock)
+{
+       if (!sock || !sock->sk)
+               return 0;
+       return sock->sk->sk_type == SOCK_RAW;
+}
+
static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
 {
        struct socket *sock, *oldsock;
@@ -519,6 +524,20 @@
 
        vhost_net_disable_vq(n, vq);
        rcu_assign_pointer(vq->private_data, sock);
+
+       if (sock && sock->sk) {
+               if (!vhost_sock_is_raw(sock) ||
+                   vhost_has_feature(&n->dev, VHOST_NET_F_VIRTIO_NET_HDR)) {
+                       vq->sock_hlen = sizeof(struct virtio_net_hdr);
+                       if (vhost_has_feature(&n->dev, VIRTIO_NET_F_MRG_RXBUF))
+                               vq->guest_hlen =
+                                       sizeof(struct virtio_net_hdr_mrg_rxbuf);
+                       else
+                               vq->guest_hlen = sizeof(struct virtio_net_hdr);
+               } else
+                       vq->guest_hlen = vq->sock_hlen = 0;
+       } else
+               vq_err(vq, "vhost_net_set_backend: sock->sk is NULL");
        vhost_net_enable_vq(n, vq);
        mutex_unlock(&vq->mutex);
 done:
@@ -566,8 +585,17 @@
        n->dev.acked_features = features;
        smp_wmb();
        for (i = 0; i < VHOST_NET_VQ_MAX; ++i) {
-               mutex_lock(&n->vqs[i].mutex);
-               n->vqs[i].hdr_size = hdr_size;
+               struct vhost_virtqueue *vq = n->vqs + i;
+               struct socket *sock = vq->private_data;
+
+               mutex_lock(&vq->mutex);
+               if (features & (1 << VIRTIO_NET_F_MRG_RXBUF))
+                       vq->sock_hlen = sizeof(struct virtio_net_hdr_mrg_rxbuf);
+               else if (features & (1 << VHOST_NET_F_VIRTIO_NET_HDR) ||
+                        !vhost_sock_is_raw(sock))
+                       vq->sock_hlen = sizeof(struct virtio_net_hdr);
+               else
+                       vq->sock_hlen = 0;
                mutex_unlock(&n->vqs[i].mutex);
        }
        vhost_net_flush(n);
diff -ruN net-next-p1/drivers/vhost/vhost.c net-next-p2/drivers/vhost/vhost.c
--- net-next-p1/drivers/vhost/vhost.c   2010-03-01 11:44:06.000000000 -0800
+++ net-next-p2/drivers/vhost/vhost.c   2010-03-02 12:53:02.000000000 -0800
@@ -113,7 +113,8 @@
        vq->used_flags = 0;
        vq->log_used = false;
        vq->log_addr = -1ull;
-       vq->hdr_size = 0;
+       vq->guest_hlen = 0;
+       vq->sock_hlen = 0;
        vq->private_data = NULL;
        vq->log_base = NULL;
        vq->error_ctx = NULL;
@@ -848,20 +849,85 @@
        return 0;
 }
 
+static int
+vhost_get_hdr(struct vhost_virtqueue *vq, int *in, struct vhost_log *log,
+       int *log_num)
+{
+       struct iovec *heads = vq->heads;
+       struct iovec *iov = vq->iov;
+       int out;
+
+       *in = 0;
+       iov[0].iov_len = 0;
+
+       /* get buffer, starting from iov[1] */
+       heads[0].iov_base = (void *)vhost_get_vq_desc(vq->dev, vq,
+               vq->iov+1, ARRAY_SIZE(vq->iov)-1, &out, in, log, log_num);
+       if (out || *in <= 0) {
+               vq_err(vq, "unexpected descriptor format for RX: out %d, "
+                       "in %d\n", out, *in);
+               return 0;
+       }
+       if (heads[0].iov_base == (void *)vq->num)
+               return 0;
+
+       /* make iov[0] the header */
+       if (!vq->guest_hlen) {
+               if (vq->sock_hlen) {
+                       static struct virtio_net_hdr junk; /* bit bucket */
+
+                       iov[0].iov_base = &junk;
+                       iov[0].iov_len = sizeof(junk);
+               } else
+                       iov[0].iov_len = 0;
+       }
+       if (vq->sock_hlen < vq->guest_hlen) {
+               iov[0].iov_base = iov[1].iov_base;
+               iov[0].iov_len = vq->sock_hlen;
+
+               if (iov[1].iov_len < vq->sock_hlen) {
+                       vq_err(vq, "can't fit header in one buffer!");
+                       vhost_discard(vq, 1);
+                       return 0;
+               }
+               if (!vq->sock_hlen) {
+                       static const struct virtio_net_hdr_mrg_rxbuf hdr = {
+                               .hdr.flags = 0,
+                               .hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE
+                       };
+                       memcpy(iov[0].iov_base, &hdr, vq->guest_hlen);
+               }
+               iov[1].iov_base += vq->guest_hlen;
+               iov[1].iov_len -= vq->guest_hlen;
+       }
+       return 1;
+}
+
unsigned vhost_get_heads(struct vhost_virtqueue *vq, int datalen, int *iovcount,
        struct vhost_log *log, unsigned int *log_num)
 {
        struct iovec *heads = vq->heads;
-       int out, in;
+       int out, in = 0;
+       int seg = 0;
        int hc = 0;
 
+       if (vq->guest_hlen != vq->sock_hlen) {
+               seg = vhost_get_hdr(vq, &in, log, log_num);
+               if (!seg)
+                       return 0;
+               hc++;
+               datalen -= iov_length(vq->iov+seg, in);
+               seg += in;
+       }
+
        while (datalen > 0) {
                if (hc >= VHOST_NET_MAX_SG) {
                        vhost_discard(vq, hc);
                        return 0;
                }
                heads[hc].iov_base = (void *)vhost_get_vq_desc(vq->dev, vq,
-                       vq->iov, ARRAY_SIZE(vq->iov), &out, &in, log, log_num);
+                       vq->iov+seg, ARRAY_SIZE(vq->iov)-seg, &out, &in,
+                       log, log_num);
                if (heads[hc].iov_base == (void *)vq->num) {
                        vhost_discard(vq, hc);
                        return 0;
@@ -872,11 +938,12 @@
                        vhost_discard(vq, hc);
                        return 0;
                }
-               heads[hc].iov_len = iov_length(vq->iov, in);
-               hc++;
+               heads[hc].iov_len = iov_length(vq->iov+seg, in);
                datalen -= heads[hc].iov_len;
+               hc++;
+               seg += in;
        }
-       *iovcount = in;
+       *iovcount = seg;
        return hc;
 }
 
diff -ruN net-next-p1/drivers/vhost/vhost.h net-next-p2/drivers/vhost/vhost.h
--- net-next-p1/drivers/vhost/vhost.h   2010-03-01 11:42:18.000000000 -0800
+++ net-next-p2/drivers/vhost/vhost.h   2010-03-02 13:02:03.000000000 -0800
@@ -82,10 +82,9 @@
        u64 log_addr;
 
        struct iovec indirect[VHOST_NET_MAX_SG];
-       struct iovec iov[VHOST_NET_MAX_SG];
-       struct iovec hdr[VHOST_NET_MAX_SG];
+       struct iovec iov[VHOST_NET_MAX_SG+1]; /* an extra for vnet hdr */
        struct iovec heads[VHOST_NET_MAX_SG];
-       size_t hdr_size;
+       size_t guest_hlen, sock_hlen;
        /* We use a kind of RCU to access private pointer.
         * All readers access it from workqueue, which makes it possible to
         * flush the workqueue instead of synchronize_rcu. Therefore readers do


Download attachment "MRXB2.patch" of type "application/octet-stream" (9374 bytes)

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ