[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20100510164303.GA28798@redhat.com>
Date: Mon, 10 May 2010 19:43:03 +0300
From: "Michael S. Tsirkin" <mst@...hat.com>
To: David L Stevens <dlstevens@...ibm.com>
Cc: netdev@...r.kernel.org, kvm@...r.kernel.org,
virtualization@...ts.osdl.org
Subject: Re: [PATCHv7] add mergeable buffers support to vhost_net
On Wed, Apr 28, 2010 at 01:57:12PM -0700, David L Stevens wrote:
> @@ -218,18 +248,19 @@ static void handle_rx(struct vhost_net *
> use_mm(net->dev.mm);
> mutex_lock(&vq->mutex);
> vhost_disable_notify(vq);
> - hdr_size = vq->hdr_size;
> + vhost_hlen = vq->vhost_hlen;
>
> vq_log = unlikely(vhost_has_feature(&net->dev, VHOST_F_LOG_ALL)) ?
> vq->log : NULL;
>
> - for (;;) {
> - head = vhost_get_vq_desc(&net->dev, vq, vq->iov,
> - ARRAY_SIZE(vq->iov),
> - &out, &in,
> - vq_log, &log);
> + while ((datalen = vhost_head_len(vq, sock->sk))) {
> + headcount = vhost_get_desc_n(vq, vq->heads,
> + datalen + vhost_hlen,
> + &in, vq_log, &log);
> + if (headcount < 0)
> + break;
> /* OK, now we need to know about added descriptors. */
> - if (head == vq->num) {
> + if (!headcount) {
> if (unlikely(vhost_enable_notify(vq))) {
> /* They have slipped one in as we were
> * doing that: check again. */
> @@ -241,46 +272,53 @@ static void handle_rx(struct vhost_net *
> break;
> }
> /* We don't need to be notified again. */
> - if (out) {
> - vq_err(vq, "Unexpected descriptor format for RX: "
> - "out %d, int %d\n",
> - out, in);
> - break;
> - }
> - /* Skip header. TODO: support TSO/mergeable rx buffers. */
> - s = move_iovec_hdr(vq->iov, vq->hdr, hdr_size, in);
> + if (vhost_hlen)
> + /* Skip header. TODO: support TSO. */
> + s = move_iovec_hdr(vq->iov, vq->hdr, vhost_hlen, in);
> + else
> + s = copy_iovec_hdr(vq->iov, vq->hdr, vq->sock_hlen, in);
> msg.msg_iovlen = in;
> len = iov_length(vq->iov, in);
> /* Sanity check */
> if (!len) {
> vq_err(vq, "Unexpected header len for RX: "
> "%zd expected %zd\n",
> - iov_length(vq->hdr, s), hdr_size);
> + iov_length(vq->hdr, s), vhost_hlen);
> break;
> }
> err = sock->ops->recvmsg(NULL, sock, &msg,
> len, MSG_DONTWAIT | MSG_TRUNC);
> /* TODO: Check specific error and bomb out unless EAGAIN? */
> if (err < 0) {
> - vhost_discard_vq_desc(vq);
> + vhost_discard_desc(vq, headcount);
> break;
> }
> - /* TODO: Should check and handle checksum. */
> - if (err > len) {
> - pr_err("Discarded truncated rx packet: "
> - " len %d > %zd\n", err, len);
> - vhost_discard_vq_desc(vq);
> + if (err != datalen) {
> + pr_err("Discarded rx packet: "
> + " len %d, expected %zd\n", err, datalen);
> + vhost_discard_desc(vq, headcount);
> continue;
> }
> len = err;
> - err = memcpy_toiovec(vq->hdr, (unsigned char *)&hdr, hdr_size);
> - if (err) {
> - vq_err(vq, "Unable to write vnet_hdr at addr %p: %d\n",
> - vq->iov->iov_base, err);
> + if (vhost_hlen &&
> + memcpy_toiovecend(vq->hdr, (unsigned char *)&hdr, 0,
> + vhost_hlen)) {
> + vq_err(vq, "Unable to write vnet_hdr at addr %p\n",
> + vq->iov->iov_base);
> break;
> }
> - len += hdr_size;
> - vhost_add_used_and_signal(&net->dev, vq, head, len);
> + /* TODO: Should check and handle checksum. */
> + if (vhost_has_feature(&net->dev, VIRTIO_NET_F_MRG_RXBUF) &&
> + memcpy_toiovecend(vq->hdr, (unsigned char *)&headcount,
> + offsetof(typeof(hdr), num_buffers),
> + sizeof(hdr.num_buffers))) {
> + vq_err(vq, "Failed num_buffers write");
> + vhost_discard_desc(vq, headcount);
> + break;
> + }
> + len += vhost_hlen;
> + vhost_add_used_and_signal_n(&net->dev, vq, vq->heads,
> + headcount);
> if (unlikely(vq_log))
> vhost_log_write(vq, vq_log, log, len);
> total_len += len;
OK I think I see the bug here: vhost_add_used_and_signal_n
does not get the actual length, it gets the iovec length from vhost.
Guest virtio uses this as packet length, with bad results.
So I have applied the follows and it seems to have fixed the problem:
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index c16db02..9d7496d 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -219,7 +219,7 @@ static int peek_head_len(struct vhost_virtqueue *vq, struct sock *sk)
/* This is a multi-buffer version of vhost_get_desc, that works if
* vq has read descriptors only.
* @vq - the relevant virtqueue
- * @datalen - data length we'll be reading
+ * @datalen - data length we'll be reading. must be > 0
* @iovcount - returned count of io vectors we fill
* @log - vhost log
* @log_num - log offset
@@ -236,9 +236,10 @@ static int get_rx_bufs(struct vhost_virtqueue *vq,
int seg = 0;
int headcount = 0;
unsigned d;
+ size_t len;
int r, nlogs = 0;
- while (datalen > 0) {
+ for (;;) {
if (unlikely(headcount >= VHOST_NET_MAX_SG)) {
r = -ENOBUFS;
goto err;
@@ -260,16 +261,20 @@ static int get_rx_bufs(struct vhost_virtqueue *vq,
nlogs += *log_num;
log += *log_num;
}
+ len = iov_length(vq->iov + seg, in);
+ seg += in;
heads[headcount].id = d;
- heads[headcount].len = iov_length(vq->iov + seg, in);
- datalen -= heads[headcount].len;
+ if (datalen <= len)
+ break;
+ heads[headcount].len = len;
++headcount;
- seg += in;
+ datalen -= len;
}
+ heads[headcount].len = datalen;
*iovcount = seg;
if (unlikely(log))
*log_num = nlogs;
- return headcount;
+ return headcount + 1;
err:
vhost_discard_desc(vq, headcount);
return r;
--
MST
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists