lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1332886987-11613-2-git-send-email-haiyangz@microsoft.com>
Date:	Tue, 27 Mar 2012 15:23:07 -0700
From:	Haiyang Zhang <haiyangz@...rosoft.com>
To:	davem@...emloft.net, netdev@...r.kernel.org
Cc:	haiyangz@...rosoft.com, kys@...rosoft.com, olaf@...fle.de,
	linux-kernel@...r.kernel.org, devel@...uxdriverproject.org
Subject: [PATCH 1/1] net/hyperv: Add flow control based on hi/low watermark

In the existing code, we only stop queue when the ringbuffer is full,
so the current packet has to be dropped or retried from upper layer.

This patch stops the tx queue when available ringbuffer is below
the low watermark. So the ringbuffer still has small amount of space
available for the current packet. This will reduce the overhead of
retries on sending.

Signed-off-by: Haiyang Zhang <haiyangz@...rosoft.com>
Reviewed-by: K. Y. Srinivasan <kys@...rosoft.com>
---
 drivers/hv/ring_buffer.c        |   31 -----------------------------
 drivers/net/hyperv/netvsc.c     |   41 +++++++++++++++++++++++++++++++++++---
 drivers/net/hyperv/netvsc_drv.c |    6 ++++-
 include/linux/hyperv.h          |   29 +++++++++++++++++++++++++++
 4 files changed, 71 insertions(+), 36 deletions(-)

diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c
index 8af25a0..7233c88 100644
--- a/drivers/hv/ring_buffer.c
+++ b/drivers/hv/ring_buffer.c
@@ -30,37 +30,6 @@
 #include "hyperv_vmbus.h"
 
 
-/* #defines */
-
-
-/* Amount of space to write to */
-#define BYTES_AVAIL_TO_WRITE(r, w, z) \
-	((w) >= (r)) ? ((z) - ((w) - (r))) : ((r) - (w))
-
-
-/*
- *
- * hv_get_ringbuffer_availbytes()
- *
- * Get number of bytes available to read and to write to
- * for the specified ring buffer
- */
-static inline void
-hv_get_ringbuffer_availbytes(struct hv_ring_buffer_info *rbi,
-			  u32 *read, u32 *write)
-{
-	u32 read_loc, write_loc;
-
-	smp_read_barrier_depends();
-
-	/* Capture the read/write indices before they changed */
-	read_loc = rbi->ring_buffer->read_index;
-	write_loc = rbi->ring_buffer->write_index;
-
-	*write = BYTES_AVAIL_TO_WRITE(read_loc, write_loc, rbi->ring_datasize);
-	*read = rbi->ring_datasize - *write;
-}
-
 /*
  * hv_get_next_write_location()
  *
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index d025c83..8b91947 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -428,6 +428,24 @@ int netvsc_device_remove(struct hv_device *device)
 	return 0;
 }
 
+
+#define RING_AVAIL_PERCENT_HIWATER 20
+#define RING_AVAIL_PERCENT_LOWATER 10
+
+/*
+ * Get the percentage of available bytes to write in the ring.
+ * The return value is in range from 0 to 100.
+ */
+static inline u32 hv_ringbuf_avail_percent(
+		struct hv_ring_buffer_info *ring_info)
+{
+	u32 avail_read, avail_write;
+
+	hv_get_ringbuffer_availbytes(ring_info, &avail_read, &avail_write);
+
+	return avail_write * 100 / ring_info->ring_datasize;
+}
+
 static void netvsc_send_completion(struct hv_device *device,
 				   struct vmpacket_descriptor *packet)
 {
@@ -455,6 +473,8 @@ static void netvsc_send_completion(struct hv_device *device,
 		complete(&net_device->channel_init_wait);
 	} else if (nvsp_packet->hdr.msg_type ==
 		   NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE) {
+		int num_outstanding_sends;
+
 		/* Get the send context */
 		nvsc_packet = (struct hv_netvsc_packet *)(unsigned long)
 			packet->trans_id;
@@ -463,10 +483,14 @@ static void netvsc_send_completion(struct hv_device *device,
 		nvsc_packet->completion.send.send_completion(
 			nvsc_packet->completion.send.send_completion_ctx);
 
-		atomic_dec(&net_device->num_outstanding_sends);
+		num_outstanding_sends =
+			atomic_dec_return(&net_device->num_outstanding_sends);
 
-		if (netif_queue_stopped(ndev) && !net_device->start_remove)
-			netif_wake_queue(ndev);
+		if (netif_queue_stopped(ndev) && !net_device->start_remove &&
+			(hv_ringbuf_avail_percent(&device->channel->outbound)
+			> RING_AVAIL_PERCENT_HIWATER ||
+			num_outstanding_sends < 1))
+				netif_wake_queue(ndev);
 	} else {
 		netdev_err(ndev, "Unknown send completion packet type- "
 			   "%d received!!\n", nvsp_packet->hdr.msg_type);
@@ -519,10 +543,19 @@ int netvsc_send(struct hv_device *device,
 
 	if (ret == 0) {
 		atomic_inc(&net_device->num_outstanding_sends);
+		if (hv_ringbuf_avail_percent(&device->channel->outbound) <
+			RING_AVAIL_PERCENT_LOWATER) {
+			netif_stop_queue(ndev);
+			if (atomic_read(&net_device->
+				num_outstanding_sends) < 1)
+				netif_wake_queue(ndev);
+		}
 	} else if (ret == -EAGAIN) {
 		netif_stop_queue(ndev);
-		if (atomic_read(&net_device->num_outstanding_sends) < 1)
+		if (atomic_read(&net_device->num_outstanding_sends) < 1) {
 			netif_wake_queue(ndev);
+			ret = -ENOSPC;
+		}
 	} else {
 		netdev_err(ndev, "Unable to send packet %p ret %d\n",
 			   packet, ret);
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index dd29478..a0cc127 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -224,9 +224,13 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
 		net->stats.tx_packets++;
 	} else {
 		kfree(packet);
+		if (ret != -EAGAIN) {
+			dev_kfree_skb_any(skb);
+			net->stats.tx_dropped++;
+		}
 	}
 
-	return ret ? NETDEV_TX_BUSY : NETDEV_TX_OK;
+	return (ret == -EAGAIN) ? NETDEV_TX_BUSY : NETDEV_TX_OK;
 }
 
 /*
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 5852545..2c366f0 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -274,6 +274,35 @@ struct hv_ring_buffer_debug_info {
 	u32 bytes_avail_towrite;
 };
 
+/* Amount of space to write to */
+#define BYTES_AVAIL_TO_WRITE(r, w, z) \
+	(((w) >= (r)) ? ((z) - ((w) - (r))) : ((r) - (w)))
+
+
+/*
+ *
+ * hv_get_ringbuffer_availbytes()
+ *
+ * Get number of bytes available to read and to write to
+ * for the specified ring buffer
+ */
+extern inline void
+hv_get_ringbuffer_availbytes(struct hv_ring_buffer_info *rbi,
+			  u32 *read, u32 *write)
+{
+	u32 read_loc, write_loc;
+
+	smp_read_barrier_depends();
+
+	/* Capture the read/write indices before they changed */
+	read_loc = rbi->ring_buffer->read_index;
+	write_loc = rbi->ring_buffer->write_index;
+
+	*write = BYTES_AVAIL_TO_WRITE(read_loc, write_loc, rbi->ring_datasize);
+	*read = rbi->ring_datasize - *write;
+}
+
+
 /*
  * We use the same version numbering for all Hyper-V modules.
  *
-- 
1.7.4.1

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ