Message-ID: <20130624032407.7546.96685.stgit@nitbit.x32>
Date:	Sun, 23 Jun 2013 20:24:11 -0700
From:	John Fastabend <john.fastabend@...il.com>
To:	netdev@...r.kernel.org
Cc:	therbert@...gle.com, ben@...adent.org.uk,
	jesse.brandeburg@...el.com, jeffrey.t.kirsher@...el.com
Subject: [RFC PATCH] net: add a tx_queue attribute tx_rate_limit in Mbps

This adds a rate limit attribute, tx_rate_limit, to the tx_queue sysfs
entries to allow rate limiting in units of Mbps. Along with mqprio and
BQL this provides another knob to tune queue performance. By default it
is disabled with a setting of '0'.

Adding this as a queue attribute and _not_ a qdisc option allows using
rate limits with qdisc schemes that may not align with tx rings, and
also allows using QoS schemes along with rate limits.

A sample implementation is provided for ixgbe. Any improvements or
suggestions are welcome; I would also be interested to know whether this
works with other hardware and whether Mbps is a good default unit.
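
For hardware that only supports a coarse rate granularity, the intent is
that the driver quantize the request *down* and report what it actually
programmed back through the maxrate pointer (see the ndo_set_ratelimit
comment below). A rough sketch of how another driver might implement the
callback, where the foo_ name, the 100 Mbps granularity and the
"program the hardware" steps are placeholders and not part of this
patch (rounddown() comes from linux/kernel.h):

static int foo_set_ratelimit(struct net_device *dev, int queue_index,
			     u32 *maxrate)
{
	u32 rate = *maxrate;

	/* '0' means no limit, nothing to quantize */
	if (!rate) {
		/* ... clear any hardware rate limiter on queue_index ... */
		return 0;
	}

	/* round *down* to the hardware granularity so the requested
	 * maxrate is never exceeded
	 */
	rate = rounddown(rate, 100);
	if (!rate)
		return -EINVAL;

	/* ... program queue_index to 'rate' Mbps in hardware ... */

	/* report the value actually programmed back to the stack */
	*maxrate = rate;
	return 0;
}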

I tested this briefly with iperf/netperf:

# echo 4000 > /sys/class/net/p3p2/queues/tx-0/tx_rate_limit
# cat /sys/class/net/p3p2/queues/tx-0/tx_rate_limit
4000
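
Since a driver is allowed to round the request down, reading the value
back shows what was actually programmed. A small user-space sketch of
the same test (interface name and value taken from the shell commands
above):

#include <stdio.h>

int main(void)
{
	const char *path = "/sys/class/net/p3p2/queues/tx-0/tx_rate_limit";
	unsigned long rate;
	FILE *f;

	f = fopen(path, "w");
	if (!f) {
		perror(path);
		return 1;
	}
	fprintf(f, "4000");
	if (fclose(f)) {	/* the sysfs store may reject the value */
		perror("tx_rate_limit");
		return 1;
	}

	f = fopen(path, "r");
	if (!f || fscanf(f, "%lu", &rate) != 1) {
		perror(path);
		return 1;
	}
	fclose(f);

	/* the driver may have quantized the request downward */
	printf("tx-0 rate limit is now %lu Mbps\n", rate);
	return 0;
}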

Signed-off-by: John Fastabend <john.r.fastabend@...el.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c  |    1 
 drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c |   47 ++++++++++++++--
 drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h |    2 +
 include/linux/netdevice.h                      |   12 ++++
 net/core/net-sysfs.c                           |   69 +++++++++++++++++++-----
 5 files changed, 110 insertions(+), 21 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 047ebaa..8d168a0 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -7244,6 +7244,7 @@ static const struct net_device_ops ixgbe_netdev_ops = {
 	.ndo_fdb_add		= ixgbe_ndo_fdb_add,
 	.ndo_bridge_setlink	= ixgbe_ndo_bridge_setlink,
 	.ndo_bridge_getlink	= ixgbe_ndo_bridge_getlink,
+	.ndo_set_ratelimit	= ixgbe_set_rate_limit,
 };
 
 /**
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
index 1e7d587..22f3df2 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
@@ -1107,17 +1107,14 @@ static int ixgbe_link_mbps(struct ixgbe_adapter *adapter)
 	}
 }
 
-static void ixgbe_set_vf_rate_limit(struct ixgbe_adapter *adapter, int vf)
+static u32 ixgbe_bcnrc_from_rate(struct ixgbe_adapter *adapter,
+				 u16 tx_rate, int link_speed)
 {
-	struct ixgbe_ring_feature *vmdq = &adapter->ring_feature[RING_F_VMDQ];
-	struct ixgbe_hw *hw = &adapter->hw;
 	u32 bcnrc_val = 0;
-	u16 queue, queues_per_pool;
-	u16 tx_rate = adapter->vfinfo[vf].tx_rate;
 
 	if (tx_rate) {
 		/* start with base link speed value */
-		bcnrc_val = adapter->vf_rate_link_speed;
+		bcnrc_val = link_speed;
 
 		/* Calculate the rate factor values to set */
 		bcnrc_val <<= IXGBE_RTTBCNRC_RF_INT_SHIFT;
@@ -1131,6 +1128,11 @@ static void ixgbe_set_vf_rate_limit(struct ixgbe_adapter *adapter, int vf)
 		bcnrc_val |= IXGBE_RTTBCNRC_RS_ENA;
 	}
 
+	return bcnrc_val;
+}
+
+static void ixgbe_set_xmit_compensation(struct ixgbe_hw *hw)
+{
 	/*
 	 * Set global transmit compensation time to the MMW_SIZE in RTTBCNRM
 	 * register. Typically MMW_SIZE=0x014 if 9728-byte jumbo is supported
@@ -1146,6 +1148,39 @@ static void ixgbe_set_vf_rate_limit(struct ixgbe_adapter *adapter, int vf)
 	default:
 		break;
 	}
+}
+
+int ixgbe_set_rate_limit(struct net_device *dev, int index, u32 *tx_rate)
+{
+	struct ixgbe_adapter *a = netdev_priv(dev);
+	struct ixgbe_hw *hw = &a->hw;
+	int linkspeed = ixgbe_link_mbps(a);
+	u8 reg_idx = a->tx_ring[index]->reg_idx;
+	u32 bcnrc = ixgbe_bcnrc_from_rate(a, *tx_rate, linkspeed);
+
+	/* rate limit must be greater than 10 Mbps and no greater than link speed */
+	if (*tx_rate && ((*tx_rate <= 10) || (*tx_rate > linkspeed)))
+		return -EINVAL;
+
+	ixgbe_set_xmit_compensation(hw);
+
+	IXGBE_WRITE_REG(hw, IXGBE_RTTDQSEL, reg_idx);
+	IXGBE_WRITE_REG(hw, IXGBE_RTTBCNRC, bcnrc);
+
+	return 0;
+}
+
+static void ixgbe_set_vf_rate_limit(struct ixgbe_adapter *adapter, int vf)
+{
+	struct ixgbe_ring_feature *vmdq = &adapter->ring_feature[RING_F_VMDQ];
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 bcnrc_val = 0;
+	u16 queue, queues_per_pool;
+	u16 tx_rate = adapter->vfinfo[vf].tx_rate;
+
+	bcnrc_val = ixgbe_bcnrc_from_rate(adapter, tx_rate,
+					  adapter->vf_rate_link_speed);
+	ixgbe_set_xmit_compensation(hw);
 
 	/* determine how many queues per pool based on VMDq mask */
 	queues_per_pool = __ALIGN_MASK(1, ~vmdq->mask);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h
index 4713f9f..d8b4bbe 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h
@@ -56,5 +56,7 @@ static inline void ixgbe_set_vmvir(struct ixgbe_adapter *adapter,
 	IXGBE_WRITE_REG(hw, IXGBE_VMVIR(vf), vmvir);
 }
 
+int ixgbe_set_rate_limit(struct net_device *dev, int index, u32 *tx_rate);
+
 #endif /* _IXGBE_SRIOV_H_ */
 
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 09b4188..d84d69a 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -574,6 +574,7 @@ struct netdev_queue {
 #ifdef CONFIG_BQL
 	struct dql		dql;
 #endif
+	unsigned long		rate_limit;
 } ____cacheline_aligned_in_smp;
 
 static inline int netdev_queue_numa_node_read(const struct netdev_queue *q)
@@ -932,6 +933,14 @@ struct netdev_fcoe_hbainfo {
  *	that determine carrier state from physical hardware properties (eg
  *	network cables) or protocol-dependent mechanisms (eg
  *	USB_CDC_NOTIFY_NETWORK_CONNECTION) should NOT implement this function.
+ *
+ * int (*ndo_set_ratelimit)(struct net_device *dev,
+ *			    int queue_index, u32 *maxrate)
+ *	Called to set the rate limit in Mbps specified by maxrate for the
+ *	given queue_index. It is expected that hardware may quantize the
+ *	rate limits. In these cases the driver should guarantee the
+ *	specified maxrate is not exceeded and return the set value in maxrate.
+ *	Zero should be returned on success, otherwise an appropriate error code.
  */
 struct net_device_ops {
 	int			(*ndo_init)(struct net_device *dev);
@@ -1060,6 +1069,9 @@ struct net_device_ops {
 						      struct nlmsghdr *nlh);
 	int			(*ndo_change_carrier)(struct net_device *dev,
 						      bool new_carrier);
+	int			(*ndo_set_ratelimit)(struct net_device *dev,
+						     int queue_index,
+						     u32 *max_rate);
 };
 
 /*
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 981fed3..ff61852 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -819,6 +819,59 @@ static ssize_t show_trans_timeout(struct netdev_queue *queue,
 	return sprintf(buf, "%lu", trans_timeout);
 }
 
+static ssize_t show_rate_limit(struct netdev_queue *queue,
+				  struct netdev_queue_attribute *attribute,
+				  char *buf)
+{
+	return sprintf(buf, "%lu", queue->rate_limit);
+}
+
+static inline unsigned int get_netdev_queue_index(struct netdev_queue *queue)
+{
+	struct net_device *dev = queue->dev;
+	int i;
+
+	for (i = 0; i < dev->num_tx_queues; i++)
+		if (queue == &dev->_tx[i])
+			break;
+
+	BUG_ON(i >= dev->num_tx_queues);
+
+	return i;
+}
+
+static ssize_t set_tx_rate_limit(struct netdev_queue *queue,
+				 struct netdev_queue_attribute *attribute,
+				 const char *buf, size_t len)
+{
+	struct net_device *dev = queue->dev;
+	int err, index = get_netdev_queue_index(queue);
+	u32 rate = 0;
+
+	err = kstrtou32(buf, 10, &rate);
+	if (err < 0)
+		return err;
+
+	if (!rtnl_trylock())
+		return restart_syscall();
+
+	if (dev->netdev_ops->ndo_set_ratelimit)
+		err = dev->netdev_ops->ndo_set_ratelimit(dev, index, &rate);
+	else
+		err = -EOPNOTSUPP;
+	rtnl_unlock();
+
+	if (err < 0)
+		return err;
+
+	queue->rate_limit = rate;
+	return len;
+}
+
+static struct netdev_queue_attribute queue_rate_limit =
+	__ATTR(tx_rate_limit, S_IRUGO | S_IWUSR,
+	       show_rate_limit, set_tx_rate_limit);
+
 static struct netdev_queue_attribute queue_trans_timeout =
 	__ATTR(tx_timeout, S_IRUGO, show_trans_timeout, NULL);
 
@@ -933,21 +986,6 @@ static struct attribute_group dql_group = {
 #endif /* CONFIG_BQL */
 
 #ifdef CONFIG_XPS
-static inline unsigned int get_netdev_queue_index(struct netdev_queue *queue)
-{
-	struct net_device *dev = queue->dev;
-	int i;
-
-	for (i = 0; i < dev->num_tx_queues; i++)
-		if (queue == &dev->_tx[i])
-			break;
-
-	BUG_ON(i >= dev->num_tx_queues);
-
-	return i;
-}
-
-
 static ssize_t show_xps_map(struct netdev_queue *queue,
 			    struct netdev_queue_attribute *attribute, char *buf)
 {
@@ -1032,6 +1070,7 @@ static struct attribute *netdev_queue_default_attrs[] = {
 #ifdef CONFIG_XPS
 	&xps_cpus_attribute.attr,
 #endif
+	&queue_rate_limit.attr,
 	NULL
 };
 

