lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20101201182258.2748.99569.stgit@jf-dev1-dcblab>
Date:	Wed, 01 Dec 2010 10:22:58 -0800
From:	John Fastabend <john.r.fastabend@...el.com>
To:	davem@...emloft.net
Cc:	john.r.fastabend@...el.com, netdev@...r.kernel.org,
	tgraf@...radead.org, eric.dumazet@...il.com
Subject: [RFC PATCH v2 2/3] netlink: implement nla_policy for HW QOS

Implement nla_policy hooks to get/set HW offloaded QOS policies.
The following types are added to RTM_{GET|SET}LINK.


 [IFLA_TC]
	[IFLA_TC_TXMAX]
	[IFLA_TC_TXNUM]
	[IFLA_TC_TXQS]
		[IFLA_TC_TXQ]
		...
	[IFLA_TC_MAPS]
		[IFLA_TC_MAP]
		...

The following are read-only:

IFLA_TC_TXMAX
IFLA_TC_TXQS

The IFLA_TC_TXMAX attribute can only be set by the lower layer drivers
because it is a hardware limit. The IFLA_TC_TXQ values provide insight
into how the hardware has aligned the tx queues with traffic classes
but cannot be modified.

This adds a net_device op, ndo_set_num_tc(), to call back into drivers
to change the number of traffic classes. Lower layer drivers may need to
move resources around or reconfigure HW to support changing the number
of traffic classes.

Signed-off-by: John Fastabend <john.r.fastabend@...el.com>
---

 include/linux/if_link.h   |   50 ++++++++++++++++++++++
 include/linux/netdevice.h |    4 ++
 net/core/rtnetlink.c      |  103 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 156 insertions(+), 1 deletions(-)

diff --git a/include/linux/if_link.h b/include/linux/if_link.h
index 6485d2a..ebe13a0 100644
--- a/include/linux/if_link.h
+++ b/include/linux/if_link.h
@@ -135,6 +135,7 @@ enum {
 	IFLA_VF_PORTS,
 	IFLA_PORT_SELF,
 	IFLA_AF_SPEC,
+	IFLA_TC,
 	__IFLA_MAX
 };
 
@@ -378,4 +379,53 @@ struct ifla_port_vsi {
 	__u8 pad[3];
 };
 
+/* HW QOS management section
+ *
+ *	Nested layout of set/get msg is:
+ *
+ *		[IFLA_TC]
+ *			[IFLA_TC_TXMAX]
+ *			[IFLA_TC_TXNUM]
+ *			[IFLA_TC_TXQS]
+ *				[IFLA_TC_TXQ]
+ *				...
+ *			[IFLA_TC_MAPS]
+ *				[IFLA_TC_MAP]
+ *				...
+ */
+enum {
+	IFLA_TC_UNSPEC,
+	IFLA_TC_TXMAX,		/* u8 */
+	IFLA_TC_TXNUM,		/* u8 */
+	IFLA_TC_TXQS,		/* nest of IFLA_TC_TXQ */
+	IFLA_TC_MAPS,		/* nest of IFLA_TC_MAP */
+	__IFLA_TC_MAX,
+};
+#define IFLA_TC_MAX (__IFLA_TC_MAX - 1)
+
+struct ifla_tc_txq {
+	__u8 tc;	/* traffic class; NOTE(review): a pad byte likely follows */
+	__u16 count;	/* filled from netdev_tc_txq count on dump */
+	__u16 offset;	/* filled from netdev_tc_txq offset on dump */
+};
+
+enum {
+	IFLA_TC_TXQ_UNSPEC,
+	IFLA_TC_TXQ,		/* struct ifla_tc_txq */
+	__IFLA_TC_TCQ_MAX,	/* NOTE(review): "TCQ" looks like a typo for "TXQ" */
+};
+#define IFLA_TC_TXQS_MAX (__IFLA_TC_TCQ_MAX - 1)
+
+struct ifla_tc_map {
+	__u8 prio;	/* priority; the kernel dumps entries for 0..15 */
+	__u8 tc;	/* traffic class assigned to prio */
+};
+
+enum {
+	IFLA_TC_MAP_UNSPEC,
+	IFLA_TC_MAP,		/* struct ifla_tc_map */
+	__IFLA_TC_MAP_MAX,
+};
+#define IFLA_TC_MAPS_MAX (__IFLA_TC_MAP_MAX - 1)
+
 #endif /* _LINUX_IF_LINK_H */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 3307979..c44da29 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -744,6 +744,8 @@ struct netdev_tc_txq {
  * int (*ndo_set_vf_port)(struct net_device *dev, int vf,
  *			  struct nlattr *port[]);
  * int (*ndo_get_vf_port)(struct net_device *dev, int vf, struct sk_buff *skb);
+ *
+ * int (*ndo_set_num_tc)(struct net_device *dev, u8 tcs);
  */
 #define HAVE_NET_DEVICE_OPS
 struct net_device_ops {
@@ -802,6 +804,8 @@ struct net_device_ops {
 						   struct nlattr *port[]);
 	int			(*ndo_get_vf_port)(struct net_device *dev,
 						   int vf, struct sk_buff *skb);
+	int			(*ndo_set_num_tc)(struct net_device *dev,
+						  u8 tcs);
 #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
 	int			(*ndo_fcoe_enable)(struct net_device *dev);
 	int			(*ndo_fcoe_disable)(struct net_device *dev);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 750db57..12bdff5 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -739,6 +739,21 @@ static size_t rtnl_port_size(const struct net_device *dev)
 		return port_self_size;
 }
 
+/* Worst-case IFLA_TC size: 3 nest headers, two u8s, per-TC and per-prio rows */
+static size_t rtnl_tc_size(const struct net_device *dev)
+{
+	u8 num_tcs = netdev_get_num_tc(dev);
+	size_t table_size = nla_total_size(sizeof(struct nlattr)) /* IFLA_TC */
+		+ nla_total_size(sizeof(u8))	/* IFLA_TC_TXMAX */
+		+ nla_total_size(sizeof(u8));	/* IFLA_TC_TXNUM */
+
+	table_size += nla_total_size(sizeof(struct nlattr)); /* IFLA_TC_TXQS */
+	table_size += num_tcs * nla_total_size(sizeof(struct ifla_tc_txq));
+	table_size += nla_total_size(sizeof(struct nlattr)); /* IFLA_TC_MAPS */
+	table_size += 16 * nla_total_size(sizeof(struct ifla_tc_map));
+	return table_size;
+}
+
 static noinline size_t if_nlmsg_size(const struct net_device *dev)
 {
 	return NLMSG_ALIGN(sizeof(struct ifinfomsg))
@@ -761,7 +776,8 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev)
 	       + rtnl_vfinfo_size(dev) /* IFLA_VFINFO_LIST */
 	       + rtnl_port_size(dev) /* IFLA_VF_PORTS + IFLA_PORT_SELF */
 	       + rtnl_link_get_size(dev) /* IFLA_LINKINFO */
-	       + rtnl_link_get_af_size(dev); /* IFLA_AF_SPEC */
+	       + rtnl_link_get_af_size(dev) /* IFLA_AF_SPEC */
+	       + rtnl_tc_size(dev); /* IFLA_TC */
 }
 
 static int rtnl_vf_ports_fill(struct sk_buff *skb, struct net_device *dev)
@@ -952,6 +968,53 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 	if (rtnl_port_fill(skb, dev))
 		goto nla_put_failure;
 
+	/* Dump HW traffic class state; devices with max_tcs == 0 emit nothing */
+	if (dev->max_tcs) {
+		struct nlattr *tc_tbl, *tc_txq, *tc_map;
+		struct netdev_tc_txq *tcq;
+		struct ifla_tc_txq ifla_tcq;
+		struct ifla_tc_map ifla_map;
+		u8 i;
+
+		tc_tbl = nla_nest_start(skb, IFLA_TC);
+		if (!tc_tbl)
+			goto nla_put_failure;
+
+		NLA_PUT_U8(skb, IFLA_TC_TXMAX, dev->max_tcs);
+		NLA_PUT_U8(skb, IFLA_TC_TXNUM, dev->num_tcs);
+
+		tc_txq = nla_nest_start(skb, IFLA_TC_TXQS);
+		if (!tc_txq)
+			goto nla_put_failure;
+
+		for (i = 0; i < dev->num_tcs; i++) {
+			tcq = netdev_get_tc_queue(dev, i);
+
+			/* Zero the whole struct first so that padding
+			 * bytes never carry stack contents to userspace.
+			 */
+			memset(&ifla_tcq, 0, sizeof(ifla_tcq));
+			ifla_tcq.tc = i;
+			ifla_tcq.count = tcq->count;
+			ifla_tcq.offset = tcq->offset;
+
+			NLA_PUT(skb, IFLA_TC_TXQ, sizeof(ifla_tcq), &ifla_tcq);
+		}
+		nla_nest_end(skb, tc_txq);
+
+		tc_map = nla_nest_start(skb, IFLA_TC_MAPS);
+		if (!tc_map)
+			goto nla_put_failure;
+
+		for (i = 0; i < 16; i++) {
+			ifla_map.prio = i;
+			ifla_map.tc = netdev_get_prio_tc_map(dev, i);
+			NLA_PUT(skb, IFLA_TC_MAP, sizeof(ifla_map), &ifla_map);
+		}
+		nla_nest_end(skb, tc_map);
+		nla_nest_end(skb, tc_tbl);
+	}
+
 	if (dev->rtnl_link_ops) {
 		if (rtnl_link_fill(skb, dev) < 0)
 			goto nla_put_failure;
@@ -1046,6 +1097,7 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = {
 	[IFLA_VF_PORTS]		= { .type = NLA_NESTED },
 	[IFLA_PORT_SELF]	= { .type = NLA_NESTED },
 	[IFLA_AF_SPEC]		= { .type = NLA_NESTED },
+	[IFLA_TC]		= { .type = NLA_NESTED },
 };
 EXPORT_SYMBOL(ifla_policy);
 
@@ -1081,6 +1133,23 @@ static const struct nla_policy ifla_port_policy[IFLA_PORT_MAX+1] = {
 	[IFLA_PORT_RESPONSE]	= { .type = NLA_U16, },
 };
 
+static const struct nla_policy ifla_tc_policy[IFLA_TC_MAX+1] = {
+	[IFLA_TC_TXMAX]		= { .type = NLA_U8 },
+	[IFLA_TC_TXNUM]		= { .type = NLA_U8 },
+	[IFLA_TC_TXQS]		= { .type = NLA_NESTED }, /* IFLA_TC_TXQ list */
+	[IFLA_TC_MAPS]		= { .type = NLA_NESTED }, /* IFLA_TC_MAP list */
+};	/* policy for the attributes nested inside IFLA_TC */
+
+static const struct nla_policy ifla_tc_txq[IFLA_TC_TXQS_MAX+1] = {
+	[IFLA_TC_TXQ]		= { .type = NLA_BINARY,
+				    .len = sizeof(struct ifla_tc_txq)},
+};	/* NOTE(review): no user of this table is visible in this patch */
+
+static const struct nla_policy ifla_tc_map[IFLA_TC_MAPS_MAX+1] = {
+	[IFLA_TC_MAP]		= { .type = NLA_BINARY,
+				    .len = sizeof(struct ifla_tc_map)},
+};	/* NOTE(review): no user of this table is visible in this patch */
+
 struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[])
 {
 	struct net *net;
@@ -1389,6 +1458,51 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
 	}
 	err = 0;
 
+	/* Apply HW traffic class settings: TC count first, then prio->TC map */
+	if (tb[IFLA_TC]) {
+		struct nlattr *table[IFLA_TC_MAX+1];
+		struct nlattr *tc_maps;
+		int rem;
+
+		err = nla_parse_nested(table, IFLA_TC_MAX, tb[IFLA_TC],
+				       ifla_tc_policy);
+		if (err < 0)
+			goto errout;
+
+		if (table[IFLA_TC_TXNUM]) {
+			u8 tcs = nla_get_u8(table[IFLA_TC_TXNUM]);
+			err = -EOPNOTSUPP;
+			if (ops->ndo_set_num_tc)
+				err = ops->ndo_set_num_tc(dev, tcs);
+			if (err < 0)
+				goto errout;
+			modified = 1;
+		}
+
+		if (table[IFLA_TC_MAPS]) {
+			nla_for_each_nested(tc_maps, table[IFLA_TC_MAPS], rem) {
+				struct ifla_tc_map *map;
+
+				/* Entries are user-supplied: require a
+				 * full IFLA_TC_MAP before reading it.
+				 */
+				if (nla_type(tc_maps) != IFLA_TC_MAP ||
+				    nla_len(tc_maps) < sizeof(*map)) {
+					err = -EINVAL;
+					goto errout;
+				}
+
+				map = nla_data(tc_maps);
+				err = netdev_set_prio_tc_map(dev, map->prio,
+							     map->tc);
+				if (err < 0)
+					goto errout;
+				modified = 1;
+			}
+		}
+	}
+	err = 0;
+
 errout:
 	if (err < 0 && modified && net_ratelimit())
 		printk(KERN_WARNING "A link change request failed with "

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ