netdev - Re: [PATCH net-next 1/2] net_sched/mqprio: add support for different pgroup types

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <4FAA1057.6020709@intel.com>
Date:	Tue, 08 May 2012 23:36:07 -0700
From:	John Fastabend <john.r.fastabend@...el.com>
To:	Amir Vadai <amirv@...lanox.com>
CC:	"David S. Miller" <davem@...emloft.net>, netdev@...r.kernel.org,
	Oren Duer <oren@...lanox.com>, Liran Liss <liranl@...lanox.com>
Subject: Re: [PATCH net-next 1/2] net_sched/mqprio: add support for different
 pgroup types

On 5/6/2012 12:05 AM, Amir Vadai wrote:
> Currently, HW based QoS mechanisms use the framework and means introduced in
> commits 4f57c087d "net: implement mechanism for HW based QOS" and b8970f0bfc
> "net_sched: implement a root container qdisc sch_mqprio".
> 
> The approach present in these patches is strongly orientated to the extended
> transmission selection (ETS) algorithm traffic classes (TC).
> 

I would dispute the "strongly orientated" part; see our other thread [0/2].

> This patch enhances the current scheme to allow for these mechanisms to be used
> also with hardware who has queues per UP - user priority (Linux has well
> established mechanisms to set UP for both tagged and untagged traffic).
> 

Also, per the other thread, I think this is only needed if you have many
different egress map configurations on VLANs.

> Now, __skb_tx_hash() will direct a flow to a tx ring from a range of tx rings.
> This range is defined by the admin through the mqprio scheduler for the
> specific HW. For TC based queues, the range is by TC number and for UP based
> queues, the range is by UP.
> 
> Signed-off-by: Amir Vadai <amirv@...lanox.com>
> ---
>  include/linux/netdevice.h |   27 +++++++++++++++++++++++++++
>  include/linux/pkt_sched.h |    3 ++-
>  net/core/dev.c            |   12 +++++++++---
>  net/sched/sch_mqprio.c    |   11 +++++++++--
>  4 files changed, 47 insertions(+), 6 deletions(-)
> 
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index 7f377fb..ecdd953 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -835,6 +835,9 @@ struct netdev_fcoe_hbainfo {
>   * 	is always called from the stack with the rtnl lock held and netif tx
>   * 	queues stopped. This allows the netdevice to perform queue management
>   * 	safely.
> + * int (*ndo_set_pg_type)(struct net_device *dev, u8 pg_type)
> + *	Called to setup 'tc' type. According to this type, traffic is
> + *	distributed across tx rings. If not set, ETS TC is in use.
>   *
>   *	Fiber Channel over Ethernet (FCoE) offload functions.
>   * int (*ndo_fcoe_enable)(struct net_device *dev);
> @@ -973,6 +976,8 @@ struct net_device_ops {
>  	int			(*ndo_get_vf_port)(struct net_device *dev,
>  						   int vf, struct sk_buff *skb);
>  	int			(*ndo_setup_tc)(struct net_device *dev, u8 tc);
> +	int			(*ndo_set_pg_type)(struct net_device *dev,
> +						   u8 pg_type);

Expand ndo_setup_tc() to take either another parameter 'pg_type' or just
start passing in the entire tc_mqprio_qopt; that way we get the number of
queues as well. I would prefer passing tc_mqprio_qopt to adding more parameters.

This avoids adding another ndo op.


>  #if IS_ENABLED(CONFIG_FCOE)
>  	int			(*ndo_fcoe_enable)(struct net_device *dev);
>  	int			(*ndo_fcoe_disable)(struct net_device *dev);
> @@ -1307,6 +1312,11 @@ struct net_device {
>  	/* Data Center Bridging netlink ops */
>  	const struct dcbnl_rtnl_ops *dcbnl_ops;
>  #endif
> +	enum {
> +		PGROUP_TC,
> +		PGROUP_UP,
> +		PGROUP_MAX,
> +	} pg_type:8;
>  	u8 num_tc;
>  	struct netdev_tc_txq tc_to_txq[TC_MAX_QUEUE];
>  	u8 prio_tc_map[TC_BITMASK + 1];
> @@ -1329,6 +1339,23 @@ struct net_device {
>  #define	NETDEV_ALIGN		32
>  
>  static inline
> +int netdev_get_pg_type(const struct net_device *dev)
> +{
> +	return dev->pg_type;
> +}
> +
> +static inline
> +int netdev_set_pg_type(struct net_device *dev, u8 pg_type)
> +{
> +	if (pg_type >= PGROUP_MAX)
> +		return -EINVAL;
> +
> +	dev->pg_type = pg_type;
> +
> +	return 0;
> +}
> +
> +static inline
>  int netdev_get_prio_tc_map(const struct net_device *dev, u32 prio)
>  {
>  	return dev->prio_tc_map[prio & TC_BITMASK];
> diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h
> index ffe975c..1ae7d3c 100644
> --- a/include/linux/pkt_sched.h
> +++ b/include/linux/pkt_sched.h
> @@ -596,7 +596,8 @@ struct tc_drr_stats {
>  struct tc_mqprio_qopt {
>  	__u8	num_tc;
>  	__u8	prio_tc_map[TC_QOPT_BITMASK + 1];
> -	__u8	hw;
> +	__u8	hw;	/* bit 0: hw owned, bits 1-7: hw queuing type.
> +			 * valid types: 0 - ETS TC, 1 - UP */
>  	__u16	count[TC_QOPT_MAX_QUEUE];
>  	__u16	offset[TC_QOPT_MAX_QUEUE];
>  };
> diff --git a/net/core/dev.c b/net/core/dev.c
> index 09024fd..72ac4bf 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -2325,9 +2325,15 @@ u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
>  	}
>  
>  	if (dev->num_tc) {
> -		u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
> -		qoffset = dev->tc_to_txq[tc].offset;
> -		qcount = dev->tc_to_txq[tc].count;
> +		u8 pgroup;
> +
> +		if (dev->pg_type == PGROUP_TC || !vlan_tx_tag_present(skb))
> +			pgroup = netdev_get_prio_tc_map(dev, skb->priority);
> +		else
> +			pgroup = (vlan_tx_tag_get(skb) >> 13);
> +
> +		qoffset = dev->tc_to_txq[pgroup].offset;
> +		qcount = dev->tc_to_txq[pgroup].count;
>  	}
>  
>  	if (skb->sk && skb->sk->sk_hash)
> diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
> index d1831ca..2149cbb 100644
> --- a/net/sched/sch_mqprio.c
> +++ b/net/sched/sch_mqprio.c
> @@ -134,11 +134,18 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
>  		priv->qdiscs[i] = qdisc;
>  	}
>  
> +	if (dev->netdev_ops->ndo_set_pg_type)
> +		err = dev->netdev_ops->ndo_set_pg_type(dev, qopt->hw >> 1);
> +	else
> +		err = netdev_set_pg_type(dev, PGROUP_TC);

Software should still be allowed to set PGROUP_UP even though hardware may
not support it.

> +	if (err)
> +		goto err;
> +
>  	/* If the mqprio options indicate that hardware should own
>  	 * the queue mapping then run ndo_setup_tc otherwise use the
>  	 * supplied and verified mapping
>  	 */
> -	if (qopt->hw) {
> +	if (qopt->hw & 1) {
>  		priv->hw_owned = 1;
>  		err = dev->netdev_ops->ndo_setup_tc(dev, qopt->num_tc);
>  		if (err)


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html