[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <4FAA1057.6020709@intel.com>
Date: Tue, 08 May 2012 23:36:07 -0700
From: John Fastabend <john.r.fastabend@...el.com>
To: Amir Vadai <amirv@...lanox.com>
CC: "David S. Miller" <davem@...emloft.net>, netdev@...r.kernel.org,
Oren Duer <oren@...lanox.com>, Liran Liss <liranl@...lanox.com>
Subject: Re: [PATCH net-next 1/2] net_sched/mqprio: add support for different
pgroup types
On 5/6/2012 12:05 AM, Amir Vadai wrote:
> Currently, HW based QoS mechanisms use the framework and means introduced in
> commits 4f57c087d "net: implement mechanism for HW based QOS" and b8970f0bfc
> "net_sched: implement a root container qdisc sch_mqprio".
>
> The approach present in these patches is strongly orientated to the extended
> transmission selection (ETS) algorithm traffic classes (TC).
>
I would argue the strongly orientated part per our other thread [0/2]
> This patch enhances the current scheme to allow for these mechanisms to be used
> also with hardware who has queues per UP - user priority (Linux has well
> established mechanisms to set UP for both tagged and untagged traffic).
>
also per other thread I think this is only needed if you have many different
egress map configurations on vlans.
> Now, __skb_tx_hash() will direct a flow to a tx ring from a range of tx rings.
> This range is defined by the admin through the mqprio scheduler for the
> specific HW. For TC based queues, the range is by TC number and for UP based
> queues, the range is by UP.
>
> Signed-off-by: Amir Vadai <amirv@...lanox.com>
> ---
> include/linux/netdevice.h | 27 +++++++++++++++++++++++++++
> include/linux/pkt_sched.h | 3 ++-
> net/core/dev.c | 12 +++++++++---
> net/sched/sch_mqprio.c | 11 +++++++++--
> 4 files changed, 47 insertions(+), 6 deletions(-)
>
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index 7f377fb..ecdd953 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -835,6 +835,9 @@ struct netdev_fcoe_hbainfo {
> * is always called from the stack with the rtnl lock held and netif tx
> * queues stopped. This allows the netdevice to perform queue management
> * safely.
> + * int (*ndo_set_pg_type)(struct net_device *dev, u8 pg_type)
> + * Called to setup 'tc' type. According to this type, traffic is
> + * distributed across tx rings. If not set, ETS TC is in use.
> *
> * Fiber Channel over Ethernet (FCoE) offload functions.
> * int (*ndo_fcoe_enable)(struct net_device *dev);
> @@ -973,6 +976,8 @@ struct net_device_ops {
> int (*ndo_get_vf_port)(struct net_device *dev,
> int vf, struct sk_buff *skb);
> int (*ndo_setup_tc)(struct net_device *dev, u8 tc);
> + int (*ndo_set_pg_type)(struct net_device *dev,
> + u8 pg_type);
expand ndo_setup_tc() to take either another parameter 'pg_type' or just
start passing in the entire tc_mqprio_opt that way we get the number of
queues as well. I would prefer passing tc_mpqrio_opt to adding more parameters.
This avoids adding another ndo op.
> #if IS_ENABLED(CONFIG_FCOE)
> int (*ndo_fcoe_enable)(struct net_device *dev);
> int (*ndo_fcoe_disable)(struct net_device *dev);
> @@ -1307,6 +1312,11 @@ struct net_device {
> /* Data Center Bridging netlink ops */
> const struct dcbnl_rtnl_ops *dcbnl_ops;
> #endif
> + enum {
> + PGROUP_TC,
> + PGROUP_UP,
> + PGROUP_MAX,
> + } pg_type:8;
> u8 num_tc;
> struct netdev_tc_txq tc_to_txq[TC_MAX_QUEUE];
> u8 prio_tc_map[TC_BITMASK + 1];
> @@ -1329,6 +1339,23 @@ struct net_device {
> #define NETDEV_ALIGN 32
>
> static inline
> +int netdev_get_pg_type(const struct net_device *dev)
> +{
> + return dev->pg_type;
> +}
> +
> +static inline
> +int netdev_set_pg_type(struct net_device *dev, u8 pg_type)
> +{
> + if (pg_type >= PGROUP_MAX)
> + return -EINVAL;
> +
> + dev->pg_type = pg_type;
> +
> + return 0;
> +}
> +
> +static inline
> int netdev_get_prio_tc_map(const struct net_device *dev, u32 prio)
> {
> return dev->prio_tc_map[prio & TC_BITMASK];
> diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h
> index ffe975c..1ae7d3c 100644
> --- a/include/linux/pkt_sched.h
> +++ b/include/linux/pkt_sched.h
> @@ -596,7 +596,8 @@ struct tc_drr_stats {
> struct tc_mqprio_qopt {
> __u8 num_tc;
> __u8 prio_tc_map[TC_QOPT_BITMASK + 1];
> - __u8 hw;
> + __u8 hw; /* bit 0: hw owned, bits 1-7: hw queuing type.
> + * valid types: 0 - ETS TC, 1 - UP */
> __u16 count[TC_QOPT_MAX_QUEUE];
> __u16 offset[TC_QOPT_MAX_QUEUE];
> };
> diff --git a/net/core/dev.c b/net/core/dev.c
> index 09024fd..72ac4bf 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -2325,9 +2325,15 @@ u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
> }
>
> if (dev->num_tc) {
> - u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
> - qoffset = dev->tc_to_txq[tc].offset;
> - qcount = dev->tc_to_txq[tc].count;
> + u8 pgroup;
> +
> + if (dev->pg_type == PGROUP_TC || !vlan_tx_tag_present(skb))
> + pgroup = netdev_get_prio_tc_map(dev, skb->priority);
> + else
> + pgroup = (vlan_tx_tag_get(skb) >> 13);
> +
> + qoffset = dev->tc_to_txq[pgroup].offset;
> + qcount = dev->tc_to_txq[pgroup].count;
> }
>
> if (skb->sk && skb->sk->sk_hash)
> diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
> index d1831ca..2149cbb 100644
> --- a/net/sched/sch_mqprio.c
> +++ b/net/sched/sch_mqprio.c
> @@ -134,11 +134,18 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
> priv->qdiscs[i] = qdisc;
> }
>
> + if (dev->netdev_ops->ndo_set_pg_type)
> + err = dev->netdev_ops->ndo_set_pg_type(dev, qopt->hw >> 1);
> + else
> + err = netdev_set_pg_type(dev, PGROUP_TC);
Software should still be allowed to set PGROUP_UP even though hardware may
not support it.
> + if (err)
> + goto err;
> +
> /* If the mqprio options indicate that hardware should own
> * the queue mapping then run ndo_setup_tc otherwise use the
> * supplied and verified mapping
> */
> - if (qopt->hw) {
> + if (qopt->hw & 1) {
> priv->hw_owned = 1;
> err = dev->netdev_ops->ndo_setup_tc(dev, qopt->num_tc);
> if (err)
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists