lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Thu, 9 Feb 2023 17:14:04 +0100
From:   Simon Horman <simon.horman@...igine.com>
To:     wei.fang@....com
Cc:     shenwei.wang@....com, xiaoning.wang@....com, davem@...emloft.net,
        edumazet@...gle.com, kuba@...nel.org, pabeni@...hat.com,
        netdev@...r.kernel.org, linux-imx@....com,
        linux-kernel@...r.kernel.org
Subject: Re: [PATCH net-next] net: fec: add CBS offload support

On Thu, Feb 09, 2023 at 05:24:22PM +0800, wei.fang@....com wrote:
> From: Wei Fang <wei.fang@....com>
> 
> The FEC hardware supports the Credit-based shaper (CBS) which control
> the bandwidth distribution between normal traffic and time-sensitive
> traffic with respect to the total link bandwidth available.
> But notice that the bandwidth allocation of hardware is restricted to
> certain values. Below is the equation which is used to calculate the
> BW (bandwidth) fraction for per class:
> 	BW fraction = 1 / (1 + 512 / idle_slope)
> 
> For values of idle_slope less than 128, idle_slope = 2 ^ n, when n =
> 0,1,2,...,6. For values equal to or greater than 128, idle_slope =
> 128 * m, where m = 1,2,3,...,12.
> Example 1. idle_slope = 64, therefore BW fraction = 0.111.
> Example 2. idle_slope = 128, therefore BW fraction = 0.200.
> 
> Here is an example command to set 200Mbps bandwidth on 1000Mbps port
> for TC 2 and 111Mbps for TC 3.
> tc qdisc add dev eth0 parent root handle 100 mqprio num_tc 3 map \
> 0 0 2 1 0 0 0 0 0 0 0 0 0 0 0 0 queues 1@0 1@1 1@2 hw 0
> tc qdisc replace dev eth0 parent 100:2 cbs idleslope 200000 \
> sendslope -800000 hicredit 153 locredit -1389 offload 1
> tc qdisc replace dev eth0 parent 100:3 cbs idleslope 111000 \
> sendslope -889000 hicredit 90 locredit -892 offload 1
> 
> Signed-off-by: Wei Fang <wei.fang@....com>
> ---
>  drivers/net/ethernet/freescale/fec.h      |   4 +
>  drivers/net/ethernet/freescale/fec_main.c | 106 ++++++++++++++++++++++
>  2 files changed, 110 insertions(+)
> 
> diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h
> index 5ba1e0d71c68..ad5f968aa086 100644
> --- a/drivers/net/ethernet/freescale/fec.h
> +++ b/drivers/net/ethernet/freescale/fec.h
> @@ -340,6 +340,10 @@ struct bufdesc_ex {
>  #define RCMR_CMP(X)		(((X) == 1) ? RCMR_CMP_1 : RCMR_CMP_2)
>  #define FEC_TX_BD_FTYPE(X)	(((X) & 0xf) << 20)
>  
> +#define FEC_QOS_TX_SHEME_MASK	GENMASK(2, 0)
> +#define CREDIT_BASED_SCHEME	0
> +#define ROUND_ROBIN_SCHEME	1
> +
>  /* The number of Tx and Rx buffers.  These are allocated from the page
>   * pool.  The code may assume these are power of two, so it it best
>   * to keep them that size.
> diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
> index c73e25f8995e..3bb3a071fa0c 100644
> --- a/drivers/net/ethernet/freescale/fec_main.c
> +++ b/drivers/net/ethernet/freescale/fec_main.c
> @@ -66,6 +66,7 @@
>  #include <linux/mfd/syscon.h>
>  #include <linux/regmap.h>
>  #include <soc/imx/cpuidle.h>
> +#include <net/pkt_sched.h>
>  #include <linux/filter.h>
>  #include <linux/bpf.h>
>  
> @@ -3232,6 +3233,110 @@ static int fec_enet_ioctl(struct net_device *ndev, struct ifreq *rq, int cmd)
>  	return phy_mii_ioctl(phydev, rq, cmd);
>  }
>  
> +static u32 fec_enet_get_idle_slope(u8 bw)
> +{
> +	u32 idle_slope, quotient, msb;
> +
> +	/* Convert bw to hardware idle slope */
> +	idle_slope = (512 * bw) / (100 - bw);
> +
> +	if (idle_slope >= 128) {
> +		/* For values equal to or greater than 128, idle_slope = 128 * m,
> +		 * where m = 1, 2, 3, ...12. Here we use the rounding method.
> +		 */

	Perhaps the following would be clearer?

	 For values greater than or equal to 128,
	 idle_slope is rounded to the nearest multiple of 128.

> +		quotient = idle_slope / 128;
> +		if (idle_slope >= quotient * 128 + 64)
> +			idle_slope = 128 * (quotient + 1);
> +		else
> +			idle_slope = 128 * quotient;

	Maybe there is a helper that does this, but if
	not, perhaps:

	idle_slope = DIV_ROUND_CLOSEST(idle_slope, 128U) * 128U;


> +
> +		goto end;

Maybe return here

> +	}

Or an else here is nicer?

> +
> +	/* For values less than 128, idle_slope = 2 ^ n, where

	Perhaps the following would be clearer?

	 For values less than 128, idle_slope is rounded
	 to the nearest power of 2.

> +	 * n = 0, 1, 2, ...6. Here we use the rounding method.

         n can also be 7: an input idle_slope of 96..127 rounds up to 128 (2^7)

> +	 * So the minimum of idle_slope is 1.
> +	 */
> +	msb = fls(idle_slope);
> +
> +	if (msb == 0 || msb == 1) {
> +		idle_slope = 1;
> +		goto end;
> +	}

nit: maybe this is nicer

	if (msb <= 1)
		return 1;

> +
> +	msb -= 1;
> +	if (idle_slope >= (1 << msb) + (1 << (msb - 1)))
> +		idle_slope = 1 << (msb + 1);
> +	else
> +		idle_slope = 1 << msb;

	In the same vein as the suggestion for the >= 128 case, perhaps:

	u32 d;

	d = BIT(fls(idle_slope) - 1);
	idle_slope = DIV_ROUND_CLOSEST(idle_slope, d) * d;

> +
> +end:
> +	return idle_slope;
> +}
> +
> +static int fec_enet_setup_tc_cbs(struct net_device *ndev, void *type_data)
> +{
> +	struct fec_enet_private *fep = netdev_priv(ndev);
> +	struct tc_cbs_qopt_offload *cbs = type_data;
> +	int queue =  cbs->queue;

nit: extra space after '='

> +	u32 speed = fep->speed;
> +	u32 val, idle_slope;
> +	u8 bw;
> +
> +	if (!(fep->quirks & FEC_QUIRK_HAS_AVB))
> +		return -EOPNOTSUPP;
> +
> +	/* Queue 1 for Class A, Queue 2 for Class B, so the ENET must has

nit: s/has/have/

> +	 * three queues.
> +	 */
> +	if (fep->num_tx_queues != FEC_ENET_MAX_TX_QS)
> +		return -EOPNOTSUPP;
> +
> +	/* Queue 0 is not AVB capable */
> +	if (queue <= 0 || queue >= fep->num_tx_queues)
> +		return -EINVAL;
> +
> +	val = readl(fep->hwp + FEC_QOS_SCHEME);
> +	val &= ~FEC_QOS_TX_SHEME_MASK;
> +	if (!cbs->enable) {
> +		val |= ROUND_ROBIN_SCHEME;
> +		writel(val, fep->hwp + FEC_QOS_SCHEME);
> +
> +		return 0;
> +	}
> +
> +	val |= CREDIT_BASED_SCHEME;
> +	writel(val, fep->hwp + FEC_QOS_SCHEME);
> +
> +	/* cbs->idleslope is in kilobits per second. speed is the port rate
> +	 * in megabits per second. So bandwidth ratio bw = (idleslope /
> +	 * (speed * 1000)) * 100, the unit is percentage.
> +	 */

suggestion:

	/* cbs->idleslope is in kilobits per second.
	 * Speed is the port rate in megabits per second.
	 * So the bandwidth ratio, bw, is (idleslope / (speed * 1000)) * 100.
	 * The unit of bw is a percentage.
	 */

> +	bw = cbs->idleslope / (speed * 10UL);
> +	/* bw% can not >= 100% */
> +	if (bw >= 100)
> +		return -EINVAL;

nit: maybe the above calculation and check fit better inside
     fec_enet_get_idle_slope()

> +	idle_slope = fec_enet_get_idle_slope(bw);
> +
> +	val = readl(fep->hwp + FEC_DMA_CFG(queue));
> +	val &= ~IDLE_SLOPE_MASK;
> +	val |= idle_slope & IDLE_SLOPE_MASK;
> +	writel(val, fep->hwp + FEC_DMA_CFG(queue));
> +
> +	return 0;
> +}
> +
> +static int fec_enet_setup_tc(struct net_device *ndev, enum tc_setup_type type,
> +			     void *type_data)
> +{
> +	switch (type) {
> +	case TC_SETUP_QDISC_CBS:
> +		return fec_enet_setup_tc_cbs(ndev, type_data);
> +	default:
> +		return -EOPNOTSUPP;
> +	}
> +}
> +
>  static void fec_enet_free_buffers(struct net_device *ndev)
>  {
>  	struct fec_enet_private *fep = netdev_priv(ndev);
> @@ -3882,6 +3987,7 @@ static const struct net_device_ops fec_netdev_ops = {
>  	.ndo_tx_timeout		= fec_timeout,
>  	.ndo_set_mac_address	= fec_set_mac_address,
>  	.ndo_eth_ioctl		= fec_enet_ioctl,
> +	.ndo_setup_tc	= fec_enet_setup_tc,
>  #ifdef CONFIG_NET_POLL_CONTROLLER
>  	.ndo_poll_controller	= fec_poll_controller,
>  #endif
> -- 
> 2.25.1
> 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ