linux-kernel - Re: [PATCH V2 net-next] net: fec: add CBS offload support

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <ed27795a-f81f-a913-8275-b6f516b4f384@intel.com>
Date:   Mon, 13 Feb 2023 17:04:57 +0100
From:   Alexander Lobakin <alexandr.lobakin@...el.com>
To:     <wei.fang@....com>
CC:     <shenwei.wang@....com>, <xiaoning.wang@....com>,
        <davem@...emloft.net>, <edumazet@...gle.com>, <kuba@...nel.org>,
        <pabeni@...hat.com>, <simon.horman@...igine.com>, <andrew@...n.ch>,
        <netdev@...r.kernel.org>, <linux-imx@....com>,
        <linux-kernel@...r.kernel.org>
Subject: Re: [PATCH V2 net-next] net: fec: add CBS offload support

From: Wei Fang <wei.fang@....com>
Date: Mon, 13 Feb 2023 17:29:12 +0800

> From: Wei Fang <wei.fang@....com>
> 
> The FEC hardware supports the Credit-based shaper (CBS) which control
> the bandwidth distribution between normal traffic and time-sensitive
> traffic with respect to the total link bandwidth available.
> But notice that the bandwidth allocation of hardware is restricted to
> certain values. Below is the equation which is used to calculate the
> BW (bandwidth) fraction for per class:
> 	BW fraction = 1 / (1 + 512 / idle_slope)

[...]

> @@ -571,6 +575,12 @@ struct fec_stop_mode_gpr {
>  	u8 bit;
>  };
>  
> +struct fec_cbs_params {
> +	bool enable[FEC_ENET_MAX_TX_QS];

Maybe smth like

	DECLARE_BITMAP(enabled, FEC_ENET_MAX_TX_QS);

?

> +	int idleslope[FEC_ENET_MAX_TX_QS];
> +	int sendslope[FEC_ENET_MAX_TX_QS];

Can they actually be negative? (probably I'll see it below)

> +};
> +
>  /* The FEC buffer descriptors track the ring buffers.  The rx_bd_base and
>   * tx_bd_base always point to the base of the buffer descriptors.  The
>   * cur_rx and cur_tx point to the currently available buffer.
> @@ -679,6 +689,9 @@ struct fec_enet_private {
>  	/* XDP BPF Program */
>  	struct bpf_prog *xdp_prog;
>  
> +	/* CBS parameters */
> +	struct fec_cbs_params cbs;
> +
>  	u64 ethtool_stats[];
>  };
>  
> diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
> index c73e25f8995e..91394ad05121 100644
> --- a/drivers/net/ethernet/freescale/fec_main.c
> +++ b/drivers/net/ethernet/freescale/fec_main.c
> @@ -66,6 +66,7 @@
>  #include <linux/mfd/syscon.h>
>  #include <linux/regmap.h>
>  #include <soc/imx/cpuidle.h>
> +#include <net/pkt_sched.h>

Some alphabetic order? At least for new files :D

>  #include <linux/filter.h>
>  #include <linux/bpf.h>
>  
> @@ -1023,6 +1024,174 @@ static void fec_enet_reset_skb(struct net_device *ndev)
>  	}
>  }
>  
> +static u32 fec_enet_get_idle_slope(u8 bw)

Just use `u32`, usually there's no reason to use types shorter than
integer on the stack.

> +{
> +	int msb, power;
> +	u32 idle_slope;
> +
> +	if (bw >= 100)
> +		return 0;
> +
> +	/* Convert bw to hardware idle slope */
> +	idle_slope = (512 * bw) / (100 - bw);
> +

Redundant newline. Also first pair of braces is optional and up to you.

> +	if (idle_slope >= 128) {
> +		/* For values greater than or equal to 128, idle_slope
> +		 * rounded to the nearest multiple of 128.
> +		 */

But you can just do

	idle_slope = min(idle_slope, 128U);

and still use the "standard" path below, without the conditional return?
Or even combine it with the code above:

	idle_slope = min((512 * bw) / (100 - bw), 128U);

> +		idle_slope = DIV_ROUND_CLOSEST(idle_slope, 128U) * 128U;
> +
> +		return idle_slope;
> +	}
> +
> +	/* For values less than 128, idle_slope is rounded to
> +	 * nearst power of 2.

Typo, 'nearest'.

> +	 */
> +	if (idle_slope <= 1)
> +		return 1;
> +
> +	msb = __fls(idle_slope);

I think `fls() - 1` is preferred over `__fls()` since it may go UB. And
some checks wouldn't hurt.

> +	power = BIT(msb);

Oh okay. Then rounddown_pow_of_two() is what you're looking for.

	power = rounddown_pow_of_two(idle_slope);

Or even just use one variable, @idle_slope.

> +	idle_slope = DIV_ROUND_CLOSEST(idle_slope, power) * power;
> +
> +	return idle_slope;

You can return DIV_ROUND_ ... right away, without assignning first.
Also, I'm thinking of that this might be a generic helper. We have
roundup() and rounddown(), this could be something like "round_closest()"?

> +}
> +
> +static void fec_enet_set_cbs_idle_slope(struct fec_enet_private *fep)
> +{
> +	u32 bw, val, idle_slope;
> +	int speed = fep->speed;
> +	int idle_slope_sum = 0;
> +	int i;

Can any of them be negative?

> +
> +	if (!speed)
> +		return;
> +
> +	for (i = 1; i < FEC_ENET_MAX_TX_QS; i++) {

So you don't use the zeroth array elements at all? Why having them then?

> +		int port_tx_rate;

(same for type)

> +
> +		/* As defined in IEEE 802.1Q-2014 Section 8.6.8.2 item:
> +		 *       sendslope = idleslope -  port_tx_rate
> +		 * So we need to check whether port_tx_rate is equal to
> +		 * the current link rate.

[...]

> +	for (i = 1; i < FEC_ENET_MAX_TX_QS; i++) {
> +		bw = fep->cbs.idleslope[i] / (speed * 10);
> +		idle_slope = fec_enet_get_idle_slope(bw);
> +
> +		val = readl(fep->hwp + FEC_DMA_CFG(i));
> +		val &= ~IDLE_SLOPE_MASK;
> +		val |= idle_slope & IDLE_SLOPE_MASK;

u32_replace_bits() will do it for you.

> +		writel(val, fep->hwp + FEC_DMA_CFG(i));
> +	}
> +
> +	/* Enable Credit-based shaper. */
> +	val = readl(fep->hwp + FEC_QOS_SCHEME);
> +	val &= ~FEC_QOS_TX_SHEME_MASK;
> +	val |= CREDIT_BASED_SCHEME;

(same)

> +	writel(val, fep->hwp + FEC_QOS_SCHEME);
> +}
> +
> +static int fec_enet_setup_tc_cbs(struct net_device *ndev, void *type_data)
> +{
> +	struct fec_enet_private *fep = netdev_priv(ndev);
> +	struct tc_cbs_qopt_offload *cbs = type_data;
> +	int queue = cbs->queue;
> +	int speed = fep->speed;
> +	int queue2;

(types)

> +
> +	if (!(fep->quirks & FEC_QUIRK_HAS_AVB))
> +		return -EOPNOTSUPP;
> +
> +	/* Queue 1 for Class A, Queue 2 for Class B, so the ENET must
> +	 * have three queues.
> +	 */
> +	if (fep->num_tx_queues != FEC_ENET_MAX_TX_QS)
> +		return -EOPNOTSUPP;
> +
> +	if (!speed) {
> +		netdev_err(ndev, "Link speed is 0!\n");

??? Is this possible? If so, why is it checked only here and why can it
be possible?

> +		return -ECANCELED;

(already mentioned in other review)

> +	}
> +
> +	/* Queue 0 is not AVB capable */
> +	if (queue <= 0 || queue >= fep->num_tx_queues) {

Is `< 0` possible? I realize it's `s32`, just curious.

> +		netdev_err(ndev, "The queue: %d is invalid!\n", queue);

Maybe less emotions? There's no point in having `!` at the end of every
error.

> +		return -EINVAL;
> +	}
> +
> +	if (!cbs->enable) {
> +		u32 val;
> +
> +		val = readl(fep->hwp + FEC_QOS_SCHEME);
> +		val &= ~FEC_QOS_TX_SHEME_MASK;
> +		val |= ROUND_ROBIN_SCHEME;

(u32_replace_bits())

> +		writel(val, fep->hwp + FEC_QOS_SCHEME);
> +
> +		memset(&fep->cbs, 0, sizeof(fep->cbs));
> +
> +		return 0;
> +	}
> +
> +	if (cbs->idleslope - cbs->sendslope != speed * 1000 ||
> +	    cbs->idleslope <= 0 || cbs->sendslope >= 0)

So you check slopes here, why check them above then?

> +		return -EINVAL;
> +
> +	/* Another AVB queue */
> +	queue2 = (queue == 1) ? 2 : 1;

Braces are unneeded.

> +	if (cbs->idleslope + fep->cbs.idleslope[queue2] > speed * 1000) {
> +		netdev_err(ndev,
> +			   "The sum of all idle slope can't exceed link speed!\n");
> +		return -EINVAL;
> +	}
[...]

Thanks,
Olek