netdev - Re: [PATCH] can: m_can: don't enable transceiver when probing

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <zpgew4xinv5bpewthmwpx6igjw5fcajsj55q67k3q35meheywx@ookucxy5wco4>
Date: Sun, 7 Apr 2024 17:48:39 +0200
From: Markus Schneider-Pargmann <msp@...libre.com>
To: Martin Hundebøll <martin@...nix.com>
Cc: Chandrasekar Ramakrishnan <rcsekar@...sung.com>, 
	Marc Kleine-Budde <mkl@...gutronix.de>, Vincent Mailhol <mailhol.vincent@...adoo.fr>, 
	"David S. Miller" <davem@...emloft.net>, Eric Dumazet <edumazet@...gle.com>, 
	Jakub Kicinski <kuba@...nel.org>, Paolo Abeni <pabeni@...hat.com>, linux-can@...r.kernel.org, 
	netdev@...r.kernel.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH] can: m_can: don't enable transceiver when probing

Hi Martin,

On Tue, Mar 26, 2024 at 01:26:38PM +0100, Martin Hundebøll wrote:
> The m_can driver sets and clears the CCCR.INIT bit during probe (both
> when testing the NON-ISO bit, and when configuring the chip). After
> clearing the CCCR.INIT bit, the transceiver enters normal mode, where it
> affects the CAN bus (i.e. it ACKs frames). This can cause troubles when
> the m_can node is only used for monitoring the bus, as one cannot setup
> listen-only mode before the device is probed.
> 
> Rework the probe flow, so that the CCCR.INIT bit is only cleared when
> upping the device. First, the tcan4x5x driver is changed to stay in
> standby mode during/after probe. This in turn requires changes when
> setting bits in the CCCR register, as its CSR and CSA bits are always
> high in standby mode.
> 
> Signed-off-by: Martin Hundebøll <martin@...nix.com>
> ---
>  drivers/net/can/m_can/m_can.c         | 129 ++++++++++++++------------
>  drivers/net/can/m_can/tcan4x5x-core.c |  14 ++-
>  2 files changed, 79 insertions(+), 64 deletions(-)
> 
> diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c
> index 14b231c4d7ec..1ca245846fcd 100644
> --- a/drivers/net/can/m_can/m_can.c
> +++ b/drivers/net/can/m_can/m_can.c
> @@ -379,38 +379,60 @@ m_can_txe_fifo_read(struct m_can_classdev *cdev, u32 fgi, u32 offset, u32 *val)
>  	return cdev->ops->read_fifo(cdev, addr_offset, val, 1);
>  }
>  
> -static void m_can_config_endisable(struct m_can_classdev *cdev, bool enable)
> +static bool m_can_cccr_wait_bits(struct m_can_classdev *cdev, u32 mask, u32 val)

This function with its arguments reminds me of regmap_update_bits.
m_can_cccr_wait_bits() doesn't sound like a write function IMHO. Maybe
m_can_cccr_update_bits() would be more descriptive. I think the wait
part is not important as it's a hardware detail.

>  {
> -	u32 cccr = m_can_read(cdev, M_CAN_CCCR);
> -	u32 timeout = 10;
> -	u32 val = 0;
> -
> -	/* Clear the Clock stop request if it was set */
> -	if (cccr & CCCR_CSR)
> -		cccr &= ~CCCR_CSR;
> -
> -	if (enable) {
> -		/* enable m_can configuration */
> -		m_can_write(cdev, M_CAN_CCCR, cccr | CCCR_INIT);
> -		udelay(5);
> -		/* CCCR.CCE can only be set/reset while CCCR.INIT = '1' */
> -		m_can_write(cdev, M_CAN_CCCR, cccr | CCCR_INIT | CCCR_CCE);

This old code is explicitly setting CCCR_INIT first and then continues
to set CCE. Maybe it would be good to do the same thing in the new
m_can_config_enable()? As far as I can see CCCR_INIT is never set
explicitly before starting/stopping the device.

> -	} else {
> -		m_can_write(cdev, M_CAN_CCCR, cccr & ~(CCCR_INIT | CCCR_CCE));
> -	}
> +	u32 val_before = m_can_read(cdev, M_CAN_CCCR);
> +	u32 val_after = (val_before & ~mask) | val;
> +	size_t tries = 5;

Why are you reducing the timeout/tries from 10 to 5 here?

> +
> +	if (!(mask & CCCR_INIT) && !(val_before & CCCR_INIT))
> +		dev_warn(cdev->dev,
> +			 "trying to configure device when in normal mode. Expect failures\n");
> +
> +	/* The chip should be in standby mode when changing the CCCR register,
> +	 * and some chips set the CSR and CSA bits when in standby. Furthermore,
> +	 * the CSR and CSA bits should be written as zeros, even when they read
> +	 * ones.
> +	 */
> +	val_after &= ~(CCCR_CSR | CCCR_CSA);
> +
> +	while (tries--) {
> +		u32 val_read;
> +
> +		/* Write the desired value in each try, as setting some bits in
> +		 * the CCCR register require other bits to be set first. E.g.
> +		 * setting the NISO bit requires setting the CCE bit first.
> +		 */

As you state in this comment, CCE has to be set before NISO. Why don't
you do m_can_config_enable() test set NISO and then do
m_can_config_disable()?

> +		m_can_write(cdev, M_CAN_CCCR, val_after);
> +
> +		val_read = m_can_read(cdev, M_CAN_CCCR) & ~(CCCR_CSR | CCCR_CSA);
>  
> -	/* there's a delay for module initialization */
> -	if (enable)
> -		val = CCCR_INIT | CCCR_CCE;
> -
> -	while ((m_can_read(cdev, M_CAN_CCCR) & (CCCR_INIT | CCCR_CCE)) != val) {
> -		if (timeout == 0) {
> -			netdev_warn(cdev->net, "Failed to init module\n");
> -			return;
> -		}
> -		timeout--;
> -		udelay(1);
> +		if (val_read == val_after)
> +			return true;
> +
> +		usleep_range(1, 5);
>  	}
> +
> +	return false;
> +}
> +
> +static void m_can_config_enable(struct m_can_classdev *cdev)
> +{
> +	/* CCCR_INIT must be set in order to set CCCR_CCE, but access to
> +	 * configuration registers should only be enabled when in standby mode,
> +	 * where CCCR_INIT is always set.
> +	 */
> +	if (!m_can_cccr_wait_bits(cdev, CCCR_CCE, CCCR_CCE))
> +		netdev_err(cdev->net, "failed to enable configuration mode\n");
> +}
> +
> +static void m_can_config_disable(struct m_can_classdev *cdev)
> +{
> +	/* Only clear CCCR_CCE, since CCCR_INIT cannot be cleared while in
> +	 * standby mode
> +	 */
> +	if (!m_can_cccr_wait_bits(cdev, CCCR_CCE, 0))
> +		netdev_err(cdev->net, "failed to disable configuration registers\n");
>  }
>  
>  static void m_can_interrupt_enable(struct m_can_classdev *cdev, u32 interrupts)
> @@ -1403,7 +1425,7 @@ static int m_can_chip_config(struct net_device *dev)
>  	interrupts &= ~(IR_ARA | IR_ELO | IR_DRX | IR_TEFF | IR_TFE | IR_TCF |
>  			IR_HPM | IR_RF1F | IR_RF1W | IR_RF1N | IR_RF0F);
>  
> -	m_can_config_endisable(cdev, true);
> +	m_can_config_enable(cdev);
>  
>  	/* RX Buffer/FIFO Element Size 64 bytes data field */
>  	m_can_write(cdev, M_CAN_RXESC,
> @@ -1521,7 +1543,7 @@ static int m_can_chip_config(struct net_device *dev)
>  		    FIELD_PREP(TSCC_TCP_MASK, 0xf) |
>  		    FIELD_PREP(TSCC_TSS_MASK, TSCC_TSS_INTERNAL));
>  
> -	m_can_config_endisable(cdev, false);
> +	m_can_config_disable(cdev);
>  
>  	if (cdev->ops->init)
>  		cdev->ops->init(cdev);
> @@ -1544,12 +1566,15 @@ static int m_can_start(struct net_device *dev)
>  
>  	cdev->can.state = CAN_STATE_ERROR_ACTIVE;
>  
> -	m_can_enable_all_interrupts(cdev);
> -
>  	if (cdev->version > 30)
>  		cdev->tx_fifo_putidx = FIELD_GET(TXFQS_TFQPI_MASK,
>  						 m_can_read(cdev, M_CAN_TXFQS));
>  
> +	m_can_enable_all_interrupts(cdev);

Why do you move m_can_enable_all_interrupts() down?

> +
> +	if (!m_can_cccr_wait_bits(cdev, CCCR_INIT, 0))
> +		netdev_err(dev, "failed to enter normal mode\n");

Is this a hard error? Should the start be aborted here?

> +
>  	return 0;
>  }
>  
> @@ -1603,33 +1628,17 @@ static int m_can_check_core_release(struct m_can_classdev *cdev)
>   */
>  static bool m_can_niso_supported(struct m_can_classdev *cdev)
>  {
> -	u32 cccr_reg, cccr_poll = 0;
> -	int niso_timeout = -ETIMEDOUT;
> -	int i;
> +	bool niso_supported;
>  
> -	m_can_config_endisable(cdev, true);
> -	cccr_reg = m_can_read(cdev, M_CAN_CCCR);
> -	cccr_reg |= CCCR_NISO;
> -	m_can_write(cdev, M_CAN_CCCR, cccr_reg);
> +	/* First try to set the NISO bit (which requires the CCE bit too. */
> +	niso_supported = m_can_cccr_wait_bits(cdev, CCCR_NISO | CCCR_CCE,
> +					      CCCR_NISO | CCCR_CCE);
>  
> -	for (i = 0; i <= 10; i++) {
> -		cccr_poll = m_can_read(cdev, M_CAN_CCCR);
> -		if (cccr_poll == cccr_reg) {
> -			niso_timeout = 0;
> -			break;
> -		}
> +	/* Then clear the two bits again. */
> +	if (!m_can_cccr_wait_bits(cdev, CCCR_NISO | CCCR_CCE, 0))
> +		dev_err(cdev->dev, "failed to revert the NON-ISO bit in CCCR\n");
>  
> -		usleep_range(1, 5);
> -	}
> -
> -	/* Clear NISO */
> -	cccr_reg &= ~(CCCR_NISO);
> -	m_can_write(cdev, M_CAN_CCCR, cccr_reg);
> -
> -	m_can_config_endisable(cdev, false);
> -
> -	/* return false if time out (-ETIMEDOUT), else return true */
> -	return !niso_timeout;
> +	return niso_supported;
>  }
>  
>  static int m_can_dev_setup(struct m_can_classdev *cdev)
> @@ -1694,9 +1703,6 @@ static int m_can_dev_setup(struct m_can_classdev *cdev)
>  		return -EINVAL;
>  	}
>  
> -	if (cdev->ops->init)
> -		cdev->ops->init(cdev);
> -
>  	return 0;
>  }
>  
> @@ -1708,7 +1714,8 @@ static void m_can_stop(struct net_device *dev)
>  	m_can_disable_all_interrupts(cdev);
>  
>  	/* Set init mode to disengage from the network */
> -	m_can_config_endisable(cdev, true);
> +	if (!m_can_cccr_wait_bits(cdev, CCCR_INIT, CCCR_INIT))
> +		netdev_err(dev, "failed to enter standby mode\n");
>  
>  	/* set the state as STOPPED */
>  	cdev->can.state = CAN_STATE_STOPPED;
> diff --git a/drivers/net/can/m_can/tcan4x5x-core.c b/drivers/net/can/m_can/tcan4x5x-core.c
> index a42600dac70d..c9937dc0d700 100644
> --- a/drivers/net/can/m_can/tcan4x5x-core.c
> +++ b/drivers/net/can/m_can/tcan4x5x-core.c
> @@ -453,10 +453,18 @@ static int tcan4x5x_can_probe(struct spi_device *spi)
>  		goto out_power;
>  	}
>  
> -	ret = tcan4x5x_init(mcan_class);
> +	tcan4x5x_check_wake(priv);
> +
> +	ret = tcan4x5x_write_tcan_reg(mcan_class, TCAN4X5X_INT_EN, 0);
>  	if (ret) {
> -		dev_err(&spi->dev, "tcan initialization failed %pe\n",
> -			ERR_PTR(ret));
> +		dev_err(&spi->dev, "Disabling interrupts failed %pe\n", ERR_PTR(ret));
> +		goto out_power;
> +	}
> +
> +	ret = tcan4x5x_write_tcan_reg(mcan_class, TCAN4X5X_INT_FLAGS,
> +				      TCAN4X5X_CLEAR_ALL_INT);

Why don't you use tcan4x5x_clear_interrupts() here?

Best,
Markus

> +	if (ret) {
> +		dev_err(&spi->dev, "Clearing interrupts failed %pe\n", ERR_PTR(ret));
>  		goto out_power;
>  	}
>  
> -- 
> 2.44.0
>