linux-kernel - Re: [PATCH v2 3/3] remoteproc: k3-r5: Refactor sequential core power up/down operations

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <beffe56a-ae72-4e51-b574-0126e93f775a@ti.com>
Date: Fri, 3 Jan 2025 16:18:14 +0530
From: "Kumar, Udit" <u-kumar1@...com>
To: Beleswar Padhi <b-padhi@...com>, <andersson@...nel.org>,
        <mathieu.poirier@...aro.org>
CC: <afd@...com>, <hnagalla@...com>, <s-vadapalli@...com>, <srk@...com>,
        <jan.kiszka@...mens.com>, <christophe.jaillet@...adoo.fr>,
        <jkangas@...hat.com>, <eballetbo@...hat.com>,
        <linux-remoteproc@...r.kernel.org>, <linux-kernel@...r.kernel.org>
Subject: Re: [PATCH v2 3/3] remoteproc: k3-r5: Refactor sequential core power
 up/down operations


On 12/24/2024 2:44 PM, Beleswar Padhi wrote:
> The existing implementation of the waiting mechanism in
> "k3_r5_cluster_rproc_init()" waits for the "released_from_reset" flag to
> be set as part of the firmware boot process in "k3_r5_rproc_start()".
> The "k3_r5_cluster_rproc_init()" function is invoked in the probe
> routine which causes unexpected failures in cases where the firmware is
> unavailable at boot time, resulting in probe failure and removal of the
> remoteproc handles in the sysfs paths.
>
> To address this, the waiting mechanism is refactored out of the probe
> routine into the appropriate "k3_r5_rproc_{prepare/unprepare}()"
> functions. This allows the probe routine to complete without depending
> on firmware booting, while still maintaining the required
> power-synchronization between cores.
>
> Further, this wait mechanism is dropped from
> "k3_r5_rproc_{start/stop}()" functions as they deal with Core Run/Halt
> operations, and as such, there is no constraint in Running or Halting
> the cores of a cluster in order.
>
> Fixes: 61f6f68447ab ("remoteproc: k3-r5: Wait for core0 power-up before powering up core1")
> Signed-off-by: Beleswar Padhi <b-padhi@...com>
> ---
> Link to v1:
> https://lore.kernel.org/all/20240906094045.2428977-1-b-padhi@ti.com/
>
> v2: Changelog:
> 1. Improved commit message to call out functions correctly. [Mathieu]
> 2. Removed sequential wait/checks from .start()/.stop() ops as there is no
> constraint for Core Run/Halt operations.
>
>   drivers/remoteproc/ti_k3_r5_remoteproc.c | 114 ++++++++++++-----------
>   1 file changed, 61 insertions(+), 53 deletions(-)
>
> diff --git a/drivers/remoteproc/ti_k3_r5_remoteproc.c b/drivers/remoteproc/ti_k3_r5_remoteproc.c
> index e218a803fdb5..15e5a10801cd 100644
> --- a/drivers/remoteproc/ti_k3_r5_remoteproc.c
> +++ b/drivers/remoteproc/ti_k3_r5_remoteproc.c
> @@ -456,13 +456,36 @@ static int k3_r5_rproc_prepare(struct rproc *rproc)
>   {
>   	struct k3_r5_rproc *kproc = rproc->priv;
>   	struct k3_r5_cluster *cluster = kproc->cluster;
> -	struct k3_r5_core *core = kproc->core;
> +	struct k3_r5_core *core = kproc->core, *core0, *core1;
>   	struct device *dev = kproc->dev;
>   	u32 ctrl = 0, cfg = 0, stat = 0;
>   	u64 boot_vec = 0;
>   	bool mem_init_dis;
>   	int ret;
>   
> +	/*
> +	 * R5 cores require to be powered on sequentially, core0 should be in
> +	 * higher power state than core1 in a cluster. So, wait for core0 to
> +	 * power up before proceeding to core1 and put timeout of 2sec. This
> +	 * waiting mechanism is necessary because rproc_auto_boot_callback() for
> +	 * core1 can be called before core0 due to thread execution order.
> +	 *
> +	 * By placing the wait mechanism here in .prepare() ops, this condition
> +	 * is enforced for rproc boot requests from sysfs as well.
> +	 */
> +	core0 = list_first_entry(&cluster->cores, struct k3_r5_core, elem);
> +	core1 = list_last_entry(&cluster->cores, struct k3_r5_core, elem);
> +	if (cluster->mode == CLUSTER_MODE_SPLIT && core == core1 &&
> +	    !core0->released_from_reset) {
> +		ret = wait_event_interruptible_timeout(cluster->core_transition,
> +						       core0->released_from_reset,
> +						       msecs_to_jiffies(2000));
> +		if (ret <= 0) {
> +			dev_err(dev, "can not power up core1 before core0");
> +			return -EPERM;
> +		}
> +	}
> +
>   	ret = ti_sci_proc_get_status(core->tsp, &boot_vec, &cfg, &ctrl, &stat);
>   	if (ret < 0)
>   		return ret;
> @@ -478,6 +501,13 @@ static int k3_r5_rproc_prepare(struct rproc *rproc)
>   		return ret;
>   	}
>   
> +	/*
> +	 * Notify all threads in the wait queue when core state has changed so
> +	 * that threads waiting for this condition can be executed.
> +	 */
> +	core->released_from_reset = true;
> +	wake_up_interruptible(&cluster->core_transition);

I think you should signal this only for core-0.


> +
>   	/*
>   	 * Newer IP revisions like on J7200 SoCs support h/w auto-initialization
>   	 * of TCMs, so there is no need to perform the s/w memzero. This bit is
> @@ -523,10 +553,30 @@ static int k3_r5_rproc_unprepare(struct rproc *rproc)
>   {
>   	struct k3_r5_rproc *kproc = rproc->priv;
>   	struct k3_r5_cluster *cluster = kproc->cluster;
> -	struct k3_r5_core *core = kproc->core;
> +	struct k3_r5_core *core = kproc->core, *core0, *core1;
>   	struct device *dev = kproc->dev;
>   	int ret;
>   
> +	/*
> +	 * Ensure power-down of cores is sequential in split mode. Core1 must
> +	 * power down before Core0 to maintain the expected state. By placing
> +	 * the wait mechanism here in .unprepare() ops, this condition is
> +	 * enforced for rproc stop or shutdown requests from sysfs and device
> +	 * removal as well.
> +	 */
> +	core0 = list_first_entry(&cluster->cores, struct k3_r5_core, elem);
> +	core1 = list_last_entry(&cluster->cores, struct k3_r5_core, elem);
> +	if (cluster->mode == CLUSTER_MODE_SPLIT && core == core0 &&
> +	    core1->released_from_reset) {
> +		ret = wait_event_interruptible_timeout(cluster->core_transition,
> +						       !core1->released_from_reset,
> +						       msecs_to_jiffies(2000));
> +		if (ret <= 0) {
> +			dev_err(dev, "can not power down core0 before core1");
> +			return -EPERM;
> +		}
> +	}
> +
>   	/* Re-use LockStep-mode reset logic for Single-CPU mode */
>   	ret = (cluster->mode == CLUSTER_MODE_LOCKSTEP ||
>   	       cluster->mode == CLUSTER_MODE_SINGLECPU) ?
> @@ -534,6 +584,13 @@ static int k3_r5_rproc_unprepare(struct rproc *rproc)
>   	if (ret)
>   		dev_err(dev, "unable to disable cores, ret = %d\n", ret);
>   
> +	/*
> +	 * Notify all threads in the wait queue when core state has changed so
> +	 * that threads waiting for this condition can be executed.
> +	 */
> +	core->released_from_reset = false;
> +	wake_up_interruptible(&cluster->core_transition);
> +
>   	return ret;
>   }
>   
> @@ -559,7 +616,7 @@ static int k3_r5_rproc_start(struct rproc *rproc)
>   	struct k3_r5_rproc *kproc = rproc->priv;
>   	struct k3_r5_cluster *cluster = kproc->cluster;
>   	struct device *dev = kproc->dev;
> -	struct k3_r5_core *core0, *core;
> +	struct k3_r5_core *core;
>   	u32 boot_addr;
>   	int ret;
>   
> @@ -581,21 +638,9 @@ static int k3_r5_rproc_start(struct rproc *rproc)
>   				goto unroll_core_run;
>   		}
>   	} else {
> -		/* do not allow core 1 to start before core 0 */
> -		core0 = list_first_entry(&cluster->cores, struct k3_r5_core,
> -					 elem);
> -		if (core != core0 && core0->rproc->state == RPROC_OFFLINE) {
> -			dev_err(dev, "%s: can not start core 1 before core 0\n",
> -				__func__);
> -			return -EPERM;
> -		}
> -
>   		ret = k3_r5_core_run(core);
>   		if (ret)
>   			return ret;
> -
> -		core->released_from_reset = true;
> -		wake_up_interruptible(&cluster->core_transition);
>   	}
>   
>   	return 0;
> @@ -636,8 +681,7 @@ static int k3_r5_rproc_stop(struct rproc *rproc)
>   {
>   	struct k3_r5_rproc *kproc = rproc->priv;
>   	struct k3_r5_cluster *cluster = kproc->cluster;
> -	struct device *dev = kproc->dev;
> -	struct k3_r5_core *core1, *core = kproc->core;
> +	struct k3_r5_core *core = kproc->core;
>   	int ret;
>   
>   	/* halt all applicable cores */
> @@ -650,16 +694,6 @@ static int k3_r5_rproc_stop(struct rproc *rproc)
>   			}
>   		}
>   	} else {
> -		/* do not allow core 0 to stop before core 1 */
> -		core1 = list_last_entry(&cluster->cores, struct k3_r5_core,
> -					elem);
> -		if (core != core1 && core1->rproc->state != RPROC_OFFLINE) {
> -			dev_err(dev, "%s: can not stop core 0 before core 1\n",
> -				__func__);
> -			ret = -EPERM;
> -			goto out;
> -		}
> -
>   		ret = k3_r5_core_halt(core);
>   		if (ret)
>   			goto out;
> @@ -1154,12 +1188,6 @@ static int k3_r5_rproc_configure_mode(struct k3_r5_rproc *kproc)
>   		return reset_ctrl_status;
>   	}
>   
> -	/*
> -	 * Skip the waiting mechanism for sequential power-on of cores if the
> -	 * core has already been booted by another entity.
> -	 */
> -	core->released_from_reset = c_state;
> -
>   	ret = ti_sci_proc_get_status(core->tsp, &boot_vec, &cfg, &ctrl,
>   				     &stat);
>   	if (ret < 0) {
> @@ -1304,26 +1332,6 @@ static int k3_r5_cluster_rproc_init(struct platform_device *pdev)
>   		    cluster->mode == CLUSTER_MODE_SINGLECPU ||
>   		    cluster->mode == CLUSTER_MODE_SINGLECORE)
>   			break;
> -
> -		/*
> -		 * R5 cores require to be powered on sequentially, core0
> -		 * should be in higher power state than core1 in a cluster
> -		 * So, wait for current core to power up before proceeding
> -		 * to next core and put timeout of 2sec for each core.
> -		 *
> -		 * This waiting mechanism is necessary because
> -		 * rproc_auto_boot_callback() for core1 can be called before
> -		 * core0 due to thread execution order.
> -		 */
> -		ret = wait_event_interruptible_timeout(cluster->core_transition,
> -						       core->released_from_reset,
> -						       msecs_to_jiffies(2000));
> -		if (ret <= 0) {
> -			dev_err(dev,
> -				"Timed out waiting for %s core to power up!\n",
> -				rproc->name);
> -			goto out;
> -		}
>   	}
>   
>   	return 0;