linux-kernel - Re: [PATCH v1 3/3] PM: sleep: Fix possible deadlocks in core system-wide PM code

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAPDyKFohQjDzpYpm0QQLM4eTzGOzGfDNKUGHHC-niPBOrtR8BQ@mail.gmail.com>
Date: Tue, 2 Jan 2024 14:35:06 +0100
From: Ulf Hansson <ulf.hansson@...aro.org>
To: "Rafael J. Wysocki" <rjw@...ysocki.net>
Cc: Greg KH <gregkh@...uxfoundation.org>, linux-pm@...r.kernel.org, 
	Youngmin Nam <youngmin.nam@...sung.com>, rafael@...nel.org, linux-kernel@...r.kernel.org, 
	d7271.choe@...sung.com, janghyuck.kim@...sung.com, hyesoo.yu@...sung.com, 
	Alan Stern <stern@...land.harvard.edu>
Subject: Re: [PATCH v1 3/3] PM: sleep: Fix possible deadlocks in core
 system-wide PM code

On Wed, 27 Dec 2023 at 21:41, Rafael J. Wysocki <rjw@...ysocki.net> wrote:
>
> From: Rafael J. Wysocki <rafael.j.wysocki@...el.com>
>
> It is reported that in low-memory situations the system-wide resume core
> code deadlocks, because async_schedule_dev() executes its argument
> function synchronously if it cannot allocate memory (an not only then)
> and that function attempts to acquire a mutex that is already held.
>
> Address this by changing the code in question to use
> async_schedule_dev_nocall() for scheduling the asynchronous
> execution of device suspend and resume functions and to directly
> run them synchronously if async_schedule_dev_nocall() returns false.
>
> Fixes: 09beebd8f93b ("PM: sleep: core: Switch back to async_schedule_dev()")
> Link: https://lore.kernel.org/linux-pm/ZYvjiqX6EsL15moe@perf/
> Reported-by: Youngmin Nam <youngmin.nam@...sung.com>
> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@...el.com>
> ---
>
> The commit pointed to by the Fixes: tag is the last one that modified
> the code in question, even though the bug had been there already before.
>
> Still, the fix will not apply to the code before that commit.

An option could be to just do "Cc: stable@...r.kernel.org # v5.7+"
instead of pointing to a commit with a Fixes tag.

>
> ---
>  drivers/base/power/main.c |  148 +++++++++++++++++++++-------------------------
>  1 file changed, 68 insertions(+), 80 deletions(-)
>
> Index: linux-pm/drivers/base/power/main.c
> ===================================================================
> --- linux-pm.orig/drivers/base/power/main.c
> +++ linux-pm/drivers/base/power/main.c
> @@ -579,7 +579,7 @@ bool dev_pm_skip_resume(struct device *d
>  }
>
>  /**
> - * device_resume_noirq - Execute a "noirq resume" callback for given device.
> + * __device_resume_noirq - Execute a "noirq resume" callback for given device.
>   * @dev: Device to handle.
>   * @state: PM transition of the system being carried out.
>   * @async: If true, the device is being resumed asynchronously.
> @@ -587,7 +587,7 @@ bool dev_pm_skip_resume(struct device *d
>   * The driver of @dev will not receive interrupts while this function is being
>   * executed.
>   */
> -static int device_resume_noirq(struct device *dev, pm_message_t state, bool async)
> +static void __device_resume_noirq(struct device *dev, pm_message_t state, bool async)
>  {
>         pm_callback_t callback = NULL;
>         const char *info = NULL;
> @@ -655,7 +655,13 @@ Skip:
>  Out:
>         complete_all(&dev->power.completion);
>         TRACE_RESUME(error);
> -       return error;
> +
> +       if (error) {
> +               suspend_stats.failed_resume_noirq++;
> +               dpm_save_failed_step(SUSPEND_RESUME_NOIRQ);
> +               dpm_save_failed_dev(dev_name(dev));
> +               pm_dev_err(dev, state, async ? " async noirq" : " noirq", error);
> +       }
>  }
>
>  static bool is_async(struct device *dev)
> @@ -668,11 +674,15 @@ static bool dpm_async_fn(struct device *
>  {
>         reinit_completion(&dev->power.completion);
>
> -       if (is_async(dev)) {
> -               get_device(dev);
> -               async_schedule_dev(func, dev);
> +       if (!is_async(dev))
> +               return false;
> +
> +       get_device(dev);
> +
> +       if (async_schedule_dev_nocall(func, dev))
>                 return true;
> -       }
> +
> +       put_device(dev);
>
>         return false;
>  }
> @@ -680,15 +690,19 @@ static bool dpm_async_fn(struct device *
>  static void async_resume_noirq(void *data, async_cookie_t cookie)
>  {
>         struct device *dev = data;
> -       int error;
> -
> -       error = device_resume_noirq(dev, pm_transition, true);
> -       if (error)
> -               pm_dev_err(dev, pm_transition, " async", error);
>
> +       __device_resume_noirq(dev, pm_transition, true);
>         put_device(dev);
>  }
>
> +static void device_resume_noirq(struct device *dev)
> +{
> +       if (dpm_async_fn(dev, async_resume_noirq))
> +               return;
> +
> +       __device_resume_noirq(dev, pm_transition, false);
> +}
> +
>  static void dpm_noirq_resume_devices(pm_message_t state)
>  {
>         struct device *dev;
> @@ -698,14 +712,6 @@ static void dpm_noirq_resume_devices(pm_
>         mutex_lock(&dpm_list_mtx);
>         pm_transition = state;
>
> -       /*
> -        * Advanced the async threads upfront,
> -        * in case the starting of async threads is
> -        * delayed by non-async resuming devices.
> -        */
> -       list_for_each_entry(dev, &dpm_noirq_list, power.entry)
> -               dpm_async_fn(dev, async_resume_noirq);
> -

If I understand correctly, this means that we are no longer going to
run the async devices upfront, right?

Depending on how devices get ordered in the dpm_noirq_list, it sounds
like the above could have a negative impact on the total resume time!?
Of course, if all devices would be async capable this wouldn't be a
problem...

>         while (!list_empty(&dpm_noirq_list)) {
>                 dev = to_device(dpm_noirq_list.next);
>                 get_device(dev);
> @@ -713,17 +719,7 @@ static void dpm_noirq_resume_devices(pm_
>
>                 mutex_unlock(&dpm_list_mtx);
>
> -               if (!is_async(dev)) {
> -                       int error;
> -
> -                       error = device_resume_noirq(dev, state, false);
> -                       if (error) {
> -                               suspend_stats.failed_resume_noirq++;
> -                               dpm_save_failed_step(SUSPEND_RESUME_NOIRQ);
> -                               dpm_save_failed_dev(dev_name(dev));
> -                               pm_dev_err(dev, state, " noirq", error);
> -                       }
> -               }
> +               device_resume_noirq(dev);
>
>                 put_device(dev);
>
> @@ -751,14 +747,14 @@ void dpm_resume_noirq(pm_message_t state
>  }
>
>  /**
> - * device_resume_early - Execute an "early resume" callback for given device.
> + * __device_resume_early - Execute an "early resume" callback for given device.
>   * @dev: Device to handle.
>   * @state: PM transition of the system being carried out.
>   * @async: If true, the device is being resumed asynchronously.
>   *
>   * Runtime PM is disabled for @dev while this function is being executed.
>   */
> -static int device_resume_early(struct device *dev, pm_message_t state, bool async)
> +static void __device_resume_early(struct device *dev, pm_message_t state, bool async)
>  {
>         pm_callback_t callback = NULL;
>         const char *info = NULL;
> @@ -811,21 +807,31 @@ Out:
>
>         pm_runtime_enable(dev);
>         complete_all(&dev->power.completion);
> -       return error;
> +
> +       if (error) {
> +               suspend_stats.failed_resume_early++;
> +               dpm_save_failed_step(SUSPEND_RESUME_EARLY);
> +               dpm_save_failed_dev(dev_name(dev));
> +               pm_dev_err(dev, state, async ? " async early" : " early", error);
> +       }
>  }
>
>  static void async_resume_early(void *data, async_cookie_t cookie)
>  {
>         struct device *dev = data;
> -       int error;
> -
> -       error = device_resume_early(dev, pm_transition, true);
> -       if (error)
> -               pm_dev_err(dev, pm_transition, " async", error);
>
> +       __device_resume_early(dev, pm_transition, true);
>         put_device(dev);
>  }
>
> +static void device_resume_early(struct device *dev)
> +{
> +       if (dpm_async_fn(dev, async_resume_early))
> +               return;
> +
> +       __device_resume_early(dev, pm_transition, false);
> +}
> +
>  /**
>   * dpm_resume_early - Execute "early resume" callbacks for all devices.
>   * @state: PM transition of the system being carried out.
> @@ -839,14 +845,6 @@ void dpm_resume_early(pm_message_t state
>         mutex_lock(&dpm_list_mtx);
>         pm_transition = state;
>
> -       /*
> -        * Advanced the async threads upfront,
> -        * in case the starting of async threads is
> -        * delayed by non-async resuming devices.
> -        */
> -       list_for_each_entry(dev, &dpm_late_early_list, power.entry)
> -               dpm_async_fn(dev, async_resume_early);
> -

Ditto.

>         while (!list_empty(&dpm_late_early_list)) {
>                 dev = to_device(dpm_late_early_list.next);
>                 get_device(dev);
> @@ -854,17 +852,7 @@ void dpm_resume_early(pm_message_t state
>
>                 mutex_unlock(&dpm_list_mtx);
>
> -               if (!is_async(dev)) {
> -                       int error;
> -
> -                       error = device_resume_early(dev, state, false);
> -                       if (error) {
> -                               suspend_stats.failed_resume_early++;
> -                               dpm_save_failed_step(SUSPEND_RESUME_EARLY);
> -                               dpm_save_failed_dev(dev_name(dev));
> -                               pm_dev_err(dev, state, " early", error);
> -                       }
> -               }
> +               device_resume_early(dev);
>
>                 put_device(dev);
>
> @@ -888,12 +876,12 @@ void dpm_resume_start(pm_message_t state
>  EXPORT_SYMBOL_GPL(dpm_resume_start);
>
>  /**
> - * device_resume - Execute "resume" callbacks for given device.
> + * __device_resume - Execute "resume" callbacks for given device.
>   * @dev: Device to handle.
>   * @state: PM transition of the system being carried out.
>   * @async: If true, the device is being resumed asynchronously.
>   */
> -static int device_resume(struct device *dev, pm_message_t state, bool async)
> +static void __device_resume(struct device *dev, pm_message_t state, bool async)
>  {
>         pm_callback_t callback = NULL;
>         const char *info = NULL;
> @@ -975,20 +963,30 @@ static int device_resume(struct device *
>
>         TRACE_RESUME(error);
>
> -       return error;
> +       if (error) {
> +               suspend_stats.failed_resume++;
> +               dpm_save_failed_step(SUSPEND_RESUME);
> +               dpm_save_failed_dev(dev_name(dev));
> +               pm_dev_err(dev, state, async ? " async" : "", error);
> +       }
>  }
>
>  static void async_resume(void *data, async_cookie_t cookie)
>  {
>         struct device *dev = data;
> -       int error;
>
> -       error = device_resume(dev, pm_transition, true);
> -       if (error)
> -               pm_dev_err(dev, pm_transition, " async", error);
> +       __device_resume(dev, pm_transition, true);
>         put_device(dev);
>  }
>
> +static void device_resume(struct device *dev)
> +{
> +       if (dpm_async_fn(dev, async_resume))
> +               return;
> +
> +       __device_resume(dev, pm_transition, false);
> +}
> +
>  /**
>   * dpm_resume - Execute "resume" callbacks for non-sysdev devices.
>   * @state: PM transition of the system being carried out.
> @@ -1008,27 +1006,17 @@ void dpm_resume(pm_message_t state)
>         pm_transition = state;
>         async_error = 0;
>
> -       list_for_each_entry(dev, &dpm_suspended_list, power.entry)
> -               dpm_async_fn(dev, async_resume);
> -

Ditto.

>         while (!list_empty(&dpm_suspended_list)) {
>                 dev = to_device(dpm_suspended_list.next);
> +
>                 get_device(dev);
> -               if (!is_async(dev)) {
> -                       int error;
>
> -                       mutex_unlock(&dpm_list_mtx);
> +               mutex_unlock(&dpm_list_mtx);
>
> -                       error = device_resume(dev, state, false);
> -                       if (error) {
> -                               suspend_stats.failed_resume++;
> -                               dpm_save_failed_step(SUSPEND_RESUME);
> -                               dpm_save_failed_dev(dev_name(dev));
> -                               pm_dev_err(dev, state, "", error);
> -                       }
> +               device_resume(dev);
> +
> +               mutex_lock(&dpm_list_mtx);
>
> -                       mutex_lock(&dpm_list_mtx);
> -               }
>                 if (!list_empty(&dev->power.entry))
>                         list_move_tail(&dev->power.entry, &dpm_prepared_list);
>

Other than the potential issue I pointed out, the code as such looks good to me!

Kind regards
Uffe