linux-kernel - Re: [PATCH v1 3/3] PM: sleep: Fix possible deadlocks in core system-wide PM code

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAJZ5v0jPtYFk=pPE63CGGL0kuc+N_bZqKdgBMC=PMrwqzHBDTg@mail.gmail.com>
Date: Tue, 2 Jan 2024 14:53:59 +0100
From: "Rafael J. Wysocki" <rafael@...nel.org>
To: Ulf Hansson <ulf.hansson@...aro.org>
Cc: "Rafael J. Wysocki" <rjw@...ysocki.net>, Greg KH <gregkh@...uxfoundation.org>, 
	linux-pm@...r.kernel.org, Youngmin Nam <youngmin.nam@...sung.com>, rafael@...nel.org, 
	linux-kernel@...r.kernel.org, d7271.choe@...sung.com, 
	janghyuck.kim@...sung.com, hyesoo.yu@...sung.com, 
	Alan Stern <stern@...land.harvard.edu>
Subject: Re: [PATCH v1 3/3] PM: sleep: Fix possible deadlocks in core
 system-wide PM code

On Tue, Jan 2, 2024 at 2:35 PM Ulf Hansson <ulf.hansson@...aro.org> wrote:
>
> On Wed, 27 Dec 2023 at 21:41, Rafael J. Wysocki <rjw@...ysocki.net> wrote:
> >
> > From: Rafael J. Wysocki <rafael.j.wysocki@...el.com>
> >
> > It is reported that in low-memory situations the system-wide resume core
> > code deadlocks, because async_schedule_dev() executes its argument
> > function synchronously if it cannot allocate memory (an not only then)
> > and that function attempts to acquire a mutex that is already held.
> >
> > Address this by changing the code in question to use
> > async_schedule_dev_nocall() for scheduling the asynchronous
> > execution of device suspend and resume functions and to directly
> > run them synchronously if async_schedule_dev_nocall() returns false.
> >
> > Fixes: 09beebd8f93b ("PM: sleep: core: Switch back to async_schedule_dev()")
> > Link: https://lore.kernel.org/linux-pm/ZYvjiqX6EsL15moe@perf/
> > Reported-by: Youngmin Nam <youngmin.nam@...sung.com>
> > Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@...el.com>
> > ---
> >
> > The commit pointed to by the Fixes: tag is the last one that modified
> > the code in question, even though the bug had been there already before.
> >
> > Still, the fix will not apply to the code before that commit.
>
> An option could be to just do "Cc: stable@...r.kernel.org # v5.7+"
> instead of pointing to a commit with a Fixes tag.

Right, but one can argue that every commit with a "Cc: stable" tag is
a fix, so it should carry a Fixes: tag too anyway.

> >
> > ---
> >  drivers/base/power/main.c |  148 +++++++++++++++++++++-------------------------
> >  1 file changed, 68 insertions(+), 80 deletions(-)
> >
> > Index: linux-pm/drivers/base/power/main.c
> > ===================================================================
> > --- linux-pm.orig/drivers/base/power/main.c
> > +++ linux-pm/drivers/base/power/main.c
> > @@ -579,7 +579,7 @@ bool dev_pm_skip_resume(struct device *d
> >  }
> >
> >  /**
> > - * device_resume_noirq - Execute a "noirq resume" callback for given device.
> > + * __device_resume_noirq - Execute a "noirq resume" callback for given device.
> >   * @dev: Device to handle.
> >   * @state: PM transition of the system being carried out.
> >   * @async: If true, the device is being resumed asynchronously.
> > @@ -587,7 +587,7 @@ bool dev_pm_skip_resume(struct device *d
> >   * The driver of @dev will not receive interrupts while this function is being
> >   * executed.
> >   */
> > -static int device_resume_noirq(struct device *dev, pm_message_t state, bool async)
> > +static void __device_resume_noirq(struct device *dev, pm_message_t state, bool async)
> >  {
> >         pm_callback_t callback = NULL;
> >         const char *info = NULL;
> > @@ -655,7 +655,13 @@ Skip:
> >  Out:
> >         complete_all(&dev->power.completion);
> >         TRACE_RESUME(error);
> > -       return error;
> > +
> > +       if (error) {
> > +               suspend_stats.failed_resume_noirq++;
> > +               dpm_save_failed_step(SUSPEND_RESUME_NOIRQ);
> > +               dpm_save_failed_dev(dev_name(dev));
> > +               pm_dev_err(dev, state, async ? " async noirq" : " noirq", error);
> > +       }
> >  }
> >
> >  static bool is_async(struct device *dev)
> > @@ -668,11 +674,15 @@ static bool dpm_async_fn(struct device *
> >  {
> >         reinit_completion(&dev->power.completion);
> >
> > -       if (is_async(dev)) {
> > -               get_device(dev);
> > -               async_schedule_dev(func, dev);
> > +       if (!is_async(dev))
> > +               return false;
> > +
> > +       get_device(dev);
> > +
> > +       if (async_schedule_dev_nocall(func, dev))
> >                 return true;
> > -       }
> > +
> > +       put_device(dev);
> >
> >         return false;
> >  }
> > @@ -680,15 +690,19 @@ static bool dpm_async_fn(struct device *
> >  static void async_resume_noirq(void *data, async_cookie_t cookie)
> >  {
> >         struct device *dev = data;
> > -       int error;
> > -
> > -       error = device_resume_noirq(dev, pm_transition, true);
> > -       if (error)
> > -               pm_dev_err(dev, pm_transition, " async", error);
> >
> > +       __device_resume_noirq(dev, pm_transition, true);
> >         put_device(dev);
> >  }
> >
> > +static void device_resume_noirq(struct device *dev)
> > +{
> > +       if (dpm_async_fn(dev, async_resume_noirq))
> > +               return;
> > +
> > +       __device_resume_noirq(dev, pm_transition, false);
> > +}
> > +
> >  static void dpm_noirq_resume_devices(pm_message_t state)
> >  {
> >         struct device *dev;
> > @@ -698,14 +712,6 @@ static void dpm_noirq_resume_devices(pm_
> >         mutex_lock(&dpm_list_mtx);
> >         pm_transition = state;
> >
> > -       /*
> > -        * Advanced the async threads upfront,
> > -        * in case the starting of async threads is
> > -        * delayed by non-async resuming devices.
> > -        */
> > -       list_for_each_entry(dev, &dpm_noirq_list, power.entry)
> > -               dpm_async_fn(dev, async_resume_noirq);
> > -
>
> If I understand correctly, this means that we are no longer going to
> run the async devices upfront, right?

Right.

> Depending on how devices get ordered in the dpm_noirq_list, it sounds
> like the above could have a negative impact on the total resume time!?

It could, but it is unclear at this time whether or not it will.

> Of course, if all devices would be async capable this wouldn't be a
> problem...

Sure.

So the existing behavior can be restored with the help of an
additional device flag, but I didn't decide to add such a flag just
yet.

I'll probably do it in 6.9, unless the performance impact is serious
enough, in which case it can be added earlier.

I still would prefer to get to a point at which the suspend and resume
paths are analogous (from the async POV) and that's what happens after
this patch, so I'd say that IMO it is better to address any
performance regressions on top of it.

> >         while (!list_empty(&dpm_noirq_list)) {
> >                 dev = to_device(dpm_noirq_list.next);
> >                 get_device(dev);
> > @@ -713,17 +719,7 @@ static void dpm_noirq_resume_devices(pm_
> >
> >                 mutex_unlock(&dpm_list_mtx);
> >
> > -               if (!is_async(dev)) {
> > -                       int error;
> > -
> > -                       error = device_resume_noirq(dev, state, false);
> > -                       if (error) {
> > -                               suspend_stats.failed_resume_noirq++;
> > -                               dpm_save_failed_step(SUSPEND_RESUME_NOIRQ);
> > -                               dpm_save_failed_dev(dev_name(dev));
> > -                               pm_dev_err(dev, state, " noirq", error);
> > -                       }
> > -               }
> > +               device_resume_noirq(dev);
> >
> >                 put_device(dev);
> >
> > @@ -751,14 +747,14 @@ void dpm_resume_noirq(pm_message_t state
> >  }
> >
> >  /**
> > - * device_resume_early - Execute an "early resume" callback for given device.
> > + * __device_resume_early - Execute an "early resume" callback for given device.
> >   * @dev: Device to handle.
> >   * @state: PM transition of the system being carried out.
> >   * @async: If true, the device is being resumed asynchronously.
> >   *
> >   * Runtime PM is disabled for @dev while this function is being executed.
> >   */
> > -static int device_resume_early(struct device *dev, pm_message_t state, bool async)
> > +static void __device_resume_early(struct device *dev, pm_message_t state, bool async)
> >  {
> >         pm_callback_t callback = NULL;
> >         const char *info = NULL;
> > @@ -811,21 +807,31 @@ Out:
> >
> >         pm_runtime_enable(dev);
> >         complete_all(&dev->power.completion);
> > -       return error;
> > +
> > +       if (error) {
> > +               suspend_stats.failed_resume_early++;
> > +               dpm_save_failed_step(SUSPEND_RESUME_EARLY);
> > +               dpm_save_failed_dev(dev_name(dev));
> > +               pm_dev_err(dev, state, async ? " async early" : " early", error);
> > +       }
> >  }
> >
> >  static void async_resume_early(void *data, async_cookie_t cookie)
> >  {
> >         struct device *dev = data;
> > -       int error;
> > -
> > -       error = device_resume_early(dev, pm_transition, true);
> > -       if (error)
> > -               pm_dev_err(dev, pm_transition, " async", error);
> >
> > +       __device_resume_early(dev, pm_transition, true);
> >         put_device(dev);
> >  }
> >
> > +static void device_resume_early(struct device *dev)
> > +{
> > +       if (dpm_async_fn(dev, async_resume_early))
> > +               return;
> > +
> > +       __device_resume_early(dev, pm_transition, false);
> > +}
> > +
> >  /**
> >   * dpm_resume_early - Execute "early resume" callbacks for all devices.
> >   * @state: PM transition of the system being carried out.
> > @@ -839,14 +845,6 @@ void dpm_resume_early(pm_message_t state
> >         mutex_lock(&dpm_list_mtx);
> >         pm_transition = state;
> >
> > -       /*
> > -        * Advanced the async threads upfront,
> > -        * in case the starting of async threads is
> > -        * delayed by non-async resuming devices.
> > -        */
> > -       list_for_each_entry(dev, &dpm_late_early_list, power.entry)
> > -               dpm_async_fn(dev, async_resume_early);
> > -
>
> Ditto.
>
> >         while (!list_empty(&dpm_late_early_list)) {
> >                 dev = to_device(dpm_late_early_list.next);
> >                 get_device(dev);
> > @@ -854,17 +852,7 @@ void dpm_resume_early(pm_message_t state
> >
> >                 mutex_unlock(&dpm_list_mtx);
> >
> > -               if (!is_async(dev)) {
> > -                       int error;
> > -
> > -                       error = device_resume_early(dev, state, false);
> > -                       if (error) {
> > -                               suspend_stats.failed_resume_early++;
> > -                               dpm_save_failed_step(SUSPEND_RESUME_EARLY);
> > -                               dpm_save_failed_dev(dev_name(dev));
> > -                               pm_dev_err(dev, state, " early", error);
> > -                       }
> > -               }
> > +               device_resume_early(dev);
> >
> >                 put_device(dev);
> >
> > @@ -888,12 +876,12 @@ void dpm_resume_start(pm_message_t state
> >  EXPORT_SYMBOL_GPL(dpm_resume_start);
> >
> >  /**
> > - * device_resume - Execute "resume" callbacks for given device.
> > + * __device_resume - Execute "resume" callbacks for given device.
> >   * @dev: Device to handle.
> >   * @state: PM transition of the system being carried out.
> >   * @async: If true, the device is being resumed asynchronously.
> >   */
> > -static int device_resume(struct device *dev, pm_message_t state, bool async)
> > +static void __device_resume(struct device *dev, pm_message_t state, bool async)
> >  {
> >         pm_callback_t callback = NULL;
> >         const char *info = NULL;
> > @@ -975,20 +963,30 @@ static int device_resume(struct device *
> >
> >         TRACE_RESUME(error);
> >
> > -       return error;
> > +       if (error) {
> > +               suspend_stats.failed_resume++;
> > +               dpm_save_failed_step(SUSPEND_RESUME);
> > +               dpm_save_failed_dev(dev_name(dev));
> > +               pm_dev_err(dev, state, async ? " async" : "", error);
> > +       }
> >  }
> >
> >  static void async_resume(void *data, async_cookie_t cookie)
> >  {
> >         struct device *dev = data;
> > -       int error;
> >
> > -       error = device_resume(dev, pm_transition, true);
> > -       if (error)
> > -               pm_dev_err(dev, pm_transition, " async", error);
> > +       __device_resume(dev, pm_transition, true);
> >         put_device(dev);
> >  }
> >
> > +static void device_resume(struct device *dev)
> > +{
> > +       if (dpm_async_fn(dev, async_resume))
> > +               return;
> > +
> > +       __device_resume(dev, pm_transition, false);
> > +}
> > +
> >  /**
> >   * dpm_resume - Execute "resume" callbacks for non-sysdev devices.
> >   * @state: PM transition of the system being carried out.
> > @@ -1008,27 +1006,17 @@ void dpm_resume(pm_message_t state)
> >         pm_transition = state;
> >         async_error = 0;
> >
> > -       list_for_each_entry(dev, &dpm_suspended_list, power.entry)
> > -               dpm_async_fn(dev, async_resume);
> > -
>
> Ditto.
>
> >         while (!list_empty(&dpm_suspended_list)) {
> >                 dev = to_device(dpm_suspended_list.next);
> > +
> >                 get_device(dev);
> > -               if (!is_async(dev)) {
> > -                       int error;
> >
> > -                       mutex_unlock(&dpm_list_mtx);
> > +               mutex_unlock(&dpm_list_mtx);
> >
> > -                       error = device_resume(dev, state, false);
> > -                       if (error) {
> > -                               suspend_stats.failed_resume++;
> > -                               dpm_save_failed_step(SUSPEND_RESUME);
> > -                               dpm_save_failed_dev(dev_name(dev));
> > -                               pm_dev_err(dev, state, "", error);
> > -                       }
> > +               device_resume(dev);
> > +
> > +               mutex_lock(&dpm_list_mtx);
> >
> > -                       mutex_lock(&dpm_list_mtx);
> > -               }
> >                 if (!list_empty(&dev->power.entry))
> >                         list_move_tail(&dev->power.entry, &dpm_prepared_list);
> >
>
> Other than the potential issue I pointed out, the code as such looks good to me!

Thank you!