lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Fri, 11 Sep 2020 17:48:59 -0500
From:   ebiederm@...ssion.com (Eric W. Biederman)
To:     Joerg Vehlow <lkml@...coder.de>
Cc:     peterz@...radead.org, Steven Rostedt <rostedt@...dmis.org>,
        Andrew Morton <akpm@...ux-foundation.org>,
        Thomas Gleixner <tglx@...utronix.de>,
        Sebastian Andrzej Siewior <bigeasy@...utronix.de>,
        Huang Ying <ying.huang@...el.com>,
        linux-kernel@...r.kernel.org,
        Joerg Vehlow <joerg.vehlow@...-tech.de>
Subject: Re: [BUG RT] dump-capture kernel not executed for panic in interrupt context

Joerg Vehlow <lkml@...coder.de> writes:

> Hi,
>
> here is the new version of the patch, based on Peter's suggestion.
> It looks like it works fine. I added the BUG_ON to __crash_kexec because it is
> a precondition that panic_cpu is set correctly; otherwise the whole locking
> logic fails.
>
> The mutex_trylock can still be used, because it is only called in syscall
> context, never in interrupt context.

What is this patch supposed to be doing?

What bug is it fixing?

A BUG_ON that triggers inside of BUG_ONs seems not just suspect but
outright impossible to make use of.


I get the feeling skimming this that it is time to sort out and simplify
the locking here, rather than make it more complex, and more likely to
fail.

I get the feeling that over the years somehow the assumption that the
rest of the kernel is broken and that we need to get out of the broken
kernel as fast and as simply as possible has been lost.

Eric



> ---
>  kernel/kexec.c          |  8 ++--
>  kernel/kexec_core.c     | 86 +++++++++++++++++++++++++++--------------
>  kernel/kexec_file.c     |  4 +-
>  kernel/kexec_internal.h |  6 ++-
>  4 files changed, 69 insertions(+), 35 deletions(-)
>
> diff --git a/kernel/kexec.c b/kernel/kexec.c
> index f977786fe498..118a012aeac2 100644
> --- a/kernel/kexec.c
> +++ b/kernel/kexec.c
> @@ -255,12 +255,12 @@ SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments,
>       *
>       * KISS: always take the mutex.
>       */
> -    if (!mutex_trylock(&kexec_mutex))
> +    if (!kexec_trylock())
>          return -EBUSY;
>
>      result = do_kexec_load(entry, nr_segments, segments, flags);
>
> -    mutex_unlock(&kexec_mutex);
> +    kexec_unlock();
>
>      return result;
>  }
> @@ -309,12 +309,12 @@ COMPAT_SYSCALL_DEFINE4(kexec_load, compat_ulong_t, entry,
>       *
>       * KISS: always take the mutex.
>       */
> -    if (!mutex_trylock(&kexec_mutex))
> +    if (!kexec_trylock())
>          return -EBUSY;
>
>      result = do_kexec_load(entry, nr_segments, ksegments, flags);
>
> -    mutex_unlock(&kexec_mutex);
> +    kexec_unlock();
>
>      return result;
>  }
> diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
> index c19c0dad1ebe..71682a33b1ba 100644
> --- a/kernel/kexec_core.c
> +++ b/kernel/kexec_core.c
> @@ -45,7 +45,7 @@
>  #include <crypto/sha.h>
>  #include "kexec_internal.h"
>
> -DEFINE_MUTEX(kexec_mutex);
> +static DEFINE_MUTEX(kexec_mutex);
>
>  /* Per cpu memory for storing cpu states in case of system crash. */
>  note_buf_t __percpu *crash_notes;
> @@ -70,6 +70,43 @@ struct resource crashk_low_res = {
>      .desc  = IORES_DESC_CRASH_KERNEL
>  };
>
> +void kexec_lock(void)
> +{
> +    /*
> +     * LOCK kexec_mutex        cmpxchg(&panic_cpu, INVALID, cpu)
> +     *   MB                  MB
> +     * panic_cpu == INVALID        kexec_mutex == LOCKED
> +     *
> +     * Ensures either we observe the cmpxchg, or crash_kexec() observes
> +     * our lock acquisition.
> +     */
> +    mutex_lock(&kexec_mutex);
> +    smp_mb();
> +    atomic_cond_read_acquire(&panic_cpu, VAL == PANIC_CPU_INVALID);
> +}
> +
> +int kexec_trylock(void) {
> +    if (!mutex_trylock(&kexec_mutex)) {
> +        return 0;
> +    }
> +    smp_mb();
> +    if (atomic_read(&panic_cpu) != PANIC_CPU_INVALID) {
> +         mutex_unlock(&kexec_mutex);
> +         return 0;
> +    }
> +    return 1;
> +}
> +
> +void kexec_unlock(void)
> +{
> +    mutex_unlock(&kexec_mutex);
> +}
> +
> +int kexec_is_locked(void)
> +{
> +    return mutex_is_locked(&kexec_mutex);
> +}
> +
>  int kexec_should_crash(struct task_struct *p)
>  {
>      /*
> @@ -943,24 +980,15 @@ int kexec_load_disabled;
>   */
>  void __noclone __crash_kexec(struct pt_regs *regs)
>  {
> -    /* Take the kexec_mutex here to prevent sys_kexec_load
> -     * running on one cpu from replacing the crash kernel
> -     * we are using after a panic on a different cpu.
> -     *
> -     * If the crash kernel was not located in a fixed area
> -     * of memory the xchg(&kexec_crash_image) would be
> -     * sufficient.  But since I reuse the memory...
> -     */
> -    if (mutex_trylock(&kexec_mutex)) {
> -        if (kexec_crash_image) {
> -            struct pt_regs fixed_regs;
> -
> -            crash_setup_regs(&fixed_regs, regs);
> -            crash_save_vmcoreinfo();
> -            machine_crash_shutdown(&fixed_regs);
> -            machine_kexec(kexec_crash_image);
> -        }
> -        mutex_unlock(&kexec_mutex);
> +    BUG_ON(atomic_read(&panic_cpu) != raw_smp_processor_id());
> +
> +    if (!kexec_is_locked() && kexec_crash_image) {
> +        struct pt_regs fixed_regs;
> +
> +        crash_setup_regs(&fixed_regs, regs);
> +        crash_save_vmcoreinfo();
> +        machine_crash_shutdown(&fixed_regs);
> +        machine_kexec(kexec_crash_image);
>      }
>  }
>  STACK_FRAME_NON_STANDARD(__crash_kexec);
> @@ -977,9 +1005,11 @@ void crash_kexec(struct pt_regs *regs)
>      this_cpu = raw_smp_processor_id();
>      old_cpu = atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, this_cpu);
>      if (old_cpu == PANIC_CPU_INVALID) {
> -        /* This is the 1st CPU which comes here, so go ahead. */
> -        printk_safe_flush_on_panic();
> -        __crash_kexec(regs);
> +        if (!kexec_is_locked()) {
> +            /* This is the 1st CPU which comes here, so go ahead. */
> +            printk_safe_flush_on_panic();
> +            __crash_kexec(regs);
> +        }
>
>          /*
>           * Reset panic_cpu to allow another panic()/crash_kexec()
> @@ -993,10 +1023,10 @@ size_t crash_get_memory_size(void)
>  {
>      size_t size = 0;
>
> -    mutex_lock(&kexec_mutex);
> +    kexec_lock();
>      if (crashk_res.end != crashk_res.start)
>          size = resource_size(&crashk_res);
> -    mutex_unlock(&kexec_mutex);
> +    kexec_unlock();
>      return size;
>  }
>
> @@ -1016,7 +1046,7 @@ int crash_shrink_memory(unsigned long new_size)
>      unsigned long old_size;
>      struct resource *ram_res;
>
> -    mutex_lock(&kexec_mutex);
> +    kexec_lock();
>
>      if (kexec_crash_image) {
>          ret = -ENOENT;
> @@ -1054,7 +1084,7 @@ int crash_shrink_memory(unsigned long new_size)
>      insert_resource(&iomem_resource, ram_res);
>
>  unlock:
> -    mutex_unlock(&kexec_mutex);
> +    kexec_unlock();
>      return ret;
>  }
>
> @@ -1126,7 +1156,7 @@ int kernel_kexec(void)
>  {
>      int error = 0;
>
> -    if (!mutex_trylock(&kexec_mutex))
> +    if (!kexec_trylock())
>          return -EBUSY;
>      if (!kexec_image) {
>          error = -EINVAL;
> @@ -1203,7 +1233,7 @@ int kernel_kexec(void)
>  #endif
>
>   Unlock:
> -    mutex_unlock(&kexec_mutex);
> +    kexec_unlock();
>      return error;
>  }
>
> diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
> index ca40bef75a61..d40b0aedc187 100644
> --- a/kernel/kexec_file.c
> +++ b/kernel/kexec_file.c
> @@ -362,7 +362,7 @@ SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd,
>
>      image = NULL;
>
> -    if (!mutex_trylock(&kexec_mutex))
> +    if (!kexec_trylock())
>          return -EBUSY;
>
>      dest_image = &kexec_image;
> @@ -434,7 +434,7 @@ SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd,
>      if ((flags & KEXEC_FILE_ON_CRASH) && kexec_crash_image)
>          arch_kexec_protect_crashkres();
>
> -    mutex_unlock(&kexec_mutex);
> +    kexec_unlock();
>      kimage_free(image);
>      return ret;
>  }
> diff --git a/kernel/kexec_internal.h b/kernel/kexec_internal.h
> index 39d30ccf8d87..2c1683cb1082 100644
> --- a/kernel/kexec_internal.h
> +++ b/kernel/kexec_internal.h
> @@ -15,7 +15,11 @@ int kimage_is_destination_range(struct kimage *image,
>
>  int machine_kexec_post_load(struct kimage *image);
>
> -extern struct mutex kexec_mutex;
> +void kexec_lock(void);
> +int kexec_trylock(void);
> +void kexec_unlock(void);
> +int kexec_is_locked(void);
> +
>
>  #ifdef CONFIG_KEXEC_FILE
>  #include <linux/purgatory.h>

Powered by blists - more mailing lists