lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAJqdLrogefL5ZkxJfbQ75u45BFFJxttJd1V4yf=KUPxdHg7ocg@mail.gmail.com>
Date: Tue, 10 Feb 2026 21:13:39 +0100
From: Alexander Mikhalitsyn <alexander@...alicyn.com>
To: Andrei Vagin <avagin@...gle.com>
Cc: Kees Cook <kees@...nel.org>, Andrew Morton <akpm@...ux-foundation.org>, 
	Cyrill Gorcunov <gorcunov@...il.com>, Mike Rapoport <rppt@...nel.org>, linux-kernel@...r.kernel.org, 
	linux-fsdevel@...r.kernel.org, linux-mm@...ck.org, criu@...ts.linux.dev, 
	Chen Ridong <chenridong@...wei.com>, Christian Brauner <brauner@...nel.org>, 
	David Hildenbrand <david@...nel.org>, Eric Biederman <ebiederm@...ssion.com>, 
	Lorenzo Stoakes <lorenzo.stoakes@...cle.com>, Michal Koutny <mkoutny@...e.com>
Subject: Re: [PATCH 2/4] exec: inherit HWCAPs from the parent process

Am Mo., 9. Feb. 2026 um 20:06 Uhr schrieb Andrei Vagin <avagin@...gle.com>:
>
> Introduces a mechanism to inherit hardware capabilities (AT_HWCAP,
> AT_HWCAP2, etc.) from a parent process when they have been modified via
> prctl.
>
> To support C/R operations (snapshots, live migration) in heterogeneous
> clusters, we must ensure that processes utilize CPU features available
> on all potential target nodes. To solve this, we need to advertise a
> common feature set across the cluster.
>
> This patch adds a new mm flag MMF_USER_HWCAP, which is set when the
> auxiliary vector is modified via prctl(PR_SET_MM, PR_SET_MM_AUXV).  When
> execve() is called, if the current process has MMF_USER_HWCAP set, the
> HWCAP values are extracted from the current auxiliary vector and stored
> in the linux_binprm structure. These values are then used to populate
> the auxiliary vector of the new process, effectively inheriting the
> hardware capabilities.
>
> The inherited HWCAPs are masked with the hardware capabilities supported
> by the current kernel to ensure that we don't report more features than
> actually supported. This is important to avoid unexpected behavior,
> especially for processes with additional privileges.
>
> Signed-off-by: Andrei Vagin <avagin@...gle.com>

Cool stuff, LGTM!

Reviewed-by: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@...urfusion.io>

> ---
>  fs/binfmt_elf.c          |  8 +++---
>  fs/binfmt_elf_fdpic.c    |  8 +++---
>  fs/exec.c                | 61 ++++++++++++++++++++++++++++++++++++++++
>  include/linux/binfmts.h  | 11 ++++++++
>  include/linux/mm_types.h |  2 ++
>  kernel/fork.c            |  3 ++
>  kernel/sys.c             |  5 +++-
>  7 files changed, 89 insertions(+), 9 deletions(-)
>
> diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
> index 3eb734c192e9..aec129e33f0b 100644
> --- a/fs/binfmt_elf.c
> +++ b/fs/binfmt_elf.c
> @@ -246,7 +246,7 @@ create_elf_tables(struct linux_binprm *bprm, const struct elfhdr *exec,
>          */
>         ARCH_DLINFO;
>  #endif
> -       NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
> +       NEW_AUX_ENT(AT_HWCAP, bprm->hwcap);
>         NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
>         NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
>         NEW_AUX_ENT(AT_PHDR, phdr_addr);
> @@ -264,13 +264,13 @@ create_elf_tables(struct linux_binprm *bprm, const struct elfhdr *exec,
>         NEW_AUX_ENT(AT_SECURE, bprm->secureexec);
>         NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
>  #ifdef ELF_HWCAP2
> -       NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2);
> +       NEW_AUX_ENT(AT_HWCAP2, bprm->hwcap2);
>  #endif
>  #ifdef ELF_HWCAP3
> -       NEW_AUX_ENT(AT_HWCAP3, ELF_HWCAP3);
> +       NEW_AUX_ENT(AT_HWCAP3, bprm->hwcap3);
>  #endif
>  #ifdef ELF_HWCAP4
> -       NEW_AUX_ENT(AT_HWCAP4, ELF_HWCAP4);
> +       NEW_AUX_ENT(AT_HWCAP4, bprm->hwcap4);
>  #endif
>         NEW_AUX_ENT(AT_EXECFN, bprm->exec);
>         if (k_platform) {
> diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
> index a3d4e6973b29..55b482f03c82 100644
> --- a/fs/binfmt_elf_fdpic.c
> +++ b/fs/binfmt_elf_fdpic.c
> @@ -629,15 +629,15 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm,
>          */
>         ARCH_DLINFO;
>  #endif
> -       NEW_AUX_ENT(AT_HWCAP,   ELF_HWCAP);
> +       NEW_AUX_ENT(AT_HWCAP,   bprm->hwcap);
>  #ifdef ELF_HWCAP2
> -       NEW_AUX_ENT(AT_HWCAP2,  ELF_HWCAP2);
> +       NEW_AUX_ENT(AT_HWCAP2,  bprm->hwcap2);
>  #endif
>  #ifdef ELF_HWCAP3
> -       NEW_AUX_ENT(AT_HWCAP3,  ELF_HWCAP3);
> +       NEW_AUX_ENT(AT_HWCAP3,  bprm->hwcap3);
>  #endif
>  #ifdef ELF_HWCAP4
> -       NEW_AUX_ENT(AT_HWCAP4,  ELF_HWCAP4);
> +       NEW_AUX_ENT(AT_HWCAP4,  bprm->hwcap4);
>  #endif
>         NEW_AUX_ENT(AT_PAGESZ,  PAGE_SIZE);
>         NEW_AUX_ENT(AT_CLKTCK,  CLOCKS_PER_SEC);
> diff --git a/fs/exec.c b/fs/exec.c
> index 9d5ebc9d15b0..7401efbe4ba0 100644
> --- a/fs/exec.c
> +++ b/fs/exec.c
> @@ -1462,6 +1462,17 @@ static struct linux_binprm *alloc_bprm(int fd, struct filename *filename, int fl
>          */
>         bprm->is_check = !!(flags & AT_EXECVE_CHECK);
>
> +       bprm->hwcap = ELF_HWCAP;
> +#ifdef ELF_HWCAP2
> +       bprm->hwcap2 = ELF_HWCAP2;
> +#endif
> +#ifdef ELF_HWCAP3
> +       bprm->hwcap3 = ELF_HWCAP3;
> +#endif
> +#ifdef ELF_HWCAP4
> +       bprm->hwcap4 = ELF_HWCAP4;
> +#endif
> +
>         retval = bprm_mm_init(bprm);
>         if (!retval)
>                 return bprm;
> @@ -1780,6 +1791,53 @@ static int bprm_execve(struct linux_binprm *bprm)
>         return retval;
>  }
>
> +static void inherit_hwcap(struct linux_binprm *bprm)
> +{
> +       int i, n;
> +
> +#ifdef ELF_HWCAP4
> +       n = 4;
> +#elif defined(ELF_HWCAP3)
> +       n = 3;
> +#elif defined(ELF_HWCAP2)
> +       n = 2;
> +#else
> +       n = 1;
> +#endif
> +
> +       for (i = 0; n && i < AT_VECTOR_SIZE; i += 2) {
> +               long val = current->mm->saved_auxv[i + 1];
> +
> +               switch (current->mm->saved_auxv[i]) {
> +               case AT_NULL:
> +                       goto done;
> +               case AT_HWCAP:
> +                       bprm->hwcap = val & ELF_HWCAP;
> +                       break;
> +#ifdef ELF_HWCAP2
> +               case AT_HWCAP2:
> +                       bprm->hwcap2 = val & ELF_HWCAP2;
> +                       break;
> +#endif
> +#ifdef ELF_HWCAP3
> +               case AT_HWCAP3:
> +                       bprm->hwcap3 = val & ELF_HWCAP3;
> +                       break;
> +#endif
> +#ifdef ELF_HWCAP4
> +               case AT_HWCAP4:
> +                       bprm->hwcap4 = val & ELF_HWCAP4;
> +                       break;
> +#endif
> +               default:
> +                       continue;
> +               }
> +               n--;
> +       }
> +done:
> +       mm_flags_set(MMF_USER_HWCAP, bprm->mm);
> +}
> +
>  static int do_execveat_common(int fd, struct filename *filename,
>                               struct user_arg_ptr argv,
>                               struct user_arg_ptr envp,
> @@ -1856,6 +1914,9 @@ static int do_execveat_common(int fd, struct filename *filename,
>                              current->comm, bprm->filename);
>         }
>
> +       if (mm_flags_test(MMF_USER_HWCAP, current->mm))
> +               inherit_hwcap(bprm);
> +
>         retval = bprm_execve(bprm);
>  out_free:
>         free_bprm(bprm);
> diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
> index 65abd5ab8836..94a3dcf9b1d2 100644
> --- a/include/linux/binfmts.h
> +++ b/include/linux/binfmts.h
> @@ -2,6 +2,7 @@
>  #ifndef _LINUX_BINFMTS_H
>  #define _LINUX_BINFMTS_H
>
> +#include <linux/elf.h>
>  #include <linux/sched.h>
>  #include <linux/unistd.h>
>  #include <asm/exec.h>
> @@ -67,6 +68,16 @@ struct linux_binprm {
>         unsigned long exec;
>
>         struct rlimit rlim_stack; /* Saved RLIMIT_STACK used during exec. */
> +       unsigned long hwcap;
> +#ifdef ELF_HWCAP2
> +       unsigned long hwcap2;
> +#endif
> +#ifdef ELF_HWCAP3
> +       unsigned long hwcap3;
> +#endif
> +#ifdef ELF_HWCAP4
> +       unsigned long hwcap4;
> +#endif
>
>         char buf[BINPRM_BUF_SIZE];
>  } __randomize_layout;
> diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
> index 78950eb8926d..68c9131dceee 100644
> --- a/include/linux/mm_types.h
> +++ b/include/linux/mm_types.h
> @@ -1871,6 +1871,8 @@ enum {
>  #define MMF_TOPDOWN            31      /* mm searches top down by default */
>  #define MMF_TOPDOWN_MASK       BIT(MMF_TOPDOWN)
>
> +#define MMF_USER_HWCAP         32      /* user-defined HWCAPs */
> +
>  #define MMF_INIT_LEGACY_MASK   (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\
>                                  MMF_DISABLE_THP_MASK | MMF_HAS_MDWE_MASK |\
>                                  MMF_VM_MERGE_ANY_MASK | MMF_TOPDOWN_MASK)
> diff --git a/kernel/fork.c b/kernel/fork.c
> index b1f3915d5f8e..0091315643de 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -1103,6 +1103,9 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
>
>                 __mm_flags_overwrite_word(mm, mmf_init_legacy_flags(flags));
>                 mm->def_flags = current->mm->def_flags & VM_INIT_DEF_MASK;
> +
> +               if (mm_flags_test(MMF_USER_HWCAP, current->mm))
> +                       mm_flags_set(MMF_USER_HWCAP, mm);
>         } else {
>                 __mm_flags_overwrite_word(mm, default_dump_filter);
>                 mm->def_flags = 0;
> diff --git a/kernel/sys.c b/kernel/sys.c
> index 8d199cf457ae..6fbd7be21a5f 100644
> --- a/kernel/sys.c
> +++ b/kernel/sys.c
> @@ -2157,8 +2157,10 @@ static int prctl_set_mm_map(int opt, const void __user *addr, unsigned long data
>          * not introduce additional locks here making the kernel
>          * more complex.
>          */
> -       if (prctl_map.auxv_size)
> +       if (prctl_map.auxv_size) {
>                 memcpy(mm->saved_auxv, user_auxv, sizeof(user_auxv));
> +               mm_flags_set(MMF_USER_HWCAP, current->mm);
> +       }
>
>         mmap_read_unlock(mm);
>         return 0;
> @@ -2190,6 +2192,7 @@ static int prctl_set_auxv(struct mm_struct *mm, unsigned long addr,
>
>         task_lock(current);
>         memcpy(mm->saved_auxv, user_auxv, len);
> +       mm_flags_set(MMF_USER_HWCAP, current->mm);

nit: s/current->mm/mm/

This is not a functional issue, because the function implicitly assumes mm == current->mm.

Maybe we should get rid of the (struct mm_struct *mm) argument here?
(This is not a request to change this patch; I am just mentioning it.)

LGTM!

>         task_unlock(current);
>
>         return 0;
> --
> 2.53.0.239.g8d8fc8a987-goog
>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ