[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20200715172732.35110a4e53e84fcec9aeac83@kernel.org>
Date: Wed, 15 Jul 2020 17:27:32 +0900
From: Masami Hiramatsu <mhiramat@...nel.org>
To: Jarkko Sakkinen <jarkko.sakkinen@...ux.intel.com>
Cc: linux-kernel@...r.kernel.org, x86@...r.kernel.org,
Andi Kleen <ak@...ux.intel.com>,
Peter Zijlstra <peterz@...radead.org>,
Thomas Gleixner <tglx@...utronix.de>,
Ingo Molnar <mingo@...hat.com>, Borislav Petkov <bp@...en8.de>,
x86@...nel.org (maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT)),
"H. Peter Anvin" <hpa@...or.com>,
"Naveen N. Rao" <naveen.n.rao@...ux.ibm.com>,
Anil S Keshavamurthy <anil.s.keshavamurthy@...el.com>,
"David S. Miller" <davem@...emloft.net>,
Masami Hiramatsu <mhiramat@...nel.org>,
Jessica Yu <jeyu@...nel.org>,
Andrew Morton <akpm@...ux-foundation.org>,
"Aneesh Kumar K.V" <aneesh.kumar@...ux.ibm.com>,
Kees Cook <keescook@...omium.org>,
Will Deacon <will@...nel.org>,
Sami Tolvanen <samitolvanen@...gle.com>,
Alexandre Ghiti <alex@...ti.fr>,
Masahiro Yamada <masahiroy@...nel.org>,
Peter Collingbourne <pcc@...gle.com>,
Frederic Weisbecker <frederic@...nel.org>,
Krzysztof Kozlowski <krzk@...nel.org>,
Arnd Bergmann <arnd@...db.de>, Stephen Boyd <sboyd@...nel.org>,
Andy Lutomirski <luto@...nel.org>,
Josh Poimboeuf <jpoimboe@...hat.com>,
Miroslav Benes <mbenes@...e.cz>,
Babu Moger <Babu.Moger@....com>,
Omar Sandoval <osandov@...com>,
Nayna Jain <nayna@...ux.ibm.com>,
Marco Elver <elver@...gle.com>,
Brian Gerst <brgerst@...il.com>, Jiri Kosina <jkosina@...e.cz>,
Joe Lawrence <joe.lawrence@...hat.com>,
Mike Rapoport <rppt@...ux.ibm.com>
Subject: Re: [PATCH v3 1/3] kprobes: Add text_alloc() and text_free()
Hi Jarkko,
On Wed, 15 Jul 2020 01:32:27 +0300
Jarkko Sakkinen <jarkko.sakkinen@...ux.intel.com> wrote:
> Introduce new API for allocating space for code generaed at run-time
> leveraging from module_alloc() and module_memfree() code. Use this to
> perform memory allocations in the kprobes code in order to loose the
> bound between kprobes and modules subsystem.
>
> Initially, use this API only with arch/x86 and define a new config
> flag CONFIG_ARCH_HAS_TEXT_ALLOC to promote the availability of the
> new API.
This might need to be extended for the text_alloc() flags as we discuss
in previous thread. (e.g. bpf on arm64)
> Cc: Andi Kleen <ak@...ux.intel.com>
> Suggested-by: Peter Zijlstra <peterz@...radead.org>
> Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@...ux.intel.com>
> ---
> arch/Kconfig | 2 +-
> arch/x86/Kconfig | 3 ++
> arch/x86/kernel/Makefile | 1 +
> arch/x86/kernel/module.c | 49 ---------------------------
> arch/x86/kernel/text.c | 71 ++++++++++++++++++++++++++++++++++++++++
I think this would better be arch/x86/mm/text_alloc.c (text.c is too
generic, and this only provides memory allocation.)
> include/linux/text.h | 17 ++++++++++
"text_alloc.h" would be better, or puting the prototype in vmalloc.h
will be easier to use.
> kernel/kprobes.c | 11 +++++++
> kernel/module.c | 10 ++++++
> 8 files changed, 114 insertions(+), 50 deletions(-)
> create mode 100644 arch/x86/kernel/text.c
> create mode 100644 include/linux/text.h
>
> diff --git a/arch/Kconfig b/arch/Kconfig
> index 8cc35dc556c7..e3d6b6868ccb 100644
> --- a/arch/Kconfig
> +++ b/arch/Kconfig
> @@ -61,7 +61,7 @@ config OPROFILE_NMI_TIMER
>
> config KPROBES
> bool "Kprobes"
> - depends on MODULES
> + depends on MODULES || ARCH_HAS_TEXT_ALLOC
> depends on HAVE_KPROBES
> select KALLSYMS
> help
> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> index 0dea7fdd7a00..a4ee5d1300f6 100644
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -2035,6 +2035,9 @@ config KEXEC_FILE
> config ARCH_HAS_KEXEC_PURGATORY
> def_bool KEXEC_FILE
>
> +config ARCH_HAS_TEXT_ALLOC
> + def_bool y
> +
> config KEXEC_SIG
> bool "Verify kernel signature during kexec_file_load() syscall"
> depends on KEXEC_FILE
> diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
> index e77261db2391..2878e4b753a0 100644
> --- a/arch/x86/kernel/Makefile
> +++ b/arch/x86/kernel/Makefile
> @@ -68,6 +68,7 @@ obj-y += tsc.o tsc_msr.o io_delay.o rtc.o
> obj-y += pci-iommu_table.o
> obj-y += resource.o
> obj-y += irqflags.o
> +obj-y += text.o
>
> obj-y += process.o
> obj-y += fpu/
> diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
> index 34b153cbd4ac..261df078f127 100644
> --- a/arch/x86/kernel/module.c
> +++ b/arch/x86/kernel/module.c
> @@ -36,55 +36,6 @@ do { \
> } while (0)
> #endif
>
> -#ifdef CONFIG_RANDOMIZE_BASE
> -static unsigned long module_load_offset;
> -
> -/* Mutex protects the module_load_offset. */
> -static DEFINE_MUTEX(module_kaslr_mutex);
> -
> -static unsigned long int get_module_load_offset(void)
> -{
> - if (kaslr_enabled()) {
> - mutex_lock(&module_kaslr_mutex);
> - /*
> - * Calculate the module_load_offset the first time this
> - * code is called. Once calculated it stays the same until
> - * reboot.
> - */
> - if (module_load_offset == 0)
> - module_load_offset =
> - (get_random_int() % 1024 + 1) * PAGE_SIZE;
> - mutex_unlock(&module_kaslr_mutex);
> - }
> - return module_load_offset;
> -}
> -#else
> -static unsigned long int get_module_load_offset(void)
> -{
> - return 0;
> -}
> -#endif
> -
> -void *module_alloc(unsigned long size)
> -{
> - void *p;
> -
> - if (PAGE_ALIGN(size) > MODULES_LEN)
> - return NULL;
> -
> - p = __vmalloc_node_range(size, MODULE_ALIGN,
> - MODULES_VADDR + get_module_load_offset(),
> - MODULES_END, GFP_KERNEL,
> - PAGE_KERNEL, 0, NUMA_NO_NODE,
> - __builtin_return_address(0));
> - if (p && (kasan_module_alloc(p, size) < 0)) {
> - vfree(p);
> - return NULL;
> - }
> -
> - return p;
> -}
Please don't touch this module_alloc() at all. Then we can
just call __vmalloc_node_range() in the text_alloc().
> -
> #ifdef CONFIG_X86_32
> int apply_relocate(Elf32_Shdr *sechdrs,
> const char *strtab,
> diff --git a/arch/x86/kernel/text.c b/arch/x86/kernel/text.c
> new file mode 100644
> index 000000000000..986b92ff1434
> --- /dev/null
> +++ b/arch/x86/kernel/text.c
> @@ -0,0 +1,71 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +/*
> + * Kernel module help for x86.
> + * Copyright (C) 2001 Rusty Russell.
> + */
> +
> +#include <linux/kasan.h>
> +#include <linux/mm.h>
> +#include <linux/moduleloader.h>
> +#include <linux/vmalloc.h>
> +#include <asm/setup.h>
> +
> +#ifdef CONFIG_RANDOMIZE_BASE
> +static unsigned long module_load_offset;
> +
> +/* Mutex protects the module_load_offset. */
> +static DEFINE_MUTEX(module_kaslr_mutex);
> +
> +static unsigned long get_module_load_offset(void)
> +{
> + if (kaslr_enabled()) {
> + mutex_lock(&module_kaslr_mutex);
> + /*
> + * Calculate the module_load_offset the first time this
> + * code is called. Once calculated it stays the same until
> + * reboot.
> + */
> + if (module_load_offset == 0)
> + module_load_offset =
> + (get_random_int() % 1024 + 1) * PAGE_SIZE;
> + mutex_unlock(&module_kaslr_mutex);
> + }
> + return module_load_offset;
> +}
> +#else
> +static unsigned long get_module_load_offset(void)
> +{
> + return 0;
> +}
> +#endif
So as I pointed, we can ignore this for kprobes (and other
dynamic allocated trampoline code).
> +
> +void *text_alloc(unsigned long size)
> +{
> + void *p;
> +
> + if (PAGE_ALIGN(size) > MODULES_LEN)
> + return NULL;
> +
> + p = __vmalloc_node_range(size, MODULE_ALIGN,
> + MODULES_VADDR + get_module_load_offset(),
> + MODULES_END, GFP_KERNEL,
> + PAGE_KERNEL, 0, NUMA_NO_NODE,
> + __builtin_return_address(0));
> + if (p && (kasan_module_alloc(p, size) < 0)) {
> + vfree(p);
> + return NULL;
> + }
> +
> + return p;
> +}
> +
> +void text_free(void *region)
> +{
> + /*
> + * This memory may be RO, and freeing RO memory in an interrupt is not
> + * supported by vmalloc.
> + */
> + WARN_ON(in_interrupt());
> +
> + vfree(region);
> +}
> diff --git a/include/linux/text.h b/include/linux/text.h
> new file mode 100644
> index 000000000000..a27d4a42ecda
> --- /dev/null
> +++ b/include/linux/text.h
> @@ -0,0 +1,17 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +
> +#ifndef _LINUX_TEXT_H
> +#define _LINUX_TEXT_H
> +
> +/*
> + * An allocator used for allocating modules, core sections and init sections.
> + * Returns NULL on failure.
> + */
> +void *text_alloc(unsigned long size);
> +
> +/*
> + * Free memory returned from text_alloc().
> + */
> +void text_free(void *region);
Hmm, if this is this short, in this version we might better move
these to vmalloc.h.
> +
> +#endif /* _LINUX_TEXT_H */
> diff --git a/kernel/kprobes.c b/kernel/kprobes.c
> index 2e97febeef77..fa7687eb0c0e 100644
> --- a/kernel/kprobes.c
> +++ b/kernel/kprobes.c
> @@ -35,6 +35,7 @@
> #include <linux/ftrace.h>
> #include <linux/cpu.h>
> #include <linux/jump_label.h>
> +#include <linux/text.h>
>
> #include <asm/sections.h>
> #include <asm/cacheflush.h>
> @@ -111,12 +112,20 @@ enum kprobe_slot_state {
>
> void __weak *alloc_insn_page(void)
> {
> +#ifdef CONFIG_ARCH_HAS_TEXT_ALLOC
> + return text_alloc(PAGE_SIZE);
> +#else
> return module_alloc(PAGE_SIZE);
> +#endif
> }
>
> void __weak free_insn_page(void *page)
> {
> +#ifdef CONFIG_ARCH_HAS_TEXT_ALLOC
> + text_free(page);
> +#else
> module_memfree(page);
> +#endif
> }
>
> struct kprobe_insn_cache kprobe_insn_slots = {
> @@ -1608,6 +1617,7 @@ static int check_kprobe_address_safe(struct kprobe *p,
> goto out;
> }
>
> +#ifdef CONFIG_MODULES
> /*
> * If the module freed .init.text, we couldn't insert
> * kprobes in there.
> @@ -1618,6 +1628,7 @@ static int check_kprobe_address_safe(struct kprobe *p,
> *probed_mod = NULL;
> ret = -ENOENT;
> }
> +#endif
This change is not related to text_alloc(). Please move this to 3/3.
> }
> out:
> preempt_enable();
> diff --git a/kernel/module.c b/kernel/module.c
> index aa183c9ac0a2..8adeb126b02c 100644
> --- a/kernel/module.c
> +++ b/kernel/module.c
> @@ -56,6 +56,7 @@
> #include <linux/bsearch.h>
> #include <linux/dynamic_debug.h>
> #include <linux/audit.h>
> +#include <linux/text.h>
> #include <uapi/linux/module.h>
> #include "module-internal.h"
>
> @@ -2151,7 +2152,12 @@ void __weak module_memfree(void *module_region)
> * supported by vmalloc.
> */
> WARN_ON(in_interrupt());
> +
> +#ifdef CONFIG_ARCH_HAS_TEXT_ALLOC
> + text_free(module_region);
> +#else
> vfree(module_region);
> +#endif
> }
>
> void __weak module_arch_cleanup(struct module *mod)
> @@ -2786,9 +2792,13 @@ static void dynamic_debug_remove(struct module *mod, struct _ddebug *debug)
>
> void * __weak module_alloc(unsigned long size)
> {
> +#ifdef CONFIG_ARCH_HAS_TEXT_ALLOC
> + return text_alloc(size);
> +#else
> return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END,
> GFP_KERNEL, PAGE_KERNEL_EXEC, VM_FLUSH_RESET_PERMS,
> NUMA_NO_NODE, __builtin_return_address(0));
> +#endif
> }
Please don't touch kernel/module.c too. This seems to make things complicated.
Thank you,
--
Masami Hiramatsu <mhiramat@...nel.org>
Powered by blists - more mailing lists