[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20240418060613.cdcf85650e5d0c43da5d1c1b@kernel.org>
Date: Thu, 18 Apr 2024 06:06:13 +0900
From: Masami Hiramatsu (Google) <mhiramat@...nel.org>
To: Mike Rapoport <rppt@...nel.org>
Cc: linux-kernel@...r.kernel.org, Alexandre Ghiti <alexghiti@...osinc.com>,
Andrew Morton <akpm@...ux-foundation.org>, Björn Töpel
<bjorn@...nel.org>, Catalin Marinas <catalin.marinas@....com>, Christophe
Leroy <christophe.leroy@...roup.eu>, "David S. Miller"
<davem@...emloft.net>, Dinh Nguyen <dinguyen@...nel.org>, Donald Dutile
<ddutile@...hat.com>, Eric Chanudet <echanude@...hat.com>, Heiko Carstens
<hca@...ux.ibm.com>, Helge Deller <deller@....de>, Huacai Chen
<chenhuacai@...nel.org>, Kent Overstreet <kent.overstreet@...ux.dev>, Luis
Chamberlain <mcgrof@...nel.org>, Mark Rutland <mark.rutland@....com>,
Michael Ellerman <mpe@...erman.id.au>, Nadav Amit <nadav.amit@...il.com>,
Palmer Dabbelt <palmer@...belt.com>, Puranjay Mohan <puranjay12@...il.com>,
Rick Edgecombe <rick.p.edgecombe@...el.com>, Russell King
<linux@...linux.org.uk>, Song Liu <song@...nel.org>, Steven Rostedt
<rostedt@...dmis.org>, Thomas Bogendoerfer <tsbogend@...ha.franken.de>,
Thomas Gleixner <tglx@...utronix.de>, Will Deacon <will@...nel.org>,
bpf@...r.kernel.org, linux-arch@...r.kernel.org,
linux-arm-kernel@...ts.infradead.org, linux-mips@...r.kernel.org,
linux-mm@...ck.org, linux-modules@...r.kernel.org,
linux-parisc@...r.kernel.org, linux-riscv@...ts.infradead.org,
linux-s390@...r.kernel.org, linux-trace-kernel@...r.kernel.org,
linuxppc-dev@...ts.ozlabs.org, loongarch@...ts.linux.dev,
netdev@...r.kernel.org, sparclinux@...r.kernel.org, x86@...nel.org
Subject: Re: [PATCH v4 05/15] mm: introduce execmem_alloc() and
execmem_free()
On Thu, 11 Apr 2024 19:00:41 +0300
Mike Rapoport <rppt@...nel.org> wrote:
> From: "Mike Rapoport (IBM)" <rppt@...nel.org>
>
> module_alloc() is used everywhere as a mean to allocate memory for code.
>
> Beside being semantically wrong, this unnecessarily ties all subsystems
> that need to allocate code, such as ftrace, kprobes and BPF to modules and
> puts the burden of code allocation to the modules code.
>
> Several architectures override module_alloc() because of various
> constraints where the executable memory can be located and this causes
> additional obstacles for improvements of code allocation.
>
> Start splitting code allocation from modules by introducing execmem_alloc()
> and execmem_free() APIs.
>
> Initially, execmem_alloc() is a wrapper for module_alloc() and
> execmem_free() is a replacement of module_memfree() to allow updating all
> call sites to use the new APIs.
>
> Since architectures define different restrictions on placement,
> permissions, alignment and other parameters for memory that can be used by
> different subsystems that allocate executable memory, execmem_alloc() takes
> a type argument, that will be used to identify the calling subsystem and to
> allow architectures define parameters for ranges suitable for that
> subsystem.
>
This looks good to me for the kprobe part.
Acked-by: Masami Hiramatsu (Google) <mhiramat@...nel.org>
Thank you,
> Signed-off-by: Mike Rapoport (IBM) <rppt@...nel.org>
> ---
> arch/powerpc/kernel/kprobes.c | 6 ++--
> arch/s390/kernel/ftrace.c | 4 +--
> arch/s390/kernel/kprobes.c | 4 +--
> arch/s390/kernel/module.c | 5 +--
> arch/sparc/net/bpf_jit_comp_32.c | 8 ++---
> arch/x86/kernel/ftrace.c | 6 ++--
> arch/x86/kernel/kprobes/core.c | 4 +--
> include/linux/execmem.h | 57 ++++++++++++++++++++++++++++++++
> include/linux/moduleloader.h | 3 --
> kernel/bpf/core.c | 6 ++--
> kernel/kprobes.c | 8 ++---
> kernel/module/Kconfig | 1 +
> kernel/module/main.c | 25 +++++---------
> mm/Kconfig | 3 ++
> mm/Makefile | 1 +
> mm/execmem.c | 26 +++++++++++++++
> 16 files changed, 122 insertions(+), 45 deletions(-)
> create mode 100644 include/linux/execmem.h
> create mode 100644 mm/execmem.c
>
> diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
> index bbca90a5e2ec..9fcd01bb2ce6 100644
> --- a/arch/powerpc/kernel/kprobes.c
> +++ b/arch/powerpc/kernel/kprobes.c
> @@ -19,8 +19,8 @@
> #include <linux/extable.h>
> #include <linux/kdebug.h>
> #include <linux/slab.h>
> -#include <linux/moduleloader.h>
> #include <linux/set_memory.h>
> +#include <linux/execmem.h>
> #include <asm/code-patching.h>
> #include <asm/cacheflush.h>
> #include <asm/sstep.h>
> @@ -130,7 +130,7 @@ void *alloc_insn_page(void)
> {
> void *page;
>
> - page = module_alloc(PAGE_SIZE);
> + page = execmem_alloc(EXECMEM_KPROBES, PAGE_SIZE);
> if (!page)
> return NULL;
>
> @@ -142,7 +142,7 @@ void *alloc_insn_page(void)
> }
> return page;
> error:
> - module_memfree(page);
> + execmem_free(page);
> return NULL;
> }
>
> diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
> index c46381ea04ec..798249ef5646 100644
> --- a/arch/s390/kernel/ftrace.c
> +++ b/arch/s390/kernel/ftrace.c
> @@ -7,13 +7,13 @@
> * Author(s): Martin Schwidefsky <schwidefsky@...ibm.com>
> */
>
> -#include <linux/moduleloader.h>
> #include <linux/hardirq.h>
> #include <linux/uaccess.h>
> #include <linux/ftrace.h>
> #include <linux/kernel.h>
> #include <linux/types.h>
> #include <linux/kprobes.h>
> +#include <linux/execmem.h>
> #include <trace/syscall.h>
> #include <asm/asm-offsets.h>
> #include <asm/text-patching.h>
> @@ -220,7 +220,7 @@ static int __init ftrace_plt_init(void)
> {
> const char *start, *end;
>
> - ftrace_plt = module_alloc(PAGE_SIZE);
> + ftrace_plt = execmem_alloc(EXECMEM_FTRACE, PAGE_SIZE);
> if (!ftrace_plt)
> panic("cannot allocate ftrace plt\n");
>
> diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
> index f0cf20d4b3c5..3c1b1be744de 100644
> --- a/arch/s390/kernel/kprobes.c
> +++ b/arch/s390/kernel/kprobes.c
> @@ -9,7 +9,6 @@
>
> #define pr_fmt(fmt) "kprobes: " fmt
>
> -#include <linux/moduleloader.h>
> #include <linux/kprobes.h>
> #include <linux/ptrace.h>
> #include <linux/preempt.h>
> @@ -21,6 +20,7 @@
> #include <linux/slab.h>
> #include <linux/hardirq.h>
> #include <linux/ftrace.h>
> +#include <linux/execmem.h>
> #include <asm/set_memory.h>
> #include <asm/sections.h>
> #include <asm/dis.h>
> @@ -38,7 +38,7 @@ void *alloc_insn_page(void)
> {
> void *page;
>
> - page = module_alloc(PAGE_SIZE);
> + page = execmem_alloc(EXECMEM_KPROBES, PAGE_SIZE);
> if (!page)
> return NULL;
> set_memory_rox((unsigned long)page, 1);
> diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c
> index 42215f9404af..ac97a905e8cd 100644
> --- a/arch/s390/kernel/module.c
> +++ b/arch/s390/kernel/module.c
> @@ -21,6 +21,7 @@
> #include <linux/moduleloader.h>
> #include <linux/bug.h>
> #include <linux/memory.h>
> +#include <linux/execmem.h>
> #include <asm/alternative.h>
> #include <asm/nospec-branch.h>
> #include <asm/facility.h>
> @@ -76,7 +77,7 @@ void *module_alloc(unsigned long size)
> #ifdef CONFIG_FUNCTION_TRACER
> void module_arch_cleanup(struct module *mod)
> {
> - module_memfree(mod->arch.trampolines_start);
> + execmem_free(mod->arch.trampolines_start);
> }
> #endif
>
> @@ -510,7 +511,7 @@ static int module_alloc_ftrace_hotpatch_trampolines(struct module *me,
>
> size = FTRACE_HOTPATCH_TRAMPOLINES_SIZE(s->sh_size);
> numpages = DIV_ROUND_UP(size, PAGE_SIZE);
> - start = module_alloc(numpages * PAGE_SIZE);
> + start = execmem_alloc(EXECMEM_FTRACE, numpages * PAGE_SIZE);
> if (!start)
> return -ENOMEM;
> set_memory_rox((unsigned long)start, numpages);
> diff --git a/arch/sparc/net/bpf_jit_comp_32.c b/arch/sparc/net/bpf_jit_comp_32.c
> index da2df1e84ed4..bda2dbd3f4c5 100644
> --- a/arch/sparc/net/bpf_jit_comp_32.c
> +++ b/arch/sparc/net/bpf_jit_comp_32.c
> @@ -1,10 +1,10 @@
> // SPDX-License-Identifier: GPL-2.0
> -#include <linux/moduleloader.h>
> #include <linux/workqueue.h>
> #include <linux/netdevice.h>
> #include <linux/filter.h>
> #include <linux/cache.h>
> #include <linux/if_vlan.h>
> +#include <linux/execmem.h>
>
> #include <asm/cacheflush.h>
> #include <asm/ptrace.h>
> @@ -713,7 +713,7 @@ cond_branch: f_offset = addrs[i + filter[i].jf];
> if (unlikely(proglen + ilen > oldproglen)) {
> pr_err("bpb_jit_compile fatal error\n");
> kfree(addrs);
> - module_memfree(image);
> + execmem_free(image);
> return;
> }
> memcpy(image + proglen, temp, ilen);
> @@ -736,7 +736,7 @@ cond_branch: f_offset = addrs[i + filter[i].jf];
> break;
> }
> if (proglen == oldproglen) {
> - image = module_alloc(proglen);
> + image = execmem_alloc(EXECMEM_BPF, proglen);
> if (!image)
> goto out;
> }
> @@ -758,7 +758,7 @@ cond_branch: f_offset = addrs[i + filter[i].jf];
> void bpf_jit_free(struct bpf_prog *fp)
> {
> if (fp->jited)
> - module_memfree(fp->bpf_func);
> + execmem_free(fp->bpf_func);
>
> bpf_prog_unlock_free(fp);
> }
> diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
> index 70139d9d2e01..c8ddb7abda7c 100644
> --- a/arch/x86/kernel/ftrace.c
> +++ b/arch/x86/kernel/ftrace.c
> @@ -25,6 +25,7 @@
> #include <linux/memory.h>
> #include <linux/vmalloc.h>
> #include <linux/set_memory.h>
> +#include <linux/execmem.h>
>
> #include <trace/syscall.h>
>
> @@ -261,15 +262,14 @@ void arch_ftrace_update_code(int command)
> #ifdef CONFIG_X86_64
>
> #ifdef CONFIG_MODULES
> -#include <linux/moduleloader.h>
> /* Module allocation simplifies allocating memory for code */
> static inline void *alloc_tramp(unsigned long size)
> {
> - return module_alloc(size);
> + return execmem_alloc(EXECMEM_FTRACE, size);
> }
> static inline void tramp_free(void *tramp)
> {
> - module_memfree(tramp);
> + execmem_free(tramp);
> }
> #else
> /* Trampolines can only be created if modules are supported */
> diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
> index d0e49bd7c6f3..72e6a45e7ec2 100644
> --- a/arch/x86/kernel/kprobes/core.c
> +++ b/arch/x86/kernel/kprobes/core.c
> @@ -40,12 +40,12 @@
> #include <linux/kgdb.h>
> #include <linux/ftrace.h>
> #include <linux/kasan.h>
> -#include <linux/moduleloader.h>
> #include <linux/objtool.h>
> #include <linux/vmalloc.h>
> #include <linux/pgtable.h>
> #include <linux/set_memory.h>
> #include <linux/cfi.h>
> +#include <linux/execmem.h>
>
> #include <asm/text-patching.h>
> #include <asm/cacheflush.h>
> @@ -495,7 +495,7 @@ void *alloc_insn_page(void)
> {
> void *page;
>
> - page = module_alloc(PAGE_SIZE);
> + page = execmem_alloc(EXECMEM_KPROBES, PAGE_SIZE);
> if (!page)
> return NULL;
>
> diff --git a/include/linux/execmem.h b/include/linux/execmem.h
> new file mode 100644
> index 000000000000..43e7995593a1
> --- /dev/null
> +++ b/include/linux/execmem.h
> @@ -0,0 +1,57 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef _LINUX_EXECMEM_ALLOC_H
> +#define _LINUX_EXECMEM_ALLOC_H
> +
> +#include <linux/types.h>
> +#include <linux/moduleloader.h>
> +
> +/**
> + * enum execmem_type - types of executable memory ranges
> + *
> + * There are several subsystems that allocate executable memory.
> + * Architectures define different restrictions on placement,
> + * permissions, alignment and other parameters for memory that can be used
> + * by these subsystems.
> + * Types in this enum identify subsystems that allocate executable memory
> + * and let architectures define parameters for ranges suitable for
> + * allocations by each subsystem.
> + *
> + * @EXECMEM_DEFAULT: default parameters that would be used for types that
> + * are not explcitly defined.
> + * @EXECMEM_MODULE_TEXT: parameters for module text sections
> + * @EXECMEM_KPROBES: parameters for kprobes
> + * @EXECMEM_FTRACE: parameters for ftrace
> + * @EXECMEM_BPF: parameters for BPF
> + * @EXECMEM_TYPE_MAX:
> + */
> +enum execmem_type {
> + EXECMEM_DEFAULT,
> + EXECMEM_MODULE_TEXT = EXECMEM_DEFAULT,
> + EXECMEM_KPROBES,
> + EXECMEM_FTRACE,
> + EXECMEM_BPF,
> + EXECMEM_TYPE_MAX,
> +};
> +
> +/**
> + * execmem_alloc - allocate executable memory
> + * @type: type of the allocation
> + * @size: how many bytes of memory are required
> + *
> + * Allocates memory that will contain executable code, either generated or
> + * loaded from kernel modules.
> + *
> + * The memory will have protections defined by architecture for executable
> + * region of the @type.
> + *
> + * Return: a pointer to the allocated memory or %NULL
> + */
> +void *execmem_alloc(enum execmem_type type, size_t size);
> +
> +/**
> + * execmem_free - free executable memory
> + * @ptr: pointer to the memory that should be freed
> + */
> +void execmem_free(void *ptr);
> +
> +#endif /* _LINUX_EXECMEM_ALLOC_H */
> diff --git a/include/linux/moduleloader.h b/include/linux/moduleloader.h
> index 89b1e0ed9811..a3b8caee9405 100644
> --- a/include/linux/moduleloader.h
> +++ b/include/linux/moduleloader.h
> @@ -29,9 +29,6 @@ unsigned int arch_mod_section_prepend(struct module *mod, unsigned int section);
> sections. Returns NULL on failure. */
> void *module_alloc(unsigned long size);
>
> -/* Free memory returned from module_alloc. */
> -void module_memfree(void *module_region);
> -
> /* Determines if the section name is an init section (that is only used during
> * module loading).
> */
> diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
> index 696bc55de8e8..75a54024e2f4 100644
> --- a/kernel/bpf/core.c
> +++ b/kernel/bpf/core.c
> @@ -22,7 +22,6 @@
> #include <linux/skbuff.h>
> #include <linux/vmalloc.h>
> #include <linux/random.h>
> -#include <linux/moduleloader.h>
> #include <linux/bpf.h>
> #include <linux/btf.h>
> #include <linux/objtool.h>
> @@ -37,6 +36,7 @@
> #include <linux/nospec.h>
> #include <linux/bpf_mem_alloc.h>
> #include <linux/memcontrol.h>
> +#include <linux/execmem.h>
>
> #include <asm/barrier.h>
> #include <asm/unaligned.h>
> @@ -1050,12 +1050,12 @@ void bpf_jit_uncharge_modmem(u32 size)
>
> void *__weak bpf_jit_alloc_exec(unsigned long size)
> {
> - return module_alloc(size);
> + return execmem_alloc(EXECMEM_BPF, size);
> }
>
> void __weak bpf_jit_free_exec(void *addr)
> {
> - module_memfree(addr);
> + execmem_free(addr);
> }
>
> struct bpf_binary_header *
> diff --git a/kernel/kprobes.c b/kernel/kprobes.c
> index 9d9095e81792..047ca629ce49 100644
> --- a/kernel/kprobes.c
> +++ b/kernel/kprobes.c
> @@ -26,7 +26,6 @@
> #include <linux/slab.h>
> #include <linux/stddef.h>
> #include <linux/export.h>
> -#include <linux/moduleloader.h>
> #include <linux/kallsyms.h>
> #include <linux/freezer.h>
> #include <linux/seq_file.h>
> @@ -39,6 +38,7 @@
> #include <linux/jump_label.h>
> #include <linux/static_call.h>
> #include <linux/perf_event.h>
> +#include <linux/execmem.h>
>
> #include <asm/sections.h>
> #include <asm/cacheflush.h>
> @@ -113,17 +113,17 @@ enum kprobe_slot_state {
> void __weak *alloc_insn_page(void)
> {
> /*
> - * Use module_alloc() so this page is within +/- 2GB of where the
> + * Use execmem_alloc() so this page is within +/- 2GB of where the
> * kernel image and loaded module images reside. This is required
> * for most of the architectures.
> * (e.g. x86-64 needs this to handle the %rip-relative fixups.)
> */
> - return module_alloc(PAGE_SIZE);
> + return execmem_alloc(EXECMEM_KPROBES, PAGE_SIZE);
> }
>
> static void free_insn_page(void *page)
> {
> - module_memfree(page);
> + execmem_free(page);
> }
>
> struct kprobe_insn_cache kprobe_insn_slots = {
> diff --git a/kernel/module/Kconfig b/kernel/module/Kconfig
> index f3e0329337f6..744383c1eed1 100644
> --- a/kernel/module/Kconfig
> +++ b/kernel/module/Kconfig
> @@ -2,6 +2,7 @@
> menuconfig MODULES
> bool "Enable loadable module support"
> modules
> + select EXECMEM
> help
> Kernel modules are small pieces of compiled code which can
> be inserted in the running kernel, rather than being
> diff --git a/kernel/module/main.c b/kernel/module/main.c
> index 5b82b069e0d3..d56b7df0cbb6 100644
> --- a/kernel/module/main.c
> +++ b/kernel/module/main.c
> @@ -57,6 +57,7 @@
> #include <linux/audit.h>
> #include <linux/cfi.h>
> #include <linux/debugfs.h>
> +#include <linux/execmem.h>
> #include <uapi/linux/module.h>
> #include "internal.h"
>
> @@ -1179,16 +1180,6 @@ resolve_symbol_wait(struct module *mod,
> return ksym;
> }
>
> -void __weak module_memfree(void *module_region)
> -{
> - /*
> - * This memory may be RO, and freeing RO memory in an interrupt is not
> - * supported by vmalloc.
> - */
> - WARN_ON(in_interrupt());
> - vfree(module_region);
> -}
> -
> void __weak module_arch_cleanup(struct module *mod)
> {
> }
> @@ -1213,7 +1204,7 @@ static int module_memory_alloc(struct module *mod, enum mod_mem_type type)
> if (mod_mem_use_vmalloc(type))
> ptr = vmalloc(size);
> else
> - ptr = module_alloc(size);
> + ptr = execmem_alloc(EXECMEM_MODULE_TEXT, size);
>
> if (!ptr)
> return -ENOMEM;
> @@ -1244,7 +1235,7 @@ static void module_memory_free(struct module *mod, enum mod_mem_type type)
> if (mod_mem_use_vmalloc(type))
> vfree(ptr);
> else
> - module_memfree(ptr);
> + execmem_free(ptr);
> }
>
> static void free_mod_mem(struct module *mod)
> @@ -2496,9 +2487,9 @@ static void do_free_init(struct work_struct *w)
>
> llist_for_each_safe(pos, n, list) {
> initfree = container_of(pos, struct mod_initfree, node);
> - module_memfree(initfree->init_text);
> - module_memfree(initfree->init_data);
> - module_memfree(initfree->init_rodata);
> + execmem_free(initfree->init_text);
> + execmem_free(initfree->init_data);
> + execmem_free(initfree->init_rodata);
> kfree(initfree);
> }
> }
> @@ -2608,10 +2599,10 @@ static noinline int do_init_module(struct module *mod)
> * We want to free module_init, but be aware that kallsyms may be
> * walking this with preempt disabled. In all the failure paths, we
> * call synchronize_rcu(), but we don't want to slow down the success
> - * path. module_memfree() cannot be called in an interrupt, so do the
> + * path. execmem_free() cannot be called in an interrupt, so do the
> * work and call synchronize_rcu() in a work queue.
> *
> - * Note that module_alloc() on most architectures creates W+X page
> + * Note that execmem_alloc() on most architectures creates W+X page
> * mappings which won't be cleaned up until do_free_init() runs. Any
> * code such as mark_rodata_ro() which depends on those mappings to
> * be cleaned up needs to sync with the queued work by invoking
> diff --git a/mm/Kconfig b/mm/Kconfig
> index b1448aa81e15..f08a216d4793 100644
> --- a/mm/Kconfig
> +++ b/mm/Kconfig
> @@ -1241,6 +1241,9 @@ config LOCK_MM_AND_FIND_VMA
> config IOMMU_MM_DATA
> bool
>
> +config EXECMEM
> + bool
> +
> source "mm/damon/Kconfig"
>
> endmenu
> diff --git a/mm/Makefile b/mm/Makefile
> index 4abb40b911ec..001336c91864 100644
> --- a/mm/Makefile
> +++ b/mm/Makefile
> @@ -133,3 +133,4 @@ obj-$(CONFIG_IO_MAPPING) += io-mapping.o
> obj-$(CONFIG_HAVE_BOOTMEM_INFO_NODE) += bootmem_info.o
> obj-$(CONFIG_GENERIC_IOREMAP) += ioremap.o
> obj-$(CONFIG_SHRINKER_DEBUG) += shrinker_debug.o
> +obj-$(CONFIG_EXECMEM) += execmem.o
> diff --git a/mm/execmem.c b/mm/execmem.c
> new file mode 100644
> index 000000000000..ed2ea41a2543
> --- /dev/null
> +++ b/mm/execmem.c
> @@ -0,0 +1,26 @@
> +// SPDX-License-Identifier: GPL-2.0
> +
> +#include <linux/mm.h>
> +#include <linux/vmalloc.h>
> +#include <linux/execmem.h>
> +#include <linux/moduleloader.h>
> +
> +static void *__execmem_alloc(size_t size)
> +{
> + return module_alloc(size);
> +}
> +
> +void *execmem_alloc(enum execmem_type type, size_t size)
> +{
> + return __execmem_alloc(size);
> +}
> +
> +void execmem_free(void *ptr)
> +{
> + /*
> + * This memory may be RO, and freeing RO memory in an interrupt is not
> + * supported by vmalloc.
> + */
> + WARN_ON(in_interrupt());
> + vfree(ptr);
> +}
> --
> 2.43.0
>
>
--
Masami Hiramatsu (Google) <mhiramat@...nel.org>
Powered by blists - more mailing lists