[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAK7LNARL-Mtva-WuU-uktZsN3y1zyUNnFnZ25vRbQZ6M0wK0-g@mail.gmail.com>
Date: Wed, 19 Jun 2024 01:47:13 +0900
From: Masahiro Yamada <masahiroy@...nel.org>
To: Sami Tolvanen <samitolvanen@...gle.com>
Cc: Luis Chamberlain <mcgrof@...nel.org>, Miguel Ojeda <ojeda@...nel.org>,
Greg Kroah-Hartman <gregkh@...uxfoundation.org>, Matthew Maurer <mmaurer@...gle.com>,
Alex Gaynor <alex.gaynor@...il.com>, Wedson Almeida Filho <wedsonaf@...il.com>, Gary Guo <gary@...yguo.net>,
linux-kbuild@...r.kernel.org, linux-kernel@...r.kernel.org,
linux-modules@...r.kernel.org, rust-for-linux@...r.kernel.org
Subject: Re: [PATCH 13/15] modpost: Add support for hashing long symbol names
On Tue, Jun 18, 2024 at 2:58 AM Sami Tolvanen <samitolvanen@...gle.com> wrote:
>
> Rust frequently has symbol names longer than MODULE_NAME_LEN, because
> the full namespace path is encoded into the mangled name. Instead of
> modpost failing when running into a long name with CONFIG_MODVERSIONS,
> store a hash of the name in struct modversion_info.
>
> To avoid breaking userspace tools that parse the __versions array,
> include a null-terminated hash function name at the beginning of the
> name string, followed by a binary hash. In order to keep .mod.c files
> more human-readable, add a comment with the full symbol name before the
> array entry. Example output in rust_minimal.mod.c:
>
> static const struct modversion_info ____versions[]
> __used __section("__versions") = {
> /* _RNvNtNtCs1cdwasc6FUb_6kernel5print14format_strings4INFO */
> { 0x9ec5442f, "sha256\x00\x56\x96\xf4\x27\xdb\x4a\xbf[...]" },
> { 0x1d6595b1, "_RNvNtCs1cdwasc6FUb_6kernel5print11call_printk" },
> { 0x3c642974, "__rust_dealloc" },
> ...
> };
>
> modprobe output for the resulting module:
>
> $ modprobe --dump-modversions rust_minimal.ko
> 0x9ec5442f sha256
> 0x1d6595b1 _RNvNtCs1cdwasc6FUb_6kernel5print11call_printk
> 0x3c642974 __rust_dealloc
> ...
>
> While the output is less useful, the tool continues to work and can be
> later updated to produce more helpful output for hashed symbols.
>
> Note that this patch adds a generic SHA-256 implementation to modpost
> adapted from the Crypto API, but other hash functions may be used in
> future if needed.
>
> Suggested-by: Matthew Maurer <mmaurer@...gle.com>
> Signed-off-by: Sami Tolvanen <samitolvanen@...gle.com>
> ---
> scripts/mod/Makefile | 4 +-
> scripts/mod/modpost.c | 20 ++-
> scripts/mod/modpost.h | 20 +++
> scripts/mod/symhash.c | 327 ++++++++++++++++++++++++++++++++++++++++++
> 4 files changed, 364 insertions(+), 7 deletions(-)
> create mode 100644 scripts/mod/symhash.c
>
> diff --git a/scripts/mod/Makefile b/scripts/mod/Makefile
> index c729bc936bae..ddd59ea9027e 100644
> --- a/scripts/mod/Makefile
> +++ b/scripts/mod/Makefile
> @@ -4,7 +4,7 @@ CFLAGS_REMOVE_empty.o += $(CC_FLAGS_LTO)
> hostprogs-always-y += modpost mk_elfconfig
> always-y += empty.o
>
> -modpost-objs := modpost.o file2alias.o sumversion.o symsearch.o
> +modpost-objs := modpost.o file2alias.o symhash.o sumversion.o symsearch.o
>
> devicetable-offsets-file := devicetable-offsets.h
>
> @@ -15,7 +15,7 @@ targets += $(devicetable-offsets-file) devicetable-offsets.s
>
> # dependencies on generated files need to be listed explicitly
>
> -$(obj)/modpost.o $(obj)/file2alias.o $(obj)/sumversion.o $(obj)/symsearch.o: $(obj)/elfconfig.h
> +$(obj)/modpost.o $(obj)/file2alias.o $(obj)/symhash.o $(obj)/sumversion.o $(obj)/symsearch.o: $(obj)/elfconfig.h
> $(obj)/file2alias.o: $(obj)/$(devicetable-offsets-file)
>
> quiet_cmd_elfconfig = MKELF $@
> diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
> index f48d72d22dc2..2631e3e75a5c 100644
> --- a/scripts/mod/modpost.c
> +++ b/scripts/mod/modpost.c
> @@ -1900,7 +1900,10 @@ static void add_exported_symbols(struct buffer *buf, struct module *mod)
> **/
> static void add_versions(struct buffer *b, struct module *mod)
> {
> + char hash[SYMHASH_STR_LEN];
> struct symbol *s;
> + const char *name;
> + size_t len;
>
> if (!modversions)
> return;
> @@ -1917,13 +1920,20 @@ static void add_versions(struct buffer *b, struct module *mod)
> s->name, mod->name);
> continue;
> }
> - if (strlen(s->name) >= MODULE_NAME_LEN) {
> - error("too long symbol \"%s\" [%s.ko]\n",
> - s->name, mod->name);
> - break;
> + len = strlen(s->name);
> + /*
> + * For symbols with a long name, use the hash format, but include
> + * the full symbol name as a comment to keep the generated files
> + * human-readable.
> + */
> + if (len >= MODULE_NAME_LEN) {
> + buf_printf(b, "\t/* %s */\n", s->name);
> + name = symhash_str(s->name, len, hash);
> + } else {
> + name = s->name;
> }
> buf_printf(b, "\t{ %#8x, \"%s\" },\n",
> - s->crc, s->name);
> + s->crc, name);
> }
>
> buf_printf(b, "};\n");
> diff --git a/scripts/mod/modpost.h b/scripts/mod/modpost.h
> index ee43c7950636..cd2eb718936b 100644
> --- a/scripts/mod/modpost.h
> +++ b/scripts/mod/modpost.h
> @@ -183,6 +183,26 @@ void handle_moddevtable(struct module *mod, struct elf_info *info,
> Elf_Sym *sym, const char *symname);
> void add_moddevtable(struct buffer *buf, struct module *mod);
>
> +/* symhash.c */
> +/*
> + * For symbol names longer than MODULE_NAME_LEN, encode a hash of the
> + * symbol name in version information as follows:
> + *
> + * <hash name>\0<binary hash of the symbol name>
> + *
> + * e.g. as a string in .mod.c files:
> + * "sha256\x00\xNN{32}"
> + *
> + * The string is null terminated after the hash name to avoid breaking
> + * userspace tools that parse the __versions table and don't understand
> + * the format.
> + */
> +#define SYMHASH_STR_PREFIX "sha256\\x00"
> +#define SYMHASH_STR_PREFIX_LEN (sizeof(SYMHASH_STR_PREFIX) - 1)
> +#define SYMHASH_STR_LEN (SYMHASH_STR_PREFIX_LEN + 4*32 + 1)
> +
> +char *symhash_str(const char *name, size_t len, char hash_str[SYMHASH_STR_LEN]);
> +
> /* sumversion.c */
> void get_src_version(const char *modname, char sum[], unsigned sumlen);
>
> diff --git a/scripts/mod/symhash.c b/scripts/mod/symhash.c
> new file mode 100644
> index 000000000000..d0c9cf5f1f6c
> --- /dev/null
> +++ b/scripts/mod/symhash.c
> @@ -0,0 +1,327 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +/*
> + * symhash.c
> + *
> + * Symbol name hashing using a SHA-256 implementation adapted from the
> + * Cryptographic API.
> + */
> +#include <byteswap.h>
> +#include "modpost.h"
> +
> +#if HOST_ELFDATA == ELFDATA2MSB
> +/* Big endian */
> +#define be32_to_cpu(val) (val)
> +#define cpu_to_be32(val) (val)
> +#define cpu_to_be64(val) (val)
> +#else
> +/* Little endian */
> +#define be32_to_cpu(val) bswap_32(val)
> +#define cpu_to_be32(val) bswap_32(val)
> +#define cpu_to_be64(val) bswap_64(val)
> +#endif
> +
> +#define barrier_data(ptr) __asm__ __volatile__("": :"r"(ptr) :"memory")
I know this is a copy-paste of the kernel space code,
but is barrier_data() also necessary for host programs?
> +
> +static inline void memzero_explicit(void *s, size_t count)
> +{
> + memset(s, 0, count);
> + barrier_data(s);
> +}
> +
--
Best Regards
Masahiro Yamada
Powered by blists - more mailing lists