lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <03d36fac-a808-4794-a442-11fa6fa18ad8@suse.com>
Date: Tue, 1 Oct 2024 16:04:39 +0200
From: Petr Pavlu <petr.pavlu@...e.com>
To: Sami Tolvanen <samitolvanen@...gle.com>
Cc: Masahiro Yamada <masahiroy@...nel.org>,
 Luis Chamberlain <mcgrof@...nel.org>, Miguel Ojeda <ojeda@...nel.org>,
 Greg Kroah-Hartman <gregkh@...uxfoundation.org>,
 Matthew Maurer <mmaurer@...gle.com>, Alex Gaynor <alex.gaynor@...il.com>,
 Wedson Almeida Filho <wedsonaf@...il.com>, Gary Guo <gary@...yguo.net>,
 Petr Pavlu <petr.pavlu@...e.com>, Neal Gompa <neal@...pa.dev>,
 Hector Martin <marcan@...can.st>, Janne Grunau <j@...nau.net>,
 Miroslav Benes <mbenes@...e.cz>, Asahi Linux <asahi@...ts.linux.dev>,
 linux-kbuild@...r.kernel.org, linux-kernel@...r.kernel.org,
 linux-modules@...r.kernel.org, rust-for-linux@...r.kernel.org
Subject: Re: [PATCH v3 03/20] tools: Add gendwarfksyms

On 9/23/24 20:18, Sami Tolvanen wrote:
> Add a basic DWARF parser, which uses libdw to traverse the debugging
> information in an object file and looks for functions and variables.
> In follow-up patches, this will be expanded to produce symbol versions
> for CONFIG_MODVERSIONS from DWARF.
> 
> Signed-off-by: Sami Tolvanen <samitolvanen@...gle.com>
> ---
>  kernel/module/Kconfig                 |   8 ++
>  scripts/Makefile                      |   1 +
>  scripts/gendwarfksyms/.gitignore      |   2 +
>  scripts/gendwarfksyms/Makefile        |   8 ++
>  scripts/gendwarfksyms/dwarf.c         | 166 ++++++++++++++++++++++++++
>  scripts/gendwarfksyms/gendwarfksyms.c | 132 ++++++++++++++++++++
>  scripts/gendwarfksyms/gendwarfksyms.h |  97 +++++++++++++++
>  scripts/gendwarfksyms/symbols.c       |  82 +++++++++++++
>  8 files changed, 496 insertions(+)
>  create mode 100644 scripts/gendwarfksyms/.gitignore
>  create mode 100644 scripts/gendwarfksyms/Makefile
>  create mode 100644 scripts/gendwarfksyms/dwarf.c
>  create mode 100644 scripts/gendwarfksyms/gendwarfksyms.c
>  create mode 100644 scripts/gendwarfksyms/gendwarfksyms.h
>  create mode 100644 scripts/gendwarfksyms/symbols.c
> 
> diff --git a/kernel/module/Kconfig b/kernel/module/Kconfig
> index ccdbd1bc12aa..c3a0172a909f 100644
> --- a/kernel/module/Kconfig
> +++ b/kernel/module/Kconfig
> @@ -168,6 +168,14 @@ config MODVERSIONS
>  	  make them incompatible with the kernel you are running.  If
>  	  unsure, say N.
>  
> +config GENDWARFKSYMS
> +	bool
> +	depends on DEBUG_INFO
> +	# Requires full debugging information, split DWARF not supported.
> +	depends on !DEBUG_INFO_REDUCED && !DEBUG_INFO_SPLIT
> +	# Requires ELF object files.
> +	depends on !LTO
> +
>  config ASM_MODVERSIONS
>  	bool
>  	default HAVE_ASM_MODVERSIONS && MODVERSIONS
> diff --git a/scripts/Makefile b/scripts/Makefile
> index 6bcda4b9d054..d7fec46d38c0 100644
> --- a/scripts/Makefile
> +++ b/scripts/Makefile
> @@ -54,6 +54,7 @@ targets += module.lds
>  
>  subdir-$(CONFIG_GCC_PLUGINS) += gcc-plugins
>  subdir-$(CONFIG_MODVERSIONS) += genksyms
> +subdir-$(CONFIG_GENDWARFKSYMS) += gendwarfksyms
>  subdir-$(CONFIG_SECURITY_SELINUX) += selinux
>  subdir-$(CONFIG_SECURITY_IPE) += ipe
>  
> diff --git a/scripts/gendwarfksyms/.gitignore b/scripts/gendwarfksyms/.gitignore
> new file mode 100644
> index 000000000000..0927f8d3cd96
> --- /dev/null
> +++ b/scripts/gendwarfksyms/.gitignore
> @@ -0,0 +1,2 @@
> +# SPDX-License-Identifier: GPL-2.0
> +/gendwarfksyms
> diff --git a/scripts/gendwarfksyms/Makefile b/scripts/gendwarfksyms/Makefile
> new file mode 100644
> index 000000000000..9f8fec4fd39b
> --- /dev/null
> +++ b/scripts/gendwarfksyms/Makefile
> @@ -0,0 +1,8 @@
> +# SPDX-License-Identifier: GPL-2.0
> +hostprogs-always-y += gendwarfksyms
> +
> +gendwarfksyms-objs += gendwarfksyms.o
> +gendwarfksyms-objs += dwarf.o
> +gendwarfksyms-objs += symbols.o
> +
> +HOSTLDLIBS_gendwarfksyms := -ldw -lelf
> diff --git a/scripts/gendwarfksyms/dwarf.c b/scripts/gendwarfksyms/dwarf.c
> new file mode 100644
> index 000000000000..81df3e2ad3ae
> --- /dev/null
> +++ b/scripts/gendwarfksyms/dwarf.c
> @@ -0,0 +1,166 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (C) 2024 Google LLC
> + */
> +
> +#include "gendwarfksyms.h"
> +
> +static bool get_ref_die_attr(Dwarf_Die *die, unsigned int id, Dwarf_Die *value)
> +{
> +	Dwarf_Attribute da;
> +
> +	/* dwarf_formref_die returns a pointer instead of an error value. */
> +	return dwarf_attr(die, id, &da) && dwarf_formref_die(&da, value);
> +}
> +
> +#define DEFINE_GET_STRING_ATTR(attr)                         \
> +	static const char *get_##attr##_attr(Dwarf_Die *die) \
> +	{                                                    \
> +		Dwarf_Attribute da;                          \
> +		if (dwarf_attr(die, DW_AT_##attr, &da))      \
> +			return dwarf_formstring(&da);        \
> +		return NULL;                                 \
> +	}
> +
> +DEFINE_GET_STRING_ATTR(name)
> +DEFINE_GET_STRING_ATTR(linkage_name)
> +
> +static const char *get_symbol_name(Dwarf_Die *die)
> +{
> +	const char *name;
> +
> +	/* rustc uses DW_AT_linkage_name for exported symbols */
> +	name = get_linkage_name_attr(die);
> +	if (!name)
> +		name = get_name_attr(die);
> +
> +	return name;
> +}
> +
> +static bool match_export_symbol(struct state *state, Dwarf_Die *die)
> +{
> +	Dwarf_Die *source = die;
> +	Dwarf_Die origin;
> +
> +	/* If the DIE has an abstract origin, use it for type information. */
> +	if (get_ref_die_attr(die, DW_AT_abstract_origin, &origin))
> +		source = &origin;
> +
> +	state->sym = symbol_get(get_symbol_name(die));
> +
> +	/* Look up using the origin name if there are no matches. */
> +	if (!state->sym && source != die)
> +		state->sym = symbol_get(get_symbol_name(source));
> +
> +	state->die = *source;
> +	return !!state->sym;
> +}
> +
> +/*
> + * Type string processing
> + */
> +static void process(const char *s)
> +{
> +	s = s ?: "<null>";
> +
> +	if (dump_dies)
> +		fputs(s, stderr);
> +}
> +
> +bool match_all(Dwarf_Die *die)
> +{
> +	return true;
> +}
> +
> +int process_die_container(struct state *state, Dwarf_Die *die,
> +			  die_callback_t func, die_match_callback_t match)
> +{
> +	Dwarf_Die current;
> +	int res;
> +
> +	res = checkp(dwarf_child(die, &current));
> +	while (!res) {
> +		if (match(&current)) {
> +			/* <0 = error, 0 = continue, >0 = stop */
> +			res = checkp(func(state, &current));
> +			if (res)
> +				return res;
> +		}
> +
> +		res = checkp(dwarf_siblingof(&current, &current));
> +	}
> +
> +	return 0;
> +}
> +
> +/*
> + * Exported symbol processing
> + */
> +static void process_symbol(struct state *state, Dwarf_Die *die,
> +			   die_callback_t process_func)
> +{
> +	debug("%s", state->sym->name);
> +	check(process_func(state, die));
> +	if (dump_dies)
> +		fputs("\n", stderr);
> +}
> +
> +static int __process_subprogram(struct state *state, Dwarf_Die *die)
> +{
> +	process("subprogram");
> +	return 0;
> +}
> +
> +static void process_subprogram(struct state *state, Dwarf_Die *die)
> +{
> +	process_symbol(state, die, __process_subprogram);
> +}
> +
> +static int __process_variable(struct state *state, Dwarf_Die *die)
> +{
> +	process("variable ");
> +	return 0;
> +}
> +
> +static void process_variable(struct state *state, Dwarf_Die *die)
> +{
> +	process_symbol(state, die, __process_variable);
> +}
> +
> +static int process_exported_symbols(struct state *unused, Dwarf_Die *die)
> +{
> +	int tag = dwarf_tag(die);
> +
> +	switch (tag) {
> +	/* Possible containers of exported symbols */
> +	case DW_TAG_namespace:
> +	case DW_TAG_class_type:
> +	case DW_TAG_structure_type:
> +		return check(process_die_container(
> +			NULL, die, process_exported_symbols, match_all));
> +
> +	/* Possible exported symbols */
> +	case DW_TAG_subprogram:
> +	case DW_TAG_variable: {
> +		struct state state;
> +
> +		if (!match_export_symbol(&state, die))
> +			return 0;
> +
> +		if (tag == DW_TAG_subprogram)
> +			process_subprogram(&state, &state.die);
> +		else
> +			process_variable(&state, &state.die);
> +
> +		return 0;
> +	}
> +	default:
> +		return 0;
> +	}
> +}
> +
> +void process_cu(Dwarf_Die *cudie)
> +{
> +	check(process_die_container(NULL, cudie, process_exported_symbols,
> +				    match_all));
> +}
> diff --git a/scripts/gendwarfksyms/gendwarfksyms.c b/scripts/gendwarfksyms/gendwarfksyms.c
> new file mode 100644
> index 000000000000..096a334fa5b3
> --- /dev/null
> +++ b/scripts/gendwarfksyms/gendwarfksyms.c
> @@ -0,0 +1,132 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (C) 2024 Google LLC
> + */
> +
> +#include <fcntl.h>
> +#include <getopt.h>
> +#include <errno.h>
> +#include <stdarg.h>
> +#include <string.h>
> +#include <unistd.h>
> +#include "gendwarfksyms.h"
> +
> +/*
> + * Options
> + */
> +
> +/* Print debugging information to stderr */
> +int debug;
> +/* Dump DIE contents */
> +int dump_dies;
> +
> +static void usage(void)
> +{
> +	fputs("Usage: gendwarfksyms [options] elf-object-file ... < symbol-list\n\n"
> +	      "Options:\n"
> +	      "  -d, --debug          Print debugging information\n"
> +	      "      --dump-dies      Dump DWARF DIE contents\n"
> +	      "  -h, --help           Print this message\n"
> +	      "\n",
> +	      stderr);
> +}
> +
> +static int process_module(Dwfl_Module *mod, void **userdata, const char *name,
> +			  Dwarf_Addr base, void *arg)
> +{
> +	Dwarf_Addr dwbias;
> +	Dwarf_Die cudie;
> +	Dwarf_CU *cu = NULL;
> +	Dwarf *dbg;
> +	int res;
> +
> +	debug("%s", name);
> +	dbg = dwfl_module_getdwarf(mod, &dwbias);
> +
> +	do {
> +		res = dwarf_get_units(dbg, cu, &cu, NULL, NULL, &cudie, NULL);
> +		if (res < 0)
> +			error("dwarf_get_units failed: no debugging information?");
> +		if (res == 1)
> +			break; /* No more units */
> +
> +		process_cu(&cudie);
> +	} while (cu);
> +
> +	return DWARF_CB_OK;
> +}
> +
> +static const Dwfl_Callbacks callbacks = {
> +	.section_address = dwfl_offline_section_address,
> +	.find_debuginfo = dwfl_standard_find_debuginfo,
> +};
> +
> +int main(int argc, char **argv)
> +{
> +	unsigned int n;
> +	int opt;
> +
> +	struct option opts[] = { { "debug", 0, NULL, 'd' },
> +				 { "dump-dies", 0, &dump_dies, 1 },
> +				 { "help", 0, NULL, 'h' },
> +				 { 0, 0, NULL, 0 } };
> +
> +	while ((opt = getopt_long(argc, argv, "dh", opts, NULL)) != EOF) {
> +		switch (opt) {
> +		case 0:
> +			break;
> +		case 'd':
> +			debug = 1;
> +			break;
> +		case 'h':
> +			usage();
> +			return 0;
> +		default:
> +			usage();
> +			return 1;
> +		}
> +	}
> +
> +	if (optind >= argc) {
> +		usage();
> +		error("no input files?");
> +	}
> +
> +	symbol_read_exports(stdin);
> +
> +	for (n = optind; n < argc; n++) {
> +		Dwfl *dwfl;
> +		int fd;
> +
> +		fd = open(argv[n], O_RDONLY);
> +		if (fd == -1) {
> +			error("open failed for '%s': %s", argv[n],
> +			      strerror(errno));
> +			return -1;
> +		}
> +
> +		dwfl = dwfl_begin(&callbacks);
> +		if (!dwfl) {
> +			error("dwfl_begin failed for '%s': %s", argv[n],
> +			      dwarf_errmsg(-1));
> +			return -1;
> +		}
> +
> +		if (!dwfl_report_offline(dwfl, argv[n], argv[n], fd)) {
> +			error("dwfl_report_offline failed for '%s': %s",
> +			      argv[n], dwarf_errmsg(-1));
> +			return -1;
> +		}
> +
> +		dwfl_report_end(dwfl, NULL, NULL);
> +
> +		if (dwfl_getmodules(dwfl, &process_module, NULL, 0)) {
> +			error("dwfl_getmodules failed for '%s'", argv[n]);
> +			return -1;
> +		}

Nit: The four error() calls don't need to be followed by 'return -1;'
since the error() macro now calls exit(1).

> +
> +		dwfl_end(dwfl);
> +	}
> +
> +	return 0;
> +}
> diff --git a/scripts/gendwarfksyms/gendwarfksyms.h b/scripts/gendwarfksyms/gendwarfksyms.h
> new file mode 100644
> index 000000000000..1a10d18f178e
> --- /dev/null
> +++ b/scripts/gendwarfksyms/gendwarfksyms.h
> @@ -0,0 +1,97 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * Copyright (C) 2024 Google LLC
> + */
> +
> +#include <dwarf.h>
> +#include <elfutils/libdw.h>
> +#include <elfutils/libdwfl.h>
> +#include <inttypes.h>
> +#include <stdlib.h>
> +#include <stdio.h>
> +#include <string.h>
> +
> +#include <hash.h>
> +#include <hashtable.h>
> +#include <list.h>
> +#include <xalloc.h>
> +
> +#ifndef __GENDWARFKSYMS_H
> +#define __GENDWARFKSYMS_H
> +
> +/*
> + * Options -- in gendwarfksyms.c
> + */
> +extern int debug;
> +extern int dump_dies;
> +
> +/*
> + * Output helpers
> + */
> +#define __PREFIX "gendwarfksyms: "
> +#define __println(prefix, format, ...)                                \
> +	fprintf(stderr, prefix __PREFIX "%s: " format "\n", __func__, \
> +		##__VA_ARGS__)
> +
> +#define debug(format, ...)                                    \
> +	do {                                                  \
> +		if (debug)                                    \
> +			__println("", format, ##__VA_ARGS__); \
> +	} while (0)
> +
> +#define warn(format, ...) __println("warning: ", format, ##__VA_ARGS__)
> +#define error(format, ...)                                   \
> +	do {                                                 \
> +		__println("error: ", format, ##__VA_ARGS__); \
> +		exit(1);                                     \
> +	} while (0)
> +
> +/*
> + * Error handling helpers
> + */
> +#define __check(expr, test)                                     \
> +	({                                                      \
> +		int __res = expr;                               \
> +		if (test)                                       \
> +			error("`%s` failed: %d", #expr, __res); \
> +		__res;                                          \
> +	})
> +
> +/* Error == non-zero values */
> +#define check(expr) __check(expr, __res)
> +/* Error == negative values */
> +#define checkp(expr) __check(expr, __res < 0)
> +
> +/*
> + * symbols.c
> + */
> +
> +struct symbol {
> +	const char *name;
> +	struct hlist_node name_hash;
> +};
> +
> +typedef void (*symbol_callback_t)(struct symbol *, void *arg);
> +
> +void symbol_read_exports(FILE *file);
> +struct symbol *symbol_get(const char *name);
> +
> +/*
> + * dwarf.c
> + */
> +
> +struct state {
> +	struct symbol *sym;
> +	Dwarf_Die die;
> +};
> +
> +typedef int (*die_callback_t)(struct state *state, Dwarf_Die *die);
> +typedef bool (*die_match_callback_t)(Dwarf_Die *die);
> +bool match_all(Dwarf_Die *die);
> +
> +int process_die_container(struct state *state, Dwarf_Die *die,
> +			  die_callback_t func, die_match_callback_t match);
> +
> +void process_cu(Dwarf_Die *cudie);
> +
> +#endif /* __GENDWARFKSYMS_H */
> diff --git a/scripts/gendwarfksyms/symbols.c b/scripts/gendwarfksyms/symbols.c
> new file mode 100644
> index 000000000000..1809be93d18c
> --- /dev/null
> +++ b/scripts/gendwarfksyms/symbols.c
> @@ -0,0 +1,82 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (C) 2024 Google LLC
> + */
> +
> +#include "gendwarfksyms.h"
> +
> +#define SYMBOL_HASH_BITS 15
> +static HASHTABLE_DEFINE(symbol_names, 1 << SYMBOL_HASH_BITS);
> +
> +static int for_each(const char *name, symbol_callback_t func, void *data)
> +{
> +	struct hlist_node *tmp;
> +	struct symbol *match;
> +
> +	if (!name || !*name)
> +		return 0;
> +
> +	hash_for_each_possible_safe(symbol_names, match, tmp, name_hash,
> +				    hash_str(name)) {
> +		if (strcmp(match->name, name))
> +			continue;
> +
> +		if (func)
> +			func(match, data);
> +
> +		return 1;
> +	}
> +
> +	return 0;
> +}
> +
> +static bool is_exported(const char *name)
> +{
> +	return checkp(for_each(name, NULL, NULL)) > 0;
> +}
> +
> +void symbol_read_exports(FILE *file)
> +{
> +	struct symbol *sym;
> +	char *line = NULL;
> +	char *name = NULL;
> +	size_t size = 0;
> +	int nsym = 0;
> +
> +	while (getline(&line, &size, file) > 0) {
> +		if (sscanf(line, "%ms\n", &name) != 1)
> +			error("malformed input line: %s", line);
> +
> +		if (is_exported(name)) {
> +			/* Ignore duplicates */
> +			free(name);
> +			continue;
> +		}
> +
> +		sym = xcalloc(1, sizeof(struct symbol));
> +		sym->name = name;
> +
> +		hash_add(symbol_names, &sym->name_hash, hash_str(sym->name));
> +		++nsym;
> +
> +		debug("%s", sym->name);
> +	}
> +
> +	free(line);
> +	debug("%d exported symbols", nsym);
> +}
> +
> +static void get_symbol(struct symbol *sym, void *arg)
> +{
> +	struct symbol **res = arg;
> +
> +	*res = sym;
> +}
> +
> +struct symbol *symbol_get(const char *name)
> +{
> +	struct symbol *sym = NULL;
> +
> +	for_each(name, get_symbol, &sym);
> +	return sym;
> +}

Nit: The code inconsistently checks for a potential error from the
function for_each(). Looking at the whole series, the value is checked
using checkp() in functions symbol_set_crc(), symbol_set_ptr(),
symbol_set_die(), is_exported(), but not in symbol_get() and
elf_set_symbol_addr(). It would be good to unify this, or perhaps even
make for_each() return an unsigned int to indicate it never fails?

Looks otherwise ok to me, feel free to add:
Reviewed-by: Petr Pavlu <petr.pavlu@...e.com>

-- 
Thanks,
Petr

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ