lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [day] [month] [year] [list]
Message-ID: <20250203212631.565818-2-samitolvanen@google.com>
Date: Mon,  3 Feb 2025 21:26:32 +0000
From: Sami Tolvanen <samitolvanen@...gle.com>
To: Masahiro Yamada <masahiroy@...nel.org>
Cc: Luis Chamberlain <mcgrof@...nel.org>, Matthew Maurer <mmaurer@...gle.com>, 
	Petr Pavlu <petr.pavlu@...e.com>, Daniel Gomez <da.gomez@...sung.com>, 
	Sedat Dilek <sedat.dilek@...il.com>, linux-kbuild@...r.kernel.org, 
	linux-kernel@...r.kernel.org, linux-modules@...r.kernel.org, 
	rust-for-linux@...r.kernel.org, Sami Tolvanen <samitolvanen@...gle.com>, 
	Giuliano Procida <gprocida@...gle.com>
Subject: [PATCH] gendwarfksyms: Add a separate pass to resolve FQNs

Using dwarf_getscopes_die to resolve fully-qualified names turns out to
be rather slow, and also results in duplicate scopes being processed,
which doesn't help. Simply adding an extra pass to resolve names for all
DIEs before processing exports is noticeably faster.

For the object files with the most exports in a defconfig+Rust build,
the performance improvement is consistently >50%:

rust/bindings.o: 1038 exports
    before: 9.5980 +- 0.0183 seconds time elapsed  ( +-  0.19% )
     after: 4.3116 +- 0.0287 seconds time elapsed  ( +-  0.67% )

rust/core.o: 424 exports
    before: 5.3584 +- 0.0204 seconds time elapsed  ( +-  0.38% )
     after: 0.05348 +- 0.00129 seconds time elapsed  ( +-  2.42% )
            ^ Not a mistake.

net/core/dev.o: 190 exports
    before: 9.0507 +- 0.0297 seconds time elapsed  ( +-  0.33% )
     after: 3.2882 +- 0.0165 seconds time elapsed  ( +-  0.50% )

rust/kernel.o: 129 exports
    before: 6.8571 +- 0.0317 seconds time elapsed  ( +-  0.46% )
     after: 2.9096 +- 0.0316 seconds time elapsed  ( +-  1.09% )

net/core/skbuff.o: 120 exports
    before: 5.4805 +- 0.0291 seconds time elapsed  ( +-  0.53% )
     after: 2.0339 +- 0.0231 seconds time elapsed  ( +-  1.14% )

drivers/gpu/drm/display/drm_dp_helper.o: 101 exports
    before: 1.7877 +- 0.0187 seconds time elapsed  ( +-  1.05% )
     after: 0.69245 +- 0.00994 seconds time elapsed  ( +-  1.44% )

net/core/sock.o: 97 exports
    before: 5.8327 +- 0.0653 seconds time elapsed  ( +-  1.12% )
     after: 2.0784 +- 0.0291 seconds time elapsed  ( +-  1.40% )

drivers/net/phy/phy_device.o: 95 exports
    before: 3.0671 +- 0.0371 seconds time elapsed  ( +-  1.21% )
     after: 1.2127 +- 0.0207 seconds time elapsed  ( +-  1.70% )

drivers/pci/pci.o: 93 exports
    before: 1.1130 +- 0.0113 seconds time elapsed  ( +-  1.01% )
     after: 0.4848 +- 0.0127 seconds time elapsed  ( +-  2.63% )

kernel/sched/core.o: 83 exports
    before: 3.5092 +- 0.0223 seconds time elapsed  ( +-  0.64% )
     after: 1.1231 +- 0.0145 seconds time elapsed  ( +-  1.29% )

Overall, a defconfig+DWARF5 build with gendwarfksyms and Rust is 14.8%
faster with this patch applied on my test system. Without Rust, there's
still a 10.4% improvement in build time when gendwarfksyms is used.

Note that symbol versions are unchanged with this patch.

Suggested-by: Giuliano Procida <gprocida@...gle.com>
Signed-off-by: Sami Tolvanen <samitolvanen@...gle.com>
---
 scripts/gendwarfksyms/die.c           |   2 +-
 scripts/gendwarfksyms/dwarf.c         | 152 ++++++++++++++------------
 scripts/gendwarfksyms/gendwarfksyms.h |   2 +
 scripts/gendwarfksyms/types.c         |   2 +-
 4 files changed, 86 insertions(+), 72 deletions(-)

diff --git a/scripts/gendwarfksyms/die.c b/scripts/gendwarfksyms/die.c
index 66bd4c9bc952..6183bbbe7b54 100644
--- a/scripts/gendwarfksyms/die.c
+++ b/scripts/gendwarfksyms/die.c
@@ -6,7 +6,7 @@
 #include <string.h>
 #include "gendwarfksyms.h"
 
-#define DIE_HASH_BITS 15
+#define DIE_HASH_BITS 16
 
 /* {die->addr, state} -> struct die * */
 static HASHTABLE_DEFINE(die_map, 1 << DIE_HASH_BITS);
diff --git a/scripts/gendwarfksyms/dwarf.c b/scripts/gendwarfksyms/dwarf.c
index 534d9aa7c114..eed247d8abfc 100644
--- a/scripts/gendwarfksyms/dwarf.c
+++ b/scripts/gendwarfksyms/dwarf.c
@@ -3,6 +3,7 @@
  * Copyright (C) 2024 Google LLC
  */
 
+#define _GNU_SOURCE
 #include <assert.h>
 #include <inttypes.h>
 #include <stdarg.h>
@@ -193,79 +194,17 @@ static void process_fmt(struct die *cache, const char *fmt, ...)
 	va_end(args);
 }
 
-#define MAX_FQN_SIZE 64
-
-/* Get a fully qualified name from DWARF scopes */
-static char *get_fqn(Dwarf_Die *die)
+static void update_fqn(struct die *cache, Dwarf_Die *die)
 {
-	const char *list[MAX_FQN_SIZE];
-	Dwarf_Die *scopes = NULL;
-	bool has_name = false;
-	char *fqn = NULL;
-	char *p;
-	int count = 0;
-	int len = 0;
-	int res;
-	int i;
-
-	res = checkp(dwarf_getscopes_die(die, &scopes));
-	if (!res) {
-		list[count] = get_name_attr(die);
-
-		if (!list[count])
-			return NULL;
-
-		len += strlen(list[count]);
-		count++;
-
-		goto done;
-	}
-
-	for (i = res - 1; i >= 0 && count < MAX_FQN_SIZE; i--) {
-		if (dwarf_tag(&scopes[i]) == DW_TAG_compile_unit)
-			continue;
-
-		list[count] = get_name_attr(&scopes[i]);
-
-		if (list[count]) {
-			has_name = true;
-		} else {
-			list[count] = "<anonymous>";
-			has_name = false;
-		}
+	struct die *fqn;
 
-		len += strlen(list[count]);
-		count++;
-
-		if (i > 0) {
-			list[count++] = "::";
-			len += 2;
-		}
+	if (!cache->fqn) {
+		if (!__die_map_get((uintptr_t)die->addr, DIE_FQN, &fqn) &&
+		    *fqn->fqn)
+			cache->fqn = xstrdup(fqn->fqn);
+		else
+			cache->fqn = "";
 	}
-
-	free(scopes);
-
-	if (count == MAX_FQN_SIZE)
-		warn("increase MAX_FQN_SIZE: reached the maximum");
-
-	/* Consider the DIE unnamed if the last scope doesn't have a name */
-	if (!has_name)
-		return NULL;
-done:
-	fqn = xmalloc(len + 1);
-	*fqn = '\0';
-
-	p = fqn;
-	for (i = 0; i < count; i++)
-		p = stpcpy(p, list[i]);
-
-	return fqn;
-}
-
-static void update_fqn(struct die *cache, Dwarf_Die *die)
-{
-	if (!cache->fqn)
-		cache->fqn = get_fqn(die) ?: "";
 }
 
 static void process_fqn(struct die *cache, Dwarf_Die *die)
@@ -1148,8 +1087,81 @@ static void process_symbol_ptr(struct symbol *sym, void *arg)
 	cache_free(&state.expansion_cache);
 }
 
+static int resolve_fqns(struct state *parent, struct die *unused,
+			Dwarf_Die *die)
+{
+	struct state state;
+	struct die *cache;
+	const char *name;
+	bool use_prefix;
+	char *prefix = NULL;
+	char *fqn = "";
+	int tag;
+
+	if (!__die_map_get((uintptr_t)die->addr, DIE_FQN, &cache))
+		return 0;
+
+	tag = dwarf_tag(die);
+
+	/*
+	 * Only namespaces and structures need to pass a prefix to the next
+	 * scope.
+	 */
+	use_prefix = tag == DW_TAG_namespace || tag == DW_TAG_class_type ||
+		     tag == DW_TAG_structure_type;
+
+	state.expand.current_fqn = NULL;
+	name = get_name_attr(die);
+
+	if (parent && parent->expand.current_fqn && (use_prefix || name)) {
+		/*
+		 * The fqn for the current DIE, and if needed, a prefix for the
+		 * next scope.
+		 */
+		if (asprintf(&prefix, "%s::%s", parent->expand.current_fqn,
+			     name ? name : "<anonymous>") < 0)
+			error("asprintf failed");
+
+		if (use_prefix)
+			state.expand.current_fqn = prefix;
+
+		/*
+		 * Use fqn only if the DIE has a name. Otherwise fqn will
+		 * remain empty.
+		 */
+		if (name) {
+			fqn = prefix;
+			/* prefix will be freed by die_map. */
+			prefix = NULL;
+		}
+	} else if (name) {
+		/* No prefix from the previous scope. Use only the name. */
+		fqn = xstrdup(name);
+
+		if (use_prefix)
+			state.expand.current_fqn = fqn;
+	}
+
+	/* If the DIE has a non-empty name, cache it. */
+	if (*fqn) {
+		cache = die_map_get(die, DIE_FQN);
+		/* Move ownership of fqn to die_map. */
+		cache->fqn = fqn;
+		cache->state = DIE_FQN;
+	}
+
+	check(process_die_container(&state, NULL, die, resolve_fqns,
+				    match_all));
+
+	free(prefix);
+	return 0;
+}
+
 void process_cu(Dwarf_Die *cudie)
 {
+	check(process_die_container(NULL, NULL, cudie, resolve_fqns,
+				    match_all));
+
 	check(process_die_container(NULL, NULL, cudie, process_exported_symbols,
 				    match_all));
 
diff --git a/scripts/gendwarfksyms/gendwarfksyms.h b/scripts/gendwarfksyms/gendwarfksyms.h
index 197a1a8123c6..2feec168bf73 100644
--- a/scripts/gendwarfksyms/gendwarfksyms.h
+++ b/scripts/gendwarfksyms/gendwarfksyms.h
@@ -139,6 +139,7 @@ void symbol_free(void);
 
 enum die_state {
 	DIE_INCOMPLETE,
+	DIE_FQN,
 	DIE_UNEXPANDED,
 	DIE_COMPLETE,
 	DIE_SYMBOL,
@@ -170,6 +171,7 @@ static inline const char *die_state_name(enum die_state state)
 {
 	switch (state) {
 	CASE_CONST_TO_STR(DIE_INCOMPLETE)
+	CASE_CONST_TO_STR(DIE_FQN)
 	CASE_CONST_TO_STR(DIE_UNEXPANDED)
 	CASE_CONST_TO_STR(DIE_COMPLETE)
 	CASE_CONST_TO_STR(DIE_SYMBOL)
diff --git a/scripts/gendwarfksyms/types.c b/scripts/gendwarfksyms/types.c
index 6c03265f4d10..6f37289104ff 100644
--- a/scripts/gendwarfksyms/types.c
+++ b/scripts/gendwarfksyms/types.c
@@ -248,7 +248,7 @@ static char *get_type_name(struct die *cache)
 		warn("found incomplete cache entry: %p", cache);
 		return NULL;
 	}
-	if (cache->state == DIE_SYMBOL)
+	if (cache->state == DIE_SYMBOL || cache->state == DIE_FQN)
 		return NULL;
 	if (!cache->fqn || !*cache->fqn)
 		return NULL;

base-commit: 2014c95afecee3e76ca4a56956a936e23283f05b
-- 
2.48.1.362.g079036d154-goog


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ