lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [day] [month] [year] [list]
Message-Id: <20260203151316.24506-1-david.laight.linux@gmail.com>
Date: Tue,  3 Feb 2026 15:13:15 +0000
From: david.laight.linux@...il.com
To: Willy Tarreau <w@....eu>,
	Thomas Weißschuh <linux@...ssschuh.net>,
	linux-kernel@...r.kernel.org,
	Cheng Li <lechain@...il.com>
Cc: David Laight <david.laight.linux@...il.com>
Subject: [PATCH next] tools/nolibc: Optimise and common up number to ascii functions

From: David Laight <david.laight.linux@...il.com>

Implement u[64]to[ah]_r() using a common function that uses multiply
by reciprocal to generate the least significant digit first and then
reverses the string.

On 32bit this is five multiplies (with 64bit product) for each output
digit. I think the old utoa_r() always did 36 multiplies and a lot
of subtracts - so this is likely faster even for 32bit values.
Definitely better for 64bit values (especially small ones).

Clearly shifts are faster for base 16, but reversing the output buffer
makes a big difference.

Sharing the code reduces the footprint (unless gcc decides to constant
fold the functions).
Definitely helps vfprintf() where the constants get loaded and a single
call is down.
Also makes it cheap to add octal support to vfprintf for completeness.

Signed-off-by: David Laight <david.laight.linux@...il.com>
---
 tools/include/nolibc/stdlib.h | 145 ++++++++++++++--------------------
 1 file changed, 58 insertions(+), 87 deletions(-)

diff --git a/tools/include/nolibc/stdlib.h b/tools/include/nolibc/stdlib.h
index f184e108ed0a..8a2e86f60ae9 100644
--- a/tools/include/nolibc/stdlib.h
+++ b/tools/include/nolibc/stdlib.h
@@ -188,36 +188,67 @@ void *realloc(void *old_ptr, size_t new_size)
 	return ret;
 }
 
-/* Converts the unsigned long integer <in> to its hex representation into
+/* Converts the unsigned 64bit integer <in> to base <base> ascii into
  * buffer <buffer>, which must be long enough to store the number and the
- * trailing zero (17 bytes for "ffffffffffffffff" or 9 for "ffffffff"). The
- * buffer is filled from the first byte, and the number of characters emitted
- * (not counting the trailing zero) is returned. The function is constructed
- * in a way to optimize the code size and avoid any divide that could add a
- * dependency on large external functions.
+ * trailing zero. The buffer is filled from the first byte, and the number
+ * of characters emitted (not counting the trailing zero) is returned.
+ * The function uses 'multiply be reciprocal' for the divisions and
+ * requires the caller pass the correct compile-time constant.
  */
-static __attribute__((unused))
-int utoh_r(unsigned long in, char *buffer)
+#define __U64TOA_RECIP(base) ((base) & 1 ? ~0ull / (base) : (1ull << 63) / ((base) / 2))
+static __attribute__((unused, noinline))
+int __u64toa_base(uint64_t in, char *buffer, unsigned int base, uint64_t recip)
 {
-	signed char pos = (~0UL > 0xfffffffful) ? 60 : 28;
-	int digits = 0;
-	int dig;
+	unsigned int digits = 0;
+	unsigned int dig;
+	uint64_t q;
+	char *p;
 
+	/* Generate least significant digit first */
 	do {
-		dig = in >> pos;
-		in -= (uint64_t)dig << pos;
-		pos -= 4;
-		if (dig || digits || pos < 0) {
-			if (dig > 9)
-				dig += 'a' - '0' - 10;
-			buffer[digits++] = '0' + dig;
+#if defined(__SIZEOF_INT128__) && !defined(__mips__)
+		q = ((unsigned __int128)in * recip) >> 64;
+#else
+		uint64_t p = (uint32_t)in * (recip >> 32);
+		q = (in >> 32) * (recip >> 32) + (p >> 32);
+		p = (uint32_t)p + (in >> 32) * (uint32_t)recip;
+		q += p >> 32;
+#endif
+		dig = in - q * base;
+		/* Correct for any rounding errors (eg from low*low multiply) */
+		while (dig >= base) {
+			dig -= base;
+			q++;
 		}
-	} while (pos >= 0);
+		if (dig > 9)
+			dig += 'a' - '0' - 10;
+		buffer[digits++] = '0' + dig;
+	} while ((in = q));
 
 	buffer[digits] = 0;
+
+	/* Order reverse to result */
+	for (p = buffer + digits - 1; p > buffer; buffer++, p--) {
+		dig = *buffer;
+		*buffer = *p;
+		*p = dig;
+	}
+
 	return digits;
 }
 
+/* Converts the unsigned long integer <in> to its hex representation into
+ * buffer <buffer>, which must be long enough to store the number and the
+ * trailing zero (17 bytes for "ffffffffffffffff" or 9 for "ffffffff"). The
+ * buffer is filled from the first byte, and the number of characters emitted
+ * (not counting the trailing zero) is returned.
+ */
+static __inline__ __attribute__((unused))
+int utoh_r(unsigned long in, char *buffer)
+{
+	return __u64toa_base(in, buffer, 16, __U64TOA_RECIP(16));
+}
+
 /* converts unsigned long <in> to an hex string using the static itoa_buffer
  * and returns the pointer to that string.
  */
@@ -233,30 +264,11 @@ char *utoh(unsigned long in)
  * trailing zero (21 bytes for 18446744073709551615 in 64-bit, 11 for
  * 4294967295 in 32-bit). The buffer is filled from the first byte, and the
  * number of characters emitted (not counting the trailing zero) is returned.
- * The function is constructed in a way to optimize the code size and avoid
- * any divide that could add a dependency on large external functions.
  */
-static __attribute__((unused))
+static __inline__ __attribute__((unused))
 int utoa_r(unsigned long in, char *buffer)
 {
-	unsigned long lim;
-	int digits = 0;
-	int pos = (~0UL > 0xfffffffful) ? 19 : 9;
-	int dig;
-
-	do {
-		for (dig = 0, lim = 1; dig < pos; dig++)
-			lim *= 10;
-
-		if (digits || in >= lim || !pos) {
-			for (dig = 0; in >= lim; dig++)
-				in -= lim;
-			buffer[digits++] = '0' + dig;
-		}
-	} while (pos--);
-
-	buffer[digits] = 0;
-	return digits;
+	return __u64toa_base(in, buffer, 10, __U64TOA_RECIP(10));
 }
 
 /* Converts the signed long integer <in> to its string representation into
@@ -324,34 +336,12 @@ char *utoa(unsigned long in)
  * buffer <buffer>, which must be long enough to store the number and the
  * trailing zero (17 bytes for "ffffffffffffffff"). The buffer is filled from
  * the first byte, and the number of characters emitted (not counting the
- * trailing zero) is returned. The function is constructed in a way to optimize
- * the code size and avoid any divide that could add a dependency on large
- * external functions.
+ * trailing zero) is returned.
  */
-static __attribute__((unused))
+static __inline__ __attribute__((unused))
 int u64toh_r(uint64_t in, char *buffer)
 {
-	signed char pos = 60;
-	int digits = 0;
-	int dig;
-
-	do {
-		if (sizeof(long) >= 8) {
-			dig = (in >> pos) & 0xF;
-		} else {
-			/* 32-bit platforms: avoid a 64-bit shift */
-			uint32_t d = (pos >= 32) ? (in >> 32) : in;
-			dig = (d >> (pos & 31)) & 0xF;
-		}
-		if (dig > 9)
-			dig += 'a' - '0' - 10;
-		pos -= 4;
-		if (dig || digits || pos < 0)
-			buffer[digits++] = '0' + dig;
-	} while (pos >= 0);
-
-	buffer[digits] = 0;
-	return digits;
+	return __u64toa_base(in, buffer, 16, __U64TOA_RECIP(16));
 }
 
 /* converts uint64_t <in> to an hex string using the static itoa_buffer and
@@ -368,31 +358,12 @@ char *u64toh(uint64_t in)
  * buffer <buffer>, which must be long enough to store the number and the
  * trailing zero (21 bytes for 18446744073709551615). The buffer is filled from
  * the first byte, and the number of characters emitted (not counting the
- * trailing zero) is returned. The function is constructed in a way to optimize
- * the code size and avoid any divide that could add a dependency on large
- * external functions.
+ * trailing zero) is returned.
  */
-static __attribute__((unused))
+static __inline__ __attribute__((unused))
 int u64toa_r(uint64_t in, char *buffer)
 {
-	unsigned long long lim;
-	int digits = 0;
-	int pos = 19; /* start with the highest possible digit */
-	int dig;
-
-	do {
-		for (dig = 0, lim = 1; dig < pos; dig++)
-			lim *= 10;
-
-		if (digits || in >= lim || !pos) {
-			for (dig = 0; in >= lim; dig++)
-				in -= lim;
-			buffer[digits++] = '0' + dig;
-		}
-	} while (pos--);
-
-	buffer[digits] = 0;
-	return digits;
+	return __u64toa_base(in, buffer, 10, __U64TOA_RECIP(10));
 }
 
 /* Converts the signed 64-bit integer <in> to its string representation into
-- 
2.39.5


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ