lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20260206191121.3602-7-david.laight.linux@gmail.com>
Date: Fri,  6 Feb 2026 19:11:16 +0000
From: david.laight.linux@...il.com
To: Willy Tarreau <w@....eu>,
	Thomas Weißschuh <linux@...ssschuh.net>,
	linux-kernel@...r.kernel.org,
	Cheng Li <lechain@...il.com>
Cc: David Laight <david.laight.linux@...il.com>
Subject: [PATCH v2 next 06/11] tools/nolibc/printf: Use bit-masks to hold requested flag, length and conversion chars

From: David Laight <david.laight.linux@...il.com>

Use flags bits (1u << (ch & 31)) for the flags, length modifiers, and
conversion specifiers.
This makes it easy to test for multiple values at once.

Detect the conversion flags " #+-0" although they are currently all ignored.

Add support for length modifiers 't' and 'z' (both long) and 'q' and 'L'
(both long long).

Add support for "%i" (the same as "%d").

Unconditionally generate the signed values (for %d) to remove a second
set of checks for the size.

Signed-off-by: David Laight <david.laight.linux@...il.com>
---

Changes for v2:
- Use #defines to make the code a lot more readable.
- Include the changes from the old patch 10 that used masks for the
  conversion specifiers.
- Detect all the valid flag characters even though they are not implemented.
- Support for left-justifying fields is moved to patch 7.

 tools/include/nolibc/stdio.h | 151 ++++++++++++++++++++++++-----------
 1 file changed, 103 insertions(+), 48 deletions(-)

diff --git a/tools/include/nolibc/stdio.h b/tools/include/nolibc/stdio.h
index bb54f488c228..b14cf8224403 100644
--- a/tools/include/nolibc/stdio.h
+++ b/tools/include/nolibc/stdio.h
@@ -240,19 +240,44 @@ char *fgets(char *s, int size, FILE *stream)
 }
 
 
-/* minimal printf(). It supports the following formats:
- *  - %[l*]{d,u,c,x,p}
- *  - %s
- *  - unknown modifiers are ignored.
+/* simple printf(). It supports the following formats:
+ *  - %[-][width][{l,t,z,ll,L,j,q}]{d,i,u,c,x,p,s,m,%}
+ *  - %%
+ *  - invalid formats are copied to the output buffer
  */
+
+/* This code uses 'flag' variables that are indexed by the low 5 bits
+ * of characters to optimise checks for multiple characters.
+ *
+ * _NOLIBC_PF_FLAGS_CONTAIN(flags, 'a', 'b', ...)
+ * returns non-zero if the bit for any of the specified characters is set.
+ *
+ * _NOLIBC_PF_CHAR_IS_ONE_OF(ch, 'a', 'b', ...)
+ * returns the flag bit for ch if it is one of the specified characters.
+ * All the characters must be in the same 32 character block (non-alphabetic,
+ * upper case, or lower case) of the ASCII character set.
+ */
+#define _NOLIBC_PF_FLAG(ch) (1u << ((ch) & 0x1f))
+#define _NOLIBC_PF_FLAG_NZ(ch) ((ch) ? _NOLIBC_PF_FLAG(ch) : 0)
+#define _NOLIBC_PF_FLAG8(cmp_1, cmp_2, cmp_3, cmp_4, cmp_5, cmp_6, cmp_7, cmp_8, ...) \
+	(_NOLIBC_PF_FLAG_NZ(cmp_1) | _NOLIBC_PF_FLAG_NZ(cmp_2) | \
+	 _NOLIBC_PF_FLAG_NZ(cmp_3) | _NOLIBC_PF_FLAG_NZ(cmp_4) | \
+	 _NOLIBC_PF_FLAG_NZ(cmp_5) | _NOLIBC_PF_FLAG_NZ(cmp_6) | \
+	 _NOLIBC_PF_FLAG_NZ(cmp_7) | _NOLIBC_PF_FLAG_NZ(cmp_8))
+#define _NOLIBC_PF_FLAGS_CONTAIN(flags, ...) \
+	((flags) & _NOLIBC_PF_FLAG8(__VA_ARGS__, 0, 0, 0, 0, 0, 0, 0))
+#define _NOLIBC_PF_CHAR_IS_ONE_OF(ch, cmp_1, ...) \
+	(ch < (cmp_1 & ~0x1f) || ch > (cmp_1 | 0x1f) ? 0 : \
+		_NOLIBC_PF_FLAGS_CONTAIN(_NOLIBC_PF_FLAG(ch), cmp_1, __VA_ARGS__))
+
 typedef int (*__nolibc_printf_cb)(void *state, const char *buf, size_t size);
 
 static __attribute__((unused, format(printf, 3, 0)))
 int __nolibc_printf(__nolibc_printf_cb cb, void *state, const char *fmt, va_list args)
 {
-	char lpref, ch;
-	unsigned long long v;
+	char ch;
 	unsigned int written, width;
+	unsigned int flags, ch_flag;
 	size_t len;
 	char tmpbuf[21];
 	const char *outstr;
@@ -265,6 +290,7 @@ int __nolibc_printf(__nolibc_printf_cb cb, void *state, const char *fmt, va_list
 			break;
 
 		width = 0;
+		flags = 0;
 		if (ch != '%') {
 			while (*fmt && *fmt != '%')
 				fmt++;
@@ -274,6 +300,14 @@ int __nolibc_printf(__nolibc_printf_cb cb, void *state, const char *fmt, va_list
 
 			ch = *fmt++;
 
+			/* Conversion flag characters */
+			for (;; ch = *fmt++) {
+				ch_flag = _NOLIBC_PF_CHAR_IS_ONE_OF(ch, ' ', '#', '+', '-', '0');
+				if (!ch_flag)
+					break;
+				flags |= ch_flag;
+			}
+
 			/* width */
 			while (ch >= '0' && ch <= '9') {
 				width *= 10;
@@ -282,62 +316,77 @@ int __nolibc_printf(__nolibc_printf_cb cb, void *state, const char *fmt, va_list
 				ch = *fmt++;
 			}
 
-			/* Length modifiers */
-			if (ch == 'l') {
-				lpref = 1;
-				ch = *fmt++;
-				if (ch == 'l') {
-					lpref = 2;
-					ch = *fmt++;
+			/* Length modifier.
+			 * Their bits differ from those of the flag characters " #+-0" so can share 'flags'.
+			 * Change both L and ll to q.
+			 */
+			if (ch == 'L')
+				ch = 'q';
+			ch_flag = _NOLIBC_PF_CHAR_IS_ONE_OF(ch, 'l', 't', 'z', 'j', 'q');
+			if (ch_flag != 0) {
+				if (ch == 'l' && fmt[0] == 'l') {
+					fmt++;
+					ch_flag = _NOLIBC_PF_FLAG('q');
 				}
-			} else if (ch == 'j') {
-				/* intmax_t is long long */
-				lpref = 2;
+				flags |= ch_flag;
 				ch = *fmt++;
-			} else {
-				lpref = 0;
 			}
 
-			if (ch == 'c' || ch == 'd' || ch == 'u' || ch == 'x' || ch == 'p') {
+			/* Conversion specifiers. */
+
+			/* Numeric conversion specifiers. */
+			ch_flag = _NOLIBC_PF_CHAR_IS_ONE_OF(ch, 'c', 'd', 'i', 'u', 'x', 'p');
+			if (ch_flag != 0) {
+				unsigned long long v;
+				long long signed_v;
 				char *out = tmpbuf;
 
-				if (ch == 'p')
+				/* 'long' is needed for pointer/string conversions and ltz lengths.
+				 * A single test can be used provided 'p' (the same bit as '0')
+				 * is masked from flags.
+				 */
+				if (_NOLIBC_PF_FLAGS_CONTAIN(ch_flag | (flags & ~_NOLIBC_PF_FLAG('p')),
+							     'p', 'l', 't', 'z')) {
 					v = va_arg(args, unsigned long);
-				else if (lpref) {
-					if (lpref > 1)
-						v = va_arg(args, unsigned long long);
-					else
-						v = va_arg(args, unsigned long);
-				} else
+					signed_v = (long)v;
+				} else if (_NOLIBC_PF_FLAGS_CONTAIN(flags, 'j', 'q')) {
+					v = va_arg(args, unsigned long long);
+					signed_v = v;
+				} else {
 					v = va_arg(args, unsigned int);
+					signed_v = (int)v;
+				}
 
-				if (ch == 'd') {
-					/* sign-extend the value */
-					if (lpref == 0)
-						v = (long long)(int)v;
-					else if (lpref == 1)
-						v = (long long)(long)v;
+				if (_NOLIBC_PF_FLAGS_CONTAIN(ch_flag, 'c')) {
+					/* "%c" - single character. */
+					tmpbuf[0] = v;
+					len = 1;
+					outstr = tmpbuf;
+					goto do_output;
 				}
 
-				switch (ch) {
-				case 'c':
-					out[0] = v;
-					out[1] = 0;
-					break;
-				case 'd':
-					i64toa_r(v, out);
-					break;
-				case 'u':
+				if (_NOLIBC_PF_FLAGS_CONTAIN(ch_flag, 'd', 'i')) {
+					/* "%d" and "%i" - signed decimal numbers. */
+					if (signed_v < 0) {
+						*out++ = '-';
+						v = -(signed_v + 1);
+						v++;
+					}
+				}
+
+				/* Convert the number to ascii in the required base. */
+				if (_NOLIBC_PF_FLAGS_CONTAIN(ch_flag, 'd', 'i', 'u')) {
+					/* Base 10 */
 					u64toa_r(v, out);
-					break;
-				case 'p':
-					*(out++) = '0';
-					*(out++) = 'x';
-					__nolibc_fallthrough;
-				default: /* 'x' and 'p' above */
+				} else {
+					/* Base 16 */
+					if (_NOLIBC_PF_FLAGS_CONTAIN(ch_flag, 'p')) {
+						*(out++) = '0';
+						*(out++) = 'x';
+					}
 					u64toh_r(v, out);
-					break;
 				}
+
 				outstr = tmpbuf;
 			}
 			else if (ch == 's') {
@@ -366,8 +415,14 @@ int __nolibc_printf(__nolibc_printf_cb cb, void *state, const char *fmt, va_list
 			len = strlen(outstr);
 		}
 
+do_output:
 		written += len;
 
+		/* An OPTIMIZER_HIDE_VAR() seems to stop gcc back-merging this
+		 * code into one of the conditionals above.
+		 */
+		__asm__ volatile("" : "=r"(len) : "0"(len));
+
 		while (width > len) {
 			unsigned int pad_len = ((width - len - 1) & 15) + 1;
 			width -= pad_len;
-- 
2.39.5


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ