[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <aOTQl2SD5W8zBuNl@wu-Pro-E500-G6-WS720T>
Date: Tue, 7 Oct 2025 16:34:31 +0800
From: Guan-Chun Wu <409411716@....tku.edu.tw>
To: David Laight <david.laight.linux@...il.com>
Cc: Caleb Sander Mateos <csander@...estorage.com>,
akpm@...ux-foundation.org, axboe@...nel.dk,
ceph-devel@...r.kernel.org, ebiggers@...nel.org, hch@....de,
home7438072@...il.com, idryomov@...il.com, jaegeuk@...nel.org,
kbusch@...nel.org, linux-fscrypt@...r.kernel.org,
linux-kernel@...r.kernel.org, linux-nvme@...ts.infradead.org,
sagi@...mberg.me, tytso@....edu, visitorckw@...il.com,
xiubli@...hat.com
Subject: Re: [PATCH v3 3/6] lib/base64: rework encode/decode for speed and
stricter validation
On Mon, Oct 06, 2025 at 09:52:12PM +0100, David Laight wrote:
> On Wed, 1 Oct 2025 17:39:32 +0800
> Guan-Chun Wu <409411716@....tku.edu.tw> wrote:
>
> > On Tue, Sep 30, 2025 at 05:11:12PM -0700, Caleb Sander Mateos wrote:
> > > On Fri, Sep 26, 2025 at 12:01 AM Guan-Chun Wu <409411716@....tku.edu.tw> wrote:
> > > >
> > > > The old base64 implementation relied on a bit-accumulator loop, which was
> > > > slow for larger inputs and too permissive in validation. It would accept
> > > > extra '=', missing '=', or even '=' appearing in the middle of the input,
> > > > allowing malformed strings to pass. This patch reworks the internals to
> > > > improve performance and enforce stricter validation.
> > > >
> > > > Changes:
> > > > - Encoder:
> > > > * Process input in 3-byte blocks, mapping 24 bits into four 6-bit
> > > > symbols, avoiding bit-by-bit shifting and reducing loop iterations.
> > > > * Handle the final 1-2 leftover bytes explicitly and emit '=' only when
> > > > requested.
> > > > - Decoder:
> > > > * Based on the reverse lookup tables from the previous patch, decode
> > > > input in 4-character groups.
> > > > * Each group is looked up directly, converted into numeric values, and
> > > > combined into 3 output bytes.
> > > > * Explicitly handle padded and unpadded forms:
> > > > - With padding: input length must be a multiple of 4, and '=' is
> > > > allowed only in the last two positions. Reject stray or early '='.
> > > > - Without padding: validate tail lengths (2 or 3 chars) and require
> > > > unused low bits to be zero.
> > > > * Removed the bit-accumulator style loop to reduce loop iterations.
> > > >
> > > > Performance (x86_64, Intel Core i7-10700 @ 2.90GHz, avg over 1000 runs,
> > > > KUnit):
> > > >
> > > > Encode:
> > > > 64B ~90ns -> ~32ns (~2.8x)
> > > > 1KB ~1332ns -> ~510ns (~2.6x)
> > > >
> > > > Decode:
> > > > 64B ~1530ns -> ~64ns (~23.9x)
> > > > 1KB ~27726ns -> ~982ns (~28.3x)
> > > >
> > > > Co-developed-by: Kuan-Wei Chiu <visitorckw@...il.com>
> > > > Signed-off-by: Kuan-Wei Chiu <visitorckw@...il.com>
> > > > Co-developed-by: Yu-Sheng Huang <home7438072@...il.com>
> > > > Signed-off-by: Yu-Sheng Huang <home7438072@...il.com>
> > > > Signed-off-by: Guan-Chun Wu <409411716@....tku.edu.tw>
> > > > ---
> > > > lib/base64.c | 150 +++++++++++++++++++++++++++++++++++++--------------
> > > > 1 file changed, 110 insertions(+), 40 deletions(-)
> > > >
> > > > diff --git a/lib/base64.c b/lib/base64.c
> > > > index b20fdf168..fd1db4611 100644
> > > > --- a/lib/base64.c
> > > > +++ b/lib/base64.c
> > > > @@ -93,26 +93,43 @@ static const s8 base64_rev_tables[][256] = {
> > > > int base64_encode(const u8 *src, int srclen, char *dst, bool padding, enum base64_variant variant)
> > > > {
> > > > u32 ac = 0;
> > > > - int bits = 0;
> > > > - int i;
> > > > char *cp = dst;
> > > > const char *base64_table = base64_tables[variant];
> > > >
> > > > - for (i = 0; i < srclen; i++) {
> > > > - ac = (ac << 8) | src[i];
> > > > - bits += 8;
> > > > - do {
> > > > - bits -= 6;
> > > > - *cp++ = base64_table[(ac >> bits) & 0x3f];
> > > > - } while (bits >= 6);
> > > > - }
> > > > - if (bits) {
> > > > - *cp++ = base64_table[(ac << (6 - bits)) & 0x3f];
> > > > - bits -= 6;
> > > > + while (srclen >= 3) {
> > > > + ac = ((u32)src[0] << 16) |
> > > > + ((u32)src[1] << 8) |
> > > > + (u32)src[2];
> > > > +
> > > > + *cp++ = base64_table[ac >> 18];
> > > > + *cp++ = base64_table[(ac >> 12) & 0x3f];
> > > > + *cp++ = base64_table[(ac >> 6) & 0x3f];
> > > > + *cp++ = base64_table[ac & 0x3f];
> > > > +
> > > > + src += 3;
> > > > + srclen -= 3;
> > > > }
> > > > - while (bits < 0) {
> > > > - *cp++ = '=';
> > > > - bits += 2;
> > > > +
> > > > + switch (srclen) {
> > > > + case 2:
> > > > + ac = ((u32)src[0] << 16) |
> > > > + ((u32)src[1] << 8);
> > > > +
> > > > + *cp++ = base64_table[ac >> 18];
> > > > + *cp++ = base64_table[(ac >> 12) & 0x3f];
> > > > + *cp++ = base64_table[(ac >> 6) & 0x3f];
> > > > + if (padding)
> > > > + *cp++ = '=';
> > > > + break;
> > > > + case 1:
> > > > + ac = ((u32)src[0] << 16);
> > > > + *cp++ = base64_table[ac >> 18];
> > > > + *cp++ = base64_table[(ac >> 12) & 0x3f];
> > > > + if (padding) {
> > > > + *cp++ = '=';
> > > > + *cp++ = '=';
> > > > + }
> > > > + break;
> > > > }
> > > > return cp - dst;
> > > > }
> > > > @@ -128,39 +145,92 @@ EXPORT_SYMBOL_GPL(base64_encode);
> > > > *
> > > > * Decodes a string using the selected Base64 variant.
> > > > *
> > > > - * This implementation hasn't been optimized for performance.
> > > > - *
> > > > * Return: the length of the resulting decoded binary data in bytes,
> > > > * or -1 if the string isn't a valid Base64 string.
> > > > */
> > > > int base64_decode(const char *src, int srclen, u8 *dst, bool padding, enum base64_variant variant)
> > > > {
> > > > - u32 ac = 0;
> > > > - int bits = 0;
> > > > - int i;
> > > > u8 *bp = dst;
> > > > - s8 ch;
> > > > -
> > > > - for (i = 0; i < srclen; i++) {
> > > > - if (src[i] == '=') {
> > > > - ac = (ac << 6);
> > > > - bits += 6;
> > > > - if (bits >= 8)
> > > > - bits -= 8;
> > > > - continue;
> > > > - }
> > > > - ch = base64_rev_tables[variant][(u8)src[i]];
> > > > - if (ch == -1)
> > > > + s8 input1, input2, input3, input4;
> > > > + u32 val;
> > > > +
> > > > + if (srclen == 0)
> > > > + return 0;
> > >
> > > Doesn't look like this special case is necessary; all the if and while
> > > conditions below are false if srclen == 0, so the function will just
> > > end up returning 0 in that case anyways. It would be nice to avoid
> > > this branch, especially as it seems like an uncommon case.
> > >
> >
> > You're right. I'll remove it. Thanks.
> >
> > > > +
> > > > + /* Validate the input length for padding */
> > > > + if (unlikely(padding && (srclen & 0x03) != 0))
> > > > + return -1;
> > > > +
> > > > + while (srclen >= 4) {
> > > > + /* Decode the next 4 characters */
> > > > + input1 = base64_rev_tables[variant][(u8)src[0]];
> > > > + input2 = base64_rev_tables[variant][(u8)src[1]];
> > > > + input3 = base64_rev_tables[variant][(u8)src[2]];
> > > > + input4 = base64_rev_tables[variant][(u8)src[3]];
> > > > +
> > > > + /* Return error if any Base64 character is invalid */
> > > > + if (unlikely(input1 < 0 || input2 < 0 || (!padding && (input3 < 0 || input4 < 0))))
> > > > + return -1;
> > > > +
> > > > + /* Handle padding */
> > > > + if (unlikely(padding && ((input3 < 0 && input4 >= 0) ||
> > > > + (input3 < 0 && src[2] != '=') ||
> > > > + (input4 < 0 && src[3] != '=') ||
> > > > + (srclen > 4 && (input3 < 0 || input4 < 0)))))
> > >
> > > Would be preferable to check and strip the padding (i.e. decrease
> > > srclen) before this main loop. That way we could avoid several
> > > branches in this hot loop that are only necessary to handle the
> > > padding chars.
> > >
> >
> > You're right. As long as we check and strip the padding first, the
> > behavior with or without padding can be the same, and it could also
> > reduce some unnecessary branches. I'll make the change.
>
> As I said earlier.
> Calculate 'val' first using signed arithmetic.
> If it is non-negative there are three bytes to write.
> If negative then check for src[2] and src[3] being '=' (etc) before erroring out.
>
> That way there is only one check in the normal path.
>
> David
>
Thanks for the feedback. We’ll update the implementation accordingly.
Best regards,
Guan-Chun
> >
> > Best regards,
> > Guan-Chun
> >
> > > > + return -1;
> > > > + val = ((u32)input1 << 18) |
> > > > + ((u32)input2 << 12) |
> > > > + ((u32)((input3 < 0) ? 0 : input3) << 6) |
> > > > + (u32)((input4 < 0) ? 0 : input4);
> > > > +
> > > > + *bp++ = (u8)(val >> 16);
> > > > +
> > > > + if (input3 >= 0)
> > > > + *bp++ = (u8)(val >> 8);
> > > > + if (input4 >= 0)
> > > > + *bp++ = (u8)val;
> > > > +
> > > > + src += 4;
> > > > + srclen -= 4;
> > > > + }
> > > > +
> > > > + /* Handle leftover characters when padding is not used */
> > > > + if (!padding && srclen > 0) {
> > > > + switch (srclen) {
> > > > + case 2:
> > > > + input1 = base64_rev_tables[variant][(u8)src[0]];
> > > > + input2 = base64_rev_tables[variant][(u8)src[1]];
> > > > + if (unlikely(input1 < 0 || input2 < 0))
> > > > + return -1;
> > > > +
> > > > + val = ((u32)input1 << 6) | (u32)input2; /* 12 bits */
> > > > + if (unlikely(val & 0x0F))
> > > > + return -1; /* low 4 bits must be zero */
> > > > +
> > > > + *bp++ = (u8)(val >> 4);
> > > > + break;
> > > > + case 3:
> > > > + input1 = base64_rev_tables[variant][(u8)src[0]];
> > > > + input2 = base64_rev_tables[variant][(u8)src[1]];
> > > > + input3 = base64_rev_tables[variant][(u8)src[2]];
> > > > + if (unlikely(input1 < 0 || input2 < 0 || input3 < 0))
> > > > + return -1;
> > > > +
> > > > + val = ((u32)input1 << 12) |
> > > > + ((u32)input2 << 6) |
> > > > + (u32)input3; /* 18 bits */
> > > > +
> > > > + if (unlikely(val & 0x03))
> > > > + return -1; /* low 2 bits must be zero */
> > > > +
> > > > + *bp++ = (u8)(val >> 10);
> > > > + *bp++ = (u8)((val >> 2) & 0xFF);
> > >
> > > "& 0xFF" is redundant with the cast to u8.
> > >
> > > Best,
> > > Caleb
> > >
> > > > + break;
> > > > + default:
> > > > return -1;
> > > > - ac = (ac << 6) | ch;
> > > > - bits += 6;
> > > > - if (bits >= 8) {
> > > > - bits -= 8;
> > > > - *bp++ = (u8)(ac >> bits);
> > > > }
> > > > }
> > > > - if (ac & ((1 << bits) - 1))
> > > > - return -1;
> > > > +
> > > > return bp - dst;
> > > > }
> > > > EXPORT_SYMBOL_GPL(base64_decode);
> > > > --
> > > > 2.34.1
> > > >
> > > >
> >
>
Powered by blists - more mailing lists