lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Date:	Wed, 7 Sep 2011 14:44:02 -0500
From:	"Bob Pearson" <rpearson@...temfabricworks.com>
To:	"'Joakim Tjernlund'" <joakim.tjernlund@...nsmode.se>
Cc:	<akpm@...ux-foundation.org>, <fzago@...temfabricworks.com>,
	"'George Spelvin'" <linux@...izon.com>,
	<linux-kernel@...r.kernel.org>
Subject: RE: [PATCH v6 08/10] crc32-add-slicing-by-8.diff



> -----Original Message-----
> From: Joakim Tjernlund [mailto:joakim.tjernlund@...nsmode.se]
> Sent: Wednesday, September 07, 2011 2:31 AM
> To: Bob Pearson
> Cc: akpm@...ux-foundation.org; fzago@...temfabricworks.com; George
> Spelvin; linux-kernel@...r.kernel.org
> Subject: Re: [PATCH v6 08/10] crc32-add-slicing-by-8.diff
> 
> Bob Pearson <rpearson@...temfabricworks.com> wrote on 2011/09/01
> 00:30:32:
> >
> > add slicing-by-8 algorithm to the existing
> > slicing-by-4 algorithm. This consists of:
> >    - extend largest BITS size from 32 to 64
> >    - extend tables from tab[4][256] to up to tab[8][256]
> >    - Add code for inner loop.
> >
> > Signed-off-by: Bob Pearson <rpearson@...temfabricworks.com>
> >
> > ---
> >  lib/crc32.c          |   40 ++++++++++++++++++++++++++++------------
> >  lib/crc32defs.h      |   29 +++++++++++++++++++++--------
> >  lib/gen_crc32table.c |   43 +++++++++++++++++++++++++++----------------
> >  3 files changed, 76 insertions(+), 36 deletions(-)
> >
> > Index: for-next/lib/crc32.c
> >
> ==========================================================
> =========
> > --- for-next.orig/lib/crc32.c
> > +++ for-next/lib/crc32.c
> > @@ -47,25 +47,28 @@ MODULE_LICENSE("GPL");
> >
> >  #if CRC_LE_BITS > 8 || CRC_BE_BITS > 8
> >
> > +/* implements slicing-by-4 or slicing-by-8 algorithm */
> >  static inline u32
> >  crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32
> (*tab)[256])
> >  {
> >  # ifdef __LITTLE_ENDIAN
> >  #  define DO_CRC(x) (crc = t0[(crc ^ (x)) & 255] ^ (crc >> 8))
> > -#  define DO_CRC4 crc = t3[(crc) & 255] ^ \
> > -         t2[(crc >> 8) & 255] ^ \
> > -         t1[(crc >> 16) & 255] ^ \
> > -         t0[(crc >> 24) & 255]
> > +#  define DO_CRC4 (t3[(q) & 255] ^ t2[(q >> 8) & 255] ^ \
> > +         t1[(q >> 16) & 255] ^ t0[(q >> 24) & 255])
> > +#  define DO_CRC8 (t7[(q) & 255] ^ t6[(q >> 8) & 255] ^ \
> > +         t5[(q >> 16) & 255] ^ t4[(q >> 24) & 255])
> >  # else
> >  #  define DO_CRC(x) (crc = t0[((crc >> 24) ^ (x)) & 255] ^ (crc << 8))
> > -#  define DO_CRC4 crc = t0[(crc) & 255] ^ \
> > -         t1[(crc >> 8) & 255] ^ \
> > -         t2[(crc >> 16) & 255] ^ \
> > -         t3[(crc >> 24) & 255]
> > +#  define DO_CRC4 (t0[(q) & 255] ^ t1[(q >> 8) & 255] ^ \
> > +         t2[(q >> 16) & 255] ^ t3[(q >> 24) & 255])
> > +#  define DO_CRC8 (t4[(q) & 255] ^ t5[(q >> 8) & 255] ^ \
> > +         t6[(q >> 16) & 255] ^ t7[(q >> 24) & 255])
> 
> Don't like the new DO_CRC8 macro. You could get by with my earlier
> suggestion:
> #  define DO_CRC4(crc, x0, x1, x2, x3) \
> 		x3[(crc) & 255] ^		\
> 		x2[(crc >> 8) & 255] ^	\
> 		x1[(crc >> 16) & 255] ^ \
> 		x0[(crc >> 24) & 255]
> 
> Then the code becomes something like
> if (bits == 64) {
> 		crc = DO_CRC4(crc, t4, t5, t6, t7);
> 		++b;
> 		crc ^= DO_CRC4(*b, t0, t1, t2, t3);
> } else
> 		crc = DO_CRC4(crc, t0, t1, t2, t3);

OK by me.

> 
> >  # endif
> >     const u32 *b;
> > -   size_t    rem_len;
> > +   size_t rem_len;
> >     const u32 *t0 = tab[0], *t1 = tab[1], *t2 = tab[2], *t3 = tab[3];
> > +   const u32 *t4 = tab[4], *t5 = tab[5], *t6 = tab[6], *t7 = tab[7];
> 
> t4 to t7 is only used in 64 bit mode.

Yes but with the CEC_XX_BITS->bits change this point is moot.

> 
> BTW, it the 64 CRC bits on 32 bits BE arch bug fixed?

I have a 64 bit sparc machine and a bunch of x86 variants. As far as I am
able to tell these are working for  all the different algorithms.
If you have a 32 bit PPC machine please try to see if this is still a
problem.

> 
> > +   u32 q;
> >
> >     /* Align it */
> >     if (unlikely((long)buf & 3 && len)) {
> > @@ -73,13 +76,25 @@ crc32_body(u32 crc, unsigned char const
> >           DO_CRC(*buf++);
> >        } while ((--len) && ((long)buf)&3);
> >     }
> > +
> > +# if CRC_LE_BITS == 32
> >     rem_len = len & 3;
> > -   /* load data 32 bits wide, xor data 32 bits wide. */
> >     len = len >> 2;
> > +# else
> > +   rem_len = len & 7;
> > +   len = len >> 3;
> 
> I still fail to see why this is needed. You still do 32 bit loads so this
> only makes the code uglier, harder to maintain and makes small unaligned
crc
> bufs
> slower.

You suggested and I changed the initial alignment code to pick the first
available 4 byte boundary.
After that, the inner loop consumes respectively 4 or 8 bytes of data per
iteration for the 32/64 version of the algorithm. So the computation of len
and rem_len *must* be made differently in the two cases.

> 
> ....
> 
> > Index: for-next/lib/gen_crc32table.c
> >
> ==========================================================
> =========
> > --- for-next.orig/lib/gen_crc32table.c
> > +++ for-next/lib/gen_crc32table.c
> > @@ -1,23 +1,28 @@
> ..
> >
> > -static void output_table(uint32_t (*table)[256], int len, char *trans)
> > +static void output_table(uint32_t (*table)[256], int rows, int len,
char
> *trans)
> 
> This table is not always 256 entries. I suggested a cleaner impl. earlier.
> Something like this:
> 
> -static void output_table(uint32_t table[4][256], int len, char *trans)
> +static void output_table(uint32_t table[], int len, char *trans)
>  {
> -	int i, j;
> -
> -	for (j = 0 ; j < 4; j++) {
> -		printf("{");
> -		for (i = 0; i < len - 1; i++) {
> -			if (i % ENTRIES_PER_LINE == 0)
> -				printf("\n");
> -			printf("%s(0x%8.8xL), ", trans, table[j][i]);
> -		}
> -		printf("%s(0x%8.8xL)},\n", trans, table[j][len - 1]);
> +	int i;
> +
> +	printf("{");
> +	for (i = 0; i < len - 1; i++) {
> +		if (i % ENTRIES_PER_LINE == 0)
> +			printf("\n");
> +		printf("%s(0x%8.8xL), ", trans, table[i]);
>  	}
> +	printf("%s(0x%8.8xL)},\n", trans, table[len - 1]);
>  }
> 
>  int main(int argc, char** argv)
>  {
> +	int i;
> +
>  	printf("/* this file is generated - do not edit */\n\n");
> 
>  	if (CRC_LE_BITS > 1) {
>  		crc32init_le();
> -		printf("static const u32 crc32table_le[4][256] = {");
> -		output_table(crc32table_le, LE_TABLE_SIZE, "tole");
> +		printf("static const u32 crc32table_le[%d][%d] = {",
> +		       LE_TABLE_ROWS, LE_TABLE_SIZE);
> +		for (i = 0 ; i < LE_TABLE_ROWS; i++)
> +			output_table(crc32table_le[i], LE_TABLE_SIZE,
> "tole");
>  		printf("};\n");
>  	}
> 
>  	if (CRC_BE_BITS > 1) {
>  		crc32init_be();
> -		printf("static const u32 crc32table_be[4][256] = {");
> -		output_table(crc32table_be, BE_TABLE_SIZE, "tobe");
> +		printf("static const u32 crc32table_be[%d][%d] = {",
> +		       BE_TABLE_ROWS, BE_TABLE_SIZE);
> +		for (i = 0 ; i < BE_TABLE_ROWS; i++)
> +			output_table(crc32table_be[i], BE_TABLE_SIZE,
> "tobe");
>  		printf("};\n");
>  	}
> 


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ