lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <20101218225436.28264.qmail@science.horizon.com>
Date:	18 Dec 2010 17:54:36 -0500
From:	"George Spelvin" <linux@...izon.com>
To:	bharrosh@...asas.com
Cc:	linux@...izon.com, linux-arch@...r.kernel.org,
	linux-fsdevel@...r.kernel.org, linux-kernel@...r.kernel.org
Subject: Re: Big git diff speedup by avoiding x86 "fast string" memcmp

> static inline int dentry_memcmp_long(const unsigned char *cs,
> 				const unsigned char *ct, ssize_t count)
> {
> 	int ret;
> 	const unsigned long *ls = (const unsigned long *)cs;
> 	const unsigned long *lt = (const unsigned long *)ct;
> 
> 	while (count > 8) {
> 		ret = (*cs != *ct);
> 		if (ret)
> 			break;
> 		cs++;
> 		ct++;
> 		count-=8;
> 	}
> 	if (count) {
> 		unsigned long t = *ct & ((0xffffffffffffffff >> ((8 - count) * 8))
> 		ret = (*cs != t)
> 	}
> 
> 	return ret;
> }

First, let's get the code right, and use correct types, but also, there
are some tricks to reduce the masking cost.

As long as you have to mask one string, *and* don't have to worry about
running off the end of mapped memory, there's no additional cost to
masking both in the loop.  Just test (a ^ b) & mask.

#include <limits.h>	/* ULONG_MAX: sizeof is NOT usable in #if */

/*
 * Decide byte order once, shared by both mask implementations below.
 * Prefer the compiler's own __BYTE_ORDER__ (predefined by GCC/Clang);
 * fall back to the kernel-style __LITTLE_ENDIAN / __BIG_ENDIAN macros,
 * of which the kernel defines exactly one.  (Userspace <endian.h>
 * defines BOTH, so testing defined(__LITTLE_ENDIAN) alone is wrong
 * there -- hence the __BYTE_ORDER__ check comes first.)
 */
#if defined(__BYTE_ORDER__)
# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#  define DENTRY_MEMCMP_LE 1
# endif
#elif defined(__LITTLE_ENDIAN)
# define DENTRY_MEMCMP_LE 1
#elif !defined(__BIG_ENDIAN)
# error undefined endianness
#endif

#if 1	/* Table lookup */

/*
 * dentry_memcmp_mask(count): mask covering the first `count` bytes,
 * in memory order, of an unsigned long; count in 1..sizeof(long).
 * "First" bytes are the low-order ones on little-endian machines and
 * the high-order ones on big-endian machines.
 */
static unsigned long const dentry_memcmp_mask[sizeof(unsigned long)] = {
#ifdef DENTRY_MEMCMP_LE
	0xffUL, 0xffffUL, 0xffffffUL, 0xffffffffUL,
# if ULONG_MAX > 0xffffffffUL	/* long is 8 bytes */
	0xffffffffffUL, 0xffffffffffffUL,
	0xffffffffffffffUL, 0xffffffffffffffffUL
# endif
#else	/* big-endian */
# if ULONG_MAX == 0xffffffffUL	/* long is 4 bytes */
	0xff000000UL, 0xffff0000UL, 0xffffff00UL, 0xffffffffUL
# else
	0xff00000000000000UL, 0xffff000000000000UL,
	0xffffff0000000000UL, 0xffffffff00000000UL,
	0xffffffffff000000UL, 0xffffffffffff0000UL,
	0xffffffffffffff00UL, 0xffffffffffffffffUL
# endif
#endif
};

#define dentry_memcmp_mask(count) dentry_memcmp_mask[(count)-1]

#else /* In-line code */

#ifdef DENTRY_MEMCMP_LE
#define dentry_memcmp_mask(count) (-1ul >> (sizeof(unsigned long) - (count)) * 8)
#else
#define dentry_memcmp_mask(count) (-1ul << (sizeof(unsigned long) - (count)) * 8)
#endif

#endif

/*
 * Compare the first `count` bytes of cs and ct, one word at a time.
 * Returns true if they differ, false if equal (NOT memcmp's three-way
 * contract).  Requires count >= 1.
 *
 * Deliberately reads whole words, so the final load may touch up to
 * sizeof(long)-1 bytes past the end of either string -- only safe when
 * the caller guarantees that memory is mapped (e.g. kernel dentry
 * names).  The char* -> long* casts also assume suitable alignment and
 * sidestep strict aliasing, as the original patch under discussion does.
 */
static inline bool dentry_memcmp_long(unsigned char const *cs,
				unsigned char const *ct, ssize_t count)
{
	unsigned long const *ls = (unsigned long const *)cs;
	unsigned long const *lt = (unsigned long const *)ct;

	/* Whole words: iterate the long* pointers, not the char* ones,
	 * and keep the comparison signed (count is ssize_t). */
	while (count > (ssize_t)sizeof *ls) {
		if (*ls != *lt)
			return true;
		ls++;
		lt++;
		count -= sizeof *ls;
	}
	/* Last 1..sizeof(long) bytes: mask the tail of both words. */
	return ((*lt ^ *ls) & dentry_memcmp_mask(count)) != 0;
}

If your string is at least 8 bytes long, and the processor has fast unaligned
loads, you can skip the mask entirely by redundantly comparing some bytes
(although the code bloat is probably undesirable for inline code):

/*
 * Variant for machines with fast unaligned loads: when at least one
 * whole word remains, skip the mask entirely by backing the pointers
 * up so the final load ends exactly at the string's end, redundantly
 * re-comparing a few bytes.  Returns true if the first `count` bytes
 * differ.  Requires count >= 1.
 *
 * The count < sizeof(long) path still needs the mask and, like the
 * masked version, may read past the end of the buffers -- the caller
 * must guarantee that memory is mapped.
 */
static inline bool dentry_memcmp_long(const unsigned char *cs,
				const unsigned char *ct, ssize_t count)
{
	unsigned long const *ls = (unsigned long const *)cs;
	unsigned long const *lt = (unsigned long const *)ct;

	/* Short string: a single masked word compare. */
	if (count < (ssize_t)sizeof *ls)
		return ((*lt ^ *ls) & dentry_memcmp_mask(count)) != 0;

	/* Whole words, via the long* pointers (the original draft
	 * mistakenly stepped the char* parameters one byte at a time). */
	while (count > (ssize_t)sizeof *ls) {
		if (*ls != *lt)
			return true;
		ls++;
		lt++;
		count -= sizeof *ls;
	}
	/* 1..sizeof(long) bytes left: step back so the last word ends at
	 * the string end; some bytes are compared twice, none masked. */
	ls = (unsigned long const *)((char const *)ls + count - sizeof *ls);
	lt = (unsigned long const *)((char const *)lt + count - sizeof *lt);
	return *ls != *lt;
}
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ