lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <6dc928cd-adbb-0304-1308-fb556d48698d@163.com>
Date:   Fri, 27 Jul 2018 13:48:43 +0800
From:   Zhaoxiu Zeng <zengzhaoxiu@....com>
To:     Greg Kroah-Hartman <gregkh@...uxfoundation.org>
Cc:     Andrew Morton <akpm@...ux-foundation.org>,
        Matthew Wilcox <mawilcox@...rosoft.com>,
        Dan Carpenter <dan.carpenter@...cle.com>,
        Geert Uytterhoeven <geert@...ux-m68k.org>,
        Thomas Gleixner <tglx@...utronix.de>,
        Andrey Ryabinin <aryabinin@...tuozzo.com>,
        linux-kernel@...r.kernel.org, Zhaoxiu Zeng <zhaoxiu.zeng@...il.com>
Subject: Re: [PATCH 1/1] lib: use sunday algorithm to do strstr() and
 strnstr()

在 2018/7/27 1:17, Zhaoxiu Zeng 写道:
> 在 2018/7/23 2:37, Greg Kroah-Hartman 写道:
>> On Mon, Jul 23, 2018 at 01:37:15AM +0800, Zhaoxiu Zeng wrote:
>>> From: Zhaoxiu Zeng <zhaoxiu.zeng@...il.com>
>>>
>>> The Sunday algorithm is a variation of Boyer-Moore algorithm, it is easy and fast.
>>> For the Sunday algorithm, to see
>>> 	http://www.inf.fh-flensburg.de/lang/algorithmen/pattern/sundayen.htm
>>
>> So you say, but what does this really buy us?  Why make this change?
>> How was it tested?  What is the downside of not taking this?
>>
>> thanks,
>>
>> greg k-h
>>
> 
> I use the following program to test on fc28.
> Compile with O2, the new version is almost 2X faster than the original.
> The code size of the original is 0x80, the newer is 0xB0.
> 
> 
> #include <stdio.h>
> #include <stdlib.h>
> #include <stdint.h>
> #include <string.h>
> #include <time.h>
> #include <unistd.h>
> 
> /**
>  * strstr - Find the first substring in a %NUL terminated string
>  * @s1: The string to be searched
>  * @s2: The string to search for
>  */
> char *strstr1(const char *s1, const char *s2)
> {
> 	size_t l1, l2;
> 
> 	l2 = strlen(s2);
> 	if (!l2)
> 		return (char *)s1;
> 	l1 = strlen(s1);
> 	while (l1 >= l2) {
> 		l1--;
> 		if (!memcmp(s1, s2, l2))
> 			return (char *)s1;
> 		s1++;
> 	}
> 	return NULL;
> }
> 
> char *strstr2(const char *s1, const char *s2)
> {
> 	size_t l2;
> 	const char *pchk = s1;
> 
> #if 1//def __HAVE_ARCH_STRLEN
> 	l2 = strlen(s2);
> #else
> 	for (l2 = 0; s2[l2] != 0; l2++)
> 		/* nothing */;
> #endif
> 	if (!l2)
> 		return (char *)s1;
> 
> 	/*
> 	 * Sunday matching
> 	 */
> 	while (1) {
> 		size_t i;
> 		char k;
> 
> 		/* compare */
> 		for (i = 0; i < l2; i++) {
> 			if (s1[i] != s2[i])
> 				break;
> 		}
> 		if (i >= l2)
> 			return (char *)s1;
> 
> 		/* if s1 terminate early? */
> 		if (pchk < s1 + i)
> 			pchk = s1 + i;
> 		do {
> 			k = *pchk;
> 			if (k == 0)
> 				return NULL;
> 		} while (pchk++ != s1 + l2);
> 
> 		/* find the k's last presence in s2 (k = s1[l2]) */
> 		for (i = l2; --i != (size_t)-1; ) {
> 			if (k == s2[i])
> 				break;
> 		}
> 
> 		/* shift */
> 		s1 += l2 - i;
> 	}
> }
> 
> #ifdef __i386__
> #  define RDTSC_CLOBBER "%eax", "%ebx", "%ecx", "%edx"
> #elif __x86_64__
> #  define RDTSC_CLOBBER "%rax", "%rbx", "%rcx", "%rdx"
> #else
> # error unknown platform
> #endif
> 
> #define RDTSC_START(cycles)                                \
>     do {                                                   \
>         register unsigned cyc_high, cyc_low;               \
>         asm volatile("CPUID\n\t"                           \
>                      "RDTSC\n\t"                           \
>                      "mov %%edx, %0\n\t"                   \
>                      "mov %%eax, %1\n\t"                   \
>                      : "=r" (cyc_high), "=r" (cyc_low)     \
>                      :: RDTSC_CLOBBER);                    \
>         (cycles) = ((uint64_t)cyc_high << 32) | cyc_low;   \
>     } while (0)
> 
> #define RDTSC_STOP(cycles)                                 \
>     do {                                                   \
>         register unsigned cyc_high, cyc_low;               \
>         asm volatile("RDTSCP\n\t"                          \
>                      "mov %%edx, %0\n\t"                   \
>                      "mov %%eax, %1\n\t"                   \
>                      "CPUID\n\t"                           \
>                      : "=r" (cyc_high), "=r" (cyc_low)     \
>                      :: RDTSC_CLOBBER);                    \
>         (cycles) = ((uint64_t)cyc_high << 32) | cyc_low;   \
>     } while (0)
> 
> typedef unsigned long long TIME_T;
> #define start_time(start)		RDTSC_START(start)
> #define stop_time(stop)			RDTSC_STOP(stop)
> #define diff_time(start, stop)	(stop - start)
> 
> #define MAX_HAYSTACK_LENGTH 64
> #define MAX_NEEDLE_LENGTH 16
> #define TEST_LOOPS 1000
> 
> char haystack[MAX_HAYSTACK_LENGTH + 1];
> char needle[MAX_NEEDLE_LENGTH + 1];
> 
> int main(int argc, char **argv)
> {
>     size_t i, j, n;
>     void *p1, *p2;
> 
>     srand(time(0));
> 
>     for (i = 0; i < MAX_HAYSTACK_LENGTH; ) {
>         haystack[i] = rand();
>         if (haystack[i] != 0)
>             i++;
>     }
>     haystack[i] = 0;
> 
>     for (i = 0; i < MAX_HAYSTACK_LENGTH; i++) {
>         TIME_T start, stop;
>         unsigned long long elapsed1, elapsed2;
> 
>         j = rand() % MAX_NEEDLE_LENGTH;
>         if (i <= MAX_HAYSTACK_LENGTH - (MAX_NEEDLE_LENGTH - j))
>             n = i;
>         else
>             n = MAX_HAYSTACK_LENGTH - (MAX_NEEDLE_LENGTH - j);
>         memcpy(needle + j, haystack + n, MAX_NEEDLE_LENGTH - j);
>         needle[MAX_NEEDLE_LENGTH] = 0;
> 
>         elapsed1 = ~0UL;
>         for (n = 0; n < TEST_LOOPS; n++) {
>             start_time(start);
>             asm volatile("":::"memory");
>             p1 = strstr1(haystack, needle + j);
>             asm volatile("":::"memory");
>             stop_time(stop);
> 
>             if (elapsed1 > diff_time(start, stop))
>                 elapsed1 = diff_time(start, stop);
>         }
> 
>         elapsed2 = ~0UL;
>         for (n = 0; n < TEST_LOOPS; n++) {
>             start_time(start);
>             asm volatile("":::"memory");
>             p2 = strstr2(haystack, needle + j);
>             asm volatile("":::"memory");
>             stop_time(stop);
> 
>             if (elapsed2 > diff_time(start, stop))
>                 elapsed2 = diff_time(start, stop);
>         }
> 
>         if (p1 != p2)
>             fprintf(stderr, "Error: %p != %p\n", p1, p2);
>         else
>             fprintf(stdout, "%3d, %3d:\t%lld,\t%lld\n", i, j, elapsed1, elapsed2);
>     }
> 
>     return 0;
> }
> 
> The test result:
> [zzx@...ora strstr]$ ./test
>   0,   3:	68,	68
>   1,  13:	74,	66
>   2,   2:	88,	94
>   3,   5:	96,	88
>   4,   8:	108,	80
>   5,  13:	110,	76
>   6,  12:	132,	82
>   7,   9:	142,	82
>   8,  11:	152,	88
>   9,  13:	146,	90
>  10,  14:	154,	94
>  11,  15:	132,	104
>  12,   1:	196,	114
>  13,   8:	206,	108
>  14,  14:	186,	110
>  15,  13:	194,	112
>  16,   0:	260,	124
>  17,  10:	258,	124
>  18,  15:	208,	136
>  19,  11:	276,	136
>  20,  13:	256,	128
>  21,  12:	292,	144
>  22,  11:	300,	142
>  23,  13:	278,	144
>  24,   7:	334,	152
>  25,   1:	346,	166
>  26,   6:	368,	168
>  27,  15:	278,	182
>  28,   1:	374,	168
>  29,   3:	382,	188
>  30,   8:	398,	172
>  31,   4:	402,	188
>  32,   0:	430,	180
>  33,  10:	398,	184
>  34,  10:	410,	192
>  35,   8:	434,	180
>  36,   7:	444,	198
>  37,   6:	454,	204
>  38,   1:	464,	220
>  39,   2:	472,	206
>  40,   4:	482,	210
>  41,  15:	388,	262
>  42,   2:	502,	210
>  43,   5:	510,	220
>  44,   8:	520,	214
>  45,   0:	564,	236
>  46,   3:	550,	242
>  47,   8:	552,	262
>  48,  10:	528,	270
>  49,   2:	572,	256
>  50,   3:	586,	246
>  51,   7:	590,	278
>  52,  14:	506,	308
>  53,  14:	514,	298
>  54,   4:	602,	240
>  55,   5:	626,	284
>  56,  15:	506,	316
>  57,  10:	606,	326
>  58,   4:	606,	240
>  59,   0:	596,	240
>  60,  13:	570,	316
>  61,  13:	576,	328
>  62,   5:	618,	284
>  63,  13:	576,	328
> 
> Thanks!
> 

The original strnstr might has a bug too!
For example, assume s1 is "123\0abc...." and s2 is "abc\0",
call strnstr(s1, s2, 7) will return &s1[4], but the correct result is NULL.

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ