lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <9b18802b-a451-eb81-3317-1de406f03118@163.com>
Date:   Fri, 27 Jul 2018 01:17:06 +0800
From:   Zhaoxiu Zeng <zengzhaoxiu@....com>
To:     Greg Kroah-Hartman <gregkh@...uxfoundation.org>
Cc:     Andrew Morton <akpm@...ux-foundation.org>,
        Matthew Wilcox <mawilcox@...rosoft.com>,
        Dan Carpenter <dan.carpenter@...cle.com>,
        Geert Uytterhoeven <geert@...ux-m68k.org>,
        Thomas Gleixner <tglx@...utronix.de>,
        Andrey Ryabinin <aryabinin@...tuozzo.com>,
        linux-kernel@...r.kernel.org, Zhaoxiu Zeng <zhaoxiu.zeng@...il.com>
Subject: Re: [PATCH 1/1] lib: use sunday algorithm to do strstr() and
 strnstr()

在 2018/7/23 2:37, Greg Kroah-Hartman 写道:
> On Mon, Jul 23, 2018 at 01:37:15AM +0800, Zhaoxiu Zeng wrote:
>> From: Zhaoxiu Zeng <zhaoxiu.zeng@...il.com>
>>
>> The Sunday algorithm is a variation of Boyer-Moore algorithm, it is easy and fast.
>> For the Sunday algorithm, to see
>> 	http://www.inf.fh-flensburg.de/lang/algorithmen/pattern/sundayen.htm
> 
> So you say, but what does this really buy us?  Why make this change?
> How was it tested?  What is the downside of not taking this?
> 
> thanks,
> 
> greg k-h
> 

I use the following program to test on fc28.
Compile with O2, the new version is almost 2X faster than the original.
The code size of the original is 0x80, the newer is 0xB0.


#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

/**
 * strstr - Find the first substring in a %NUL terminated string
 * @s1: The string to be searched
 * @s2: The string to search for
 */
char *strstr1(const char *s1, const char *s2)
{
	size_t l1, l2;

	l2 = strlen(s2);
	if (!l2)
		return (char *)s1;
	l1 = strlen(s1);
	while (l1 >= l2) {
		l1--;
		if (!memcmp(s1, s2, l2))
			return (char *)s1;
		s1++;
	}
	return NULL;
}

char *strstr2(const char *s1, const char *s2)
{
	size_t l2;
	const char *pchk = s1;

#if 1//def __HAVE_ARCH_STRLEN
	l2 = strlen(s2);
#else
	for (l2 = 0; s2[l2] != 0; l2++)
		/* nothing */;
#endif
	if (!l2)
		return (char *)s1;

	/*
	 * Sunday matching
	 */
	while (1) {
		size_t i;
		char k;

		/* compare */
		for (i = 0; i < l2; i++) {
			if (s1[i] != s2[i])
				break;
		}
		if (i >= l2)
			return (char *)s1;

		/* if s1 terminate early? */
		if (pchk < s1 + i)
			pchk = s1 + i;
		do {
			k = *pchk;
			if (k == 0)
				return NULL;
		} while (pchk++ != s1 + l2);

		/* find the k's last presence in s2 (k = s1[l2]) */
		for (i = l2; --i != (size_t)-1; ) {
			if (k == s2[i])
				break;
		}

		/* shift */
		s1 += l2 - i;
	}
}

#ifdef __i386__
#  define RDTSC_CLOBBER "%eax", "%ebx", "%ecx", "%edx"
#elif __x86_64__
#  define RDTSC_CLOBBER "%rax", "%rbx", "%rcx", "%rdx"
#else
# error unknown platform
#endif

#define RDTSC_START(cycles)                                \
    do {                                                   \
        register unsigned cyc_high, cyc_low;               \
        asm volatile("CPUID\n\t"                           \
                     "RDTSC\n\t"                           \
                     "mov %%edx, %0\n\t"                   \
                     "mov %%eax, %1\n\t"                   \
                     : "=r" (cyc_high), "=r" (cyc_low)     \
                     :: RDTSC_CLOBBER);                    \
        (cycles) = ((uint64_t)cyc_high << 32) | cyc_low;   \
    } while (0)

#define RDTSC_STOP(cycles)                                 \
    do {                                                   \
        register unsigned cyc_high, cyc_low;               \
        asm volatile("RDTSCP\n\t"                          \
                     "mov %%edx, %0\n\t"                   \
                     "mov %%eax, %1\n\t"                   \
                     "CPUID\n\t"                           \
                     : "=r" (cyc_high), "=r" (cyc_low)     \
                     :: RDTSC_CLOBBER);                    \
        (cycles) = ((uint64_t)cyc_high << 32) | cyc_low;   \
    } while (0)

typedef unsigned long long TIME_T;
#define start_time(start)		RDTSC_START(start)
#define stop_time(stop)			RDTSC_STOP(stop)
#define diff_time(start, stop)	(stop - start)

#define MAX_HAYSTACK_LENGTH 64
#define MAX_NEEDLE_LENGTH 16
#define TEST_LOOPS 1000

char haystack[MAX_HAYSTACK_LENGTH + 1];
char needle[MAX_NEEDLE_LENGTH + 1];

int main(int argc, char **argv)
{
    size_t i, j, n;
    void *p1, *p2;

    srand(time(0));

    for (i = 0; i < MAX_HAYSTACK_LENGTH; ) {
        haystack[i] = rand();
        if (haystack[i] != 0)
            i++;
    }
    haystack[i] = 0;

    for (i = 0; i < MAX_HAYSTACK_LENGTH; i++) {
        TIME_T start, stop;
        unsigned long long elapsed1, elapsed2;

        j = rand() % MAX_NEEDLE_LENGTH;
        if (i <= MAX_HAYSTACK_LENGTH - (MAX_NEEDLE_LENGTH - j))
            n = i;
        else
            n = MAX_HAYSTACK_LENGTH - (MAX_NEEDLE_LENGTH - j);
        memcpy(needle + j, haystack + n, MAX_NEEDLE_LENGTH - j);
        needle[MAX_NEEDLE_LENGTH] = 0;

        elapsed1 = ~0UL;
        for (n = 0; n < TEST_LOOPS; n++) {
            start_time(start);
            asm volatile("":::"memory");
            p1 = strstr1(haystack, needle + j);
            asm volatile("":::"memory");
            stop_time(stop);

            if (elapsed1 > diff_time(start, stop))
                elapsed1 = diff_time(start, stop);
        }

        elapsed2 = ~0UL;
        for (n = 0; n < TEST_LOOPS; n++) {
            start_time(start);
            asm volatile("":::"memory");
            p2 = strstr2(haystack, needle + j);
            asm volatile("":::"memory");
            stop_time(stop);

            if (elapsed2 > diff_time(start, stop))
                elapsed2 = diff_time(start, stop);
        }

        if (p1 != p2)
            fprintf(stderr, "Error: %p != %p\n", p1, p2);
        else
            fprintf(stdout, "%3d, %3d:\t%lld,\t%lld\n", i, j, elapsed1, elapsed2);
    }

    return 0;
}

The test result:
[zzx@...ora strstr]$ ./test
  0,   3:	68,	68
  1,  13:	74,	66
  2,   2:	88,	94
  3,   5:	96,	88
  4,   8:	108,	80
  5,  13:	110,	76
  6,  12:	132,	82
  7,   9:	142,	82
  8,  11:	152,	88
  9,  13:	146,	90
 10,  14:	154,	94
 11,  15:	132,	104
 12,   1:	196,	114
 13,   8:	206,	108
 14,  14:	186,	110
 15,  13:	194,	112
 16,   0:	260,	124
 17,  10:	258,	124
 18,  15:	208,	136
 19,  11:	276,	136
 20,  13:	256,	128
 21,  12:	292,	144
 22,  11:	300,	142
 23,  13:	278,	144
 24,   7:	334,	152
 25,   1:	346,	166
 26,   6:	368,	168
 27,  15:	278,	182
 28,   1:	374,	168
 29,   3:	382,	188
 30,   8:	398,	172
 31,   4:	402,	188
 32,   0:	430,	180
 33,  10:	398,	184
 34,  10:	410,	192
 35,   8:	434,	180
 36,   7:	444,	198
 37,   6:	454,	204
 38,   1:	464,	220
 39,   2:	472,	206
 40,   4:	482,	210
 41,  15:	388,	262
 42,   2:	502,	210
 43,   5:	510,	220
 44,   8:	520,	214
 45,   0:	564,	236
 46,   3:	550,	242
 47,   8:	552,	262
 48,  10:	528,	270
 49,   2:	572,	256
 50,   3:	586,	246
 51,   7:	590,	278
 52,  14:	506,	308
 53,  14:	514,	298
 54,   4:	602,	240
 55,   5:	626,	284
 56,  15:	506,	316
 57,  10:	606,	326
 58,   4:	606,	240
 59,   0:	596,	240
 60,  13:	570,	316
 61,  13:	576,	328
 62,   5:	618,	284
 63,  13:	576,	328

Thanks!

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ