[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <AANLkTinZUTSADw44UJ2g98AFCzs15DVt6zqLYr61HEaS@mail.gmail.com>
Date: Thu, 27 May 2010 19:35:05 +0300
From: Andy Shevchenko <andy.shevchenko@...il.com>
To: Fredrik Gustafsson <iveqy@...qy.com>
Cc: linux-kernel@...r.kernel.org
Subject: Re: hex_to_bin speedup
On Thu, May 27, 2010 at 5:28 PM, Fredrik Gustafsson <iveqy@...qy.com> wrote:
> I saw from the mailinglist that the initial patch suggested for this
> function did not use tolower() but an if statement.
Exactly, and there was a small discussion around it.
> On Thu, May 27, 2010 at 02:22:30PM +0300, Andy Shevchenko wrote:
>> To your test case:
>> - the data is not random (CPU cache hits distribution is differ to real)
> I thought that because the best case senario was equal, only the worst
> case was of interest. Of course you're right. I rewrote the testcase to
> use random numbers instead.
>
>> - the compiler call and parameters are not published (gcc -O2
>> probably removes unused parts of code)
> I didn't use any parameters at all. However with the improved benchmark
> I tested both without and with -O2 and got different results. I'm not
> qualified to judge what this can do for the kernel.
>
>> - please, provide assembler excerpts to compare implementations
> Please se [3]-[6]
>
> P.S. How come we don't CC the LKML?
My fault. Return back to public.
> --
> iveqy
>
> [1] Benchmark results
> $ gcc -O2 main.c && ./a.out
> tolower() was faster 2657 times
> if-statement was faster 2571 times
> Equal fast 4772 times
So, optimizer do the job for tolower(). Thus, nothing to worry about.
> $ gcc main.c && ./a.out
> tolower() was faster 1532 times
> if-statement was faster 8465 times
> Equal fast 3 times
>
> [2] Benchmark source
>
> #include <sys/time.h>
> #include <stdio.h>
>
> int hex_to_bin_if(char ch)
> {
> if ((ch >= '0') && (ch <= '9'))
> return ch - '0';
> if ((ch >= 'a') && (ch <= 'f'))
> return ch - 'a' + 10;
> if ((ch >= 'A') && (ch <= 'F'))
> return ch - 'A' + 10;
> return -1;
> }
> int hex_to_bin_to(char ch)
> {
> if ((ch >= '0') && (ch <= '9'))
> return ch - '0';
> ch = tolower(ch);
> if ((ch >= 'a') && (ch <= 'f'))
> return ch - 'a' + 10;
> return -1;
> }
> char bin_to_hex(int i)
> {
> switch(i) {
> case 0:
> return '0';
> case 1:
> return '1';
> case 2:
> return '2';
> case 3:
> return '3';
> case 4:
> return '4';
> case 5:
> return '5';
> case 6:
> return '6';
> case 7:
> return '7';
> case 8:
> return '8';
> case 9:
> return '9';
> case 10:
> return 'a';
> case 11:
> return 'b';
> case 12:
> return 'c';
> case 13:
> return 'd';
> case 14:
> return 'e';
> case 15:
> return 'f';
> case 16:
> return '0';
> case 17:
> return '1';
> case 18:
> return '2';
> case 19:
> return '3';
> case 20:
> return '4';
> case 21:
> return '5';
> case 22:
> return '6';
> case 23:
> return '7';
> case 24:
> return '8';
> case 25:
> return '9';
> case 26:
> return 'A';
> case 27:
> return 'B';
> case 28:
> return 'C';
> case 29:
> return 'D';
> case 30:
> return 'E';
> case 31:
> return 'F';
> }
> }
> int main()
> {
> // Needed variables
> struct timeval start, end;
> int i,itr,res,ran,j,won_if = 0, won_to = 0,equal = 0;
> itr = 10000;
> char c;
> time_t tif,tto,diff;
> unsigned int iseed = (unsigned int)time(NULL);
> srand(iseed);
>
> for(j = 0; j < itr; j++) {
> // Test if-statement
> gettimeofday(&start,NULL);
> for(i = 0; i < itr; i++) {
> ran = rand() % 32;
> c = bin_to_hex(ran);
> res = hex_to_bin_if(c);
> }
> gettimeofday(&end,NULL);
> tif = end.tv_usec - start.tv_usec;
>
> // Test tolower()
> gettimeofday(&start,NULL);
> for(i = 0; i < itr; i++) {
> ran = rand() % 32;
> c = bin_to_hex(ran);
> res = hex_to_bin_to(c);
> }
> gettimeofday(&end,NULL);
> tto = end.tv_usec - start.tv_usec;
>
> // Calculate winner
> diff = tto - tif;
> if(diff == 0) {
> equal++;
> } else if(tto - tif > 0) {
> won_if++;
> } else {
> diff *= -1;
> won_to++;
> }
> }
> printf("tolower() was faster %d times\n",won_to);
> printf("if-statement was faster %d times\n",won_if);
> printf("Equal fast %d times\n",equal);
> }
> [3] Assembler code if-statement (gcc -S main.c)
> hex_to_bin_if:
> pushl %ebp
> movl %esp, %ebp
> subl $8, %esp
> movl 8(%ebp), %eax
> movb %al, -4(%ebp)
> cmpb $47, -4(%ebp)
> jle .L2
> cmpb $57, -4(%ebp)
> jg .L2
> movsbl -4(%ebp),%eax
> subl $48, %eax
> movl %eax, -8(%ebp)
> jmp .L3
> .L2:
> cmpb $96, -4(%ebp)
> jle .L4
> cmpb $102, -4(%ebp)
> jg .L4
> movsbl -4(%ebp),%eax
> subl $87, %eax
> movl %eax, -8(%ebp)
> jmp .L3
> .L4:
> cmpb $64, -4(%ebp)
> jle .L5
> cmpb $70, -4(%ebp)
> jg .L5
> movsbl -4(%ebp),%eax
> subl $55, %eax
> movl %eax, -8(%ebp)
> jmp .L3
> .L5:
> movl $-1, -8(%ebp)
> .L3:
> movl -8(%ebp), %eax
> leave
> ret
> .size hex_to_bin_if, .-hex_to_bin_if
> .globl hex_to_bin_to
> .type hex_to_bin_to, @function
>
> [4] Assembler code tolower() (gcc -S main.c)
> hex_to_bin_to:
> pushl %ebp
> movl %esp, %ebp
> subl $24, %esp
> movl 8(%ebp), %eax
> movb %al, -4(%ebp)
> cmpb $47, -4(%ebp)
> jle .L8
> cmpb $57, -4(%ebp)
> jg .L8
> movsbl -4(%ebp),%eax
> subl $48, %eax
> movl %eax, -8(%ebp)
> jmp .L9
> .L8:
> movsbl -4(%ebp),%eax
> movl %eax, (%esp)
> call tolower
> movb %al, -4(%ebp)
> cmpb $96, -4(%ebp)
> jle .L10
> cmpb $102, -4(%ebp)
> jg .L10
> movsbl -4(%ebp),%eax
> subl $87, %eax
> movl %eax, -8(%ebp)
> jmp .L9
> .L10:
> movl $-1, -8(%ebp)
> .L9:
> movl -8(%ebp), %eax
> leave
> ret
> .size hex_to_bin_to, .-hex_to_bin_to
>
> [5] Assembler code if-statement (gcc -S -O2 main.c)
> hex_to_bin_if:
> pushl %ebp
> movl %esp, %ebp
> movzbl 8(%ebp), %edx
> leal -48(%edx), %eax
> cmpb $9, %al
> jbe .L8
> leal -97(%edx), %eax
> cmpb $5, %al
> ja .L4
> movsbl %dl,%eax
> leal -87(%eax), %ecx
> .L3:
> movl %ecx, %eax
> popl %ebp
> ret
> .p2align 4,,7
> .p2align 3
> .L8:
> movsbl %dl,%eax
> leal -48(%eax), %ecx
> movl %ecx, %eax
> popl %ebp
> ret
> .p2align 4,,7
> .p2align 3
> .L4:
> leal -65(%edx), %eax
> movl $-1, %ecx
> cmpb $5, %al
> ja .L3
> movsbl %dl,%eax
> leal -55(%eax), %ecx
> jmp .L3
> .size hex_to_bin_if, .-hex_to_bin_if
> .p2align 4,,15
>
> [6] Assembler code tolower() (gcc -S -O2 main.c)
> hex_to_bin_to:
> pushl %ebp
> movl %esp, %ebp
> subl $8, %esp
> movzbl 8(%ebp), %edx
> leal -48(%edx), %eax
> cmpb $9, %al
> ja .L38
> movsbl %dl,%eax
> leal -48(%eax), %edx
> .L39:
> movl %edx, %eax
> leave
> ret
> .p2align 4,,7
> .p2align 3
> .L38:
> movsbl %dl,%eax
> movl %eax, (%esp)
> call tolower
> movl $-1, %edx
> movl %eax, %ecx
> leal -97(%ecx), %eax
> cmpb $5, %al
> ja .L39
> movsbl %cl,%eax
> leal -87(%eax), %edx
> jmp .L39
> .size hex_to_bin_to, .-hex_to_bin_to
> .section .rodata.str1.4,"aMS",@progbits,1
> .align 4
>
--
With Best Regards,
Andy Shevchenko
Powered by blists - more mailing lists