netdev - Re: [PATCH 1/2] ipmr: delete redundant variable

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives

Hash Suite: Windows password security audit tool. GUI, reports in PDF.

[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]

Message-ID: <48883131.9070805@cn.fujitsu.com>
Date:	Thu, 24 Jul 2008 15:37:21 +0800
From:	Wang Chen <wangchen@...fujitsu.com>
To:	Ingo Oeser <netdev@...eo.de>
CC:	"David S. Miller" <davem@...emloft.net>,
	NETDEV <netdev@...r.kernel.org>
Subject: Re: [PATCH 1/2] ipmr: delete redundant variable

Ingo Oeser said the following on 2008-7-23 20:05:
> But please check the generated assembly yourself on a CISC and RISC
> machine to get an idea of the effects. It will be a nice learning 
> experience I enjoyed myself already.
> 

I did the experiment.

I used the following C code to compare the two approaches, and the result
is that both perform the same.

----main.c
/* Number of elements in vif_table; also the loop bound used in main(). */
#define maxvif 32

/*
 * Element type of the table scanned by the experiment.
 * Only `link` is read by main(); no other member is accessed in this file.
 */
struct vif {
	int *dev;
	unsigned long bytes_in, bytyes_out;
	unsigned long pkt_in, pkt_out;
	unsigned long rate_limit;
	unsigned char threshhold;
	unsigned short flags;
	int	local, remote;
	int	link;	/* the member main() compares against 1 */
};

/* File-scope table of maxvif elements that main() scans. */
struct vif vif_table[maxvif];

/*
 * Variant 1 (pointer walk): scan vif_table through a pointer that is
 * advanced together with the counter. The loop stops at the first element
 * whose link == 1, or after maxvif elements. The scan result is not used;
 * the function always returns 0.
 */
int main()
{
	struct vif *v;
	int ct;

	v = &vif_table[0];
	/* ct bounds the loop; v is what addresses the current element. */
	for (ct = 0; ct < maxvif; ct++, v++)
		if(v->link==1)
			break;
	return 0;
}
---

---main2.c
/* Number of elements in vif_table; also the loop bound used in main(). */
#define maxvif 32

/*
 * Element type of the table scanned by the experiment.
 * Only `link` is read by main(); no other member is accessed in this file.
 */
struct vif {
	int *dev;
	unsigned long bytes_in, bytyes_out;
	unsigned long pkt_in, pkt_out;
	unsigned long rate_limit;
	unsigned char threshhold;
	unsigned short flags;
	int	local, remote;
	int	link;	/* the member main() compares against 1 */
};

/* File-scope table of maxvif elements that main() scans. */
struct vif vif_table[maxvif];

/*
 * Variant 2 (indexed access): scan vif_table by indexing it with the
 * counter on every iteration. The loop stops at the first element whose
 * link == 1, or after maxvif elements. The scan result is not used; the
 * function always returns 0.
 */
int main()
{
	struct vif *v;
	int ct;

	/* v is assigned here but never read in this variant. */
	v = &vif_table[0];
	/* Each element is addressed as vif_table[ct] instead of through v. */
	for (ct = 0; ct < maxvif; ct++)
		if(vif_table[ct].link==1)
			break;
	return 0;
}
---

Use gcc -S -O2 to compile:
---x86 asm main.s
	.file	"main.c"
	.text
	.p2align 4,,15
.globl main
	.type	main, @function
/* x86 code for the pointer-walk loop: %eax holds the element pointer. */
main:
	/* Prologue: align %esp to 16 bytes, keeping the old stack reachable via %ecx. */
	leal	4(%esp), %ecx
	andl	$-16, %esp
	pushl	-4(%ecx)
	/* %eax = address of vif_table (pointer to element 0). */
	movl	$vif_table, %eax
	pushl	%ebp
	movl	%esp, %ebp
	pushl	%ecx
	/* Enter the loop at the link test. */
	jmp	.L2
	.p2align 4,,7
.L8:
	/* Leave the loop once the pointer is at vif_table+1240 (31 * 40). */
	cmpl	$vif_table+1240, %eax
	je	.L3
	/* Advance the pointer by one 40-byte element. */
	addl	$40, %eax
.L2:
	/* Compare the 32-bit value at offset 36 of the current element with 1. */
	cmpl	$1, 36(%eax)
	jne	.L8
.L3:
	/* Epilogue: return value 0 in %eax, restore the original stack pointer. */
	popl	%ecx
	xorl	%eax, %eax
	popl	%ebp
	leal	-4(%ecx), %esp
	ret
	.size	main, .-main
	.comm	vif_table,1280,32
	.ident	"GCC: (GNU) 4.1.2 20070115 (prerelease) (SUSE Linux)"
	.section	.note.GNU-stack,"",@progbits
---

---x86 asm main2.s
	.file	"main2.c"
	.text
	.p2align 4,,15
.globl main
	.type	main, @function
/* x86 code for the indexed loop: %eax holds a byte offset into vif_table. */
main:
	/* Prologue: align %esp to 16 bytes, keeping the old stack reachable via %ecx. */
	leal	4(%esp), %ecx
	andl	$-16, %esp
	pushl	-4(%ecx)
	/* %eax = 0 (byte offset of element 0). */
	xorl	%eax, %eax
	pushl	%ebp
	movl	%esp, %ebp
	pushl	%ecx
	/* Enter the loop at the link test. */
	jmp	.L2
	.p2align 4,,7
.L8:
	/* Advance the offset by one 40-byte element; leave the loop at 1280 (32 * 40). */
	addl	$40, %eax
	cmpl	$1280, %eax
	je	.L3
.L2:
	/* Compare the 32-bit value at vif_table + offset + 36 with 1. */
	cmpl	$1, vif_table+36(%eax)
	jne	.L8
.L3:
	/* Epilogue: return value 0 in %eax, restore the original stack pointer. */
	popl	%ecx
	xorl	%eax, %eax
	popl	%ebp
	leal	-4(%ecx), %esp
	ret
	.size	main, .-main
	.comm	vif_table,1280,32
	.ident	"GCC: (GNU) 4.1.2 20070115 (prerelease) (SUSE Linux)"
	.section	.note.GNU-stack,"",@progbits
---

In the loop, main.s and main2.s differ only in the following instructions:
main.s :
	cmpl	$vif_table+1240, %eax
	cmpl	$1, 36(%eax)
main2.s:
	cmpl	$1280, %eax
	cmpl	$1, vif_table+36(%eax)
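This difference cannot cause a difference in performance: both loops execute the same five instructions per iteration and differ only in the addressing mode and in the constant used for the loop bound.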

OK. Here is the asm on SPARC (compiled natively, not cross-compiled).
---main.s
                       	.global main                

			/* SPARC code for the pointer-walk loop: %o3 = element pointer, %o4 = counter, %o5 = loaded value. */
			main:
/* 000000	  21 */		sethi	%hi(vif_table),%o5
/* 0x0004	  22 */		or	%g0,0,%o4
/* 0x0008	  21 */		add	%o5,%lo(vif_table),%o3
/* 0x000c	  23 */		ld	[%o3+36],%o5

			/* Loop head: leave the loop if the loaded value is 1; the counter increment sits in the branch delay slot. */
			.L900000106:
/* 0x0010	  23 */		cmp	%o5,1
/* 0x0014	     */		be,pn	%icc,.L77000028
/* 0x0018	  22 */		add	%o4,1,%o4

			/* Advance the pointer by 40 bytes; loop while the counter is below 32. The annulled delay slot loads offset 36 of the next element only when the branch is taken. */
			.L77000025:
/* 0x001c	  22 */		add	%o3,40,%o3
/* 0x0020	     */		cmp	%o4,32
/* 0x0024	     */		bl,a,pt	%icc,.L900000106
/* 0x0028	  23 */		ld	[%o3+36],%o5

			/* Return; the delay-slot instruction sets %o0 to 0. */
			.L77000028:
/* 0x002c	  22 */		retl	! Result =  %o0
/* 0x0030	     */		or	%g0,0,%o0
/* 0x0034	   0 */		.type	main,2
/* 0x0034	   0 */		.size	main,(.-main)
/* 0x0034	   0 */		.global	__fsr_init_value
/* 0x0034	     */		 __fsr_init_value=0
---

---main2.s
                       	.global main   

			/* SPARC code for the indexed loop: %o4 = address of the value being tested (starts at vif_table+36), %o3 = counter, %o5 = loaded value. */
			main:
/* 000000	  22 */		sethi	%hi(vif_table+36),%o5
/* 0x0004	     */		or	%g0,0,%o3
/* 0x0008	     */		add	%o5,%lo(vif_table+36),%o4
/* 0x000c	  23 */		ld	[%o5+%lo(vif_table+36)],%o5

			/* Loop head: leave the loop if the loaded value is 1; the 40-byte address advance sits in the branch delay slot. */
			.L900000106:
/* 0x0010	  23 */		cmp	%o5,1
/* 0x0014	     */		be,pn	%icc,.L77000028
/* 0x0018	  22 */		add	%o4,40,%o4

			/* Increment the counter; loop while it is below 32. The annulled delay slot loads from the advanced address only when the branch is taken. */
			.L77000025:
/* 0x001c	  22 */		add	%o3,1,%o3
/* 0x0020	     */		cmp	%o3,32
/* 0x0024	     */		bl,a,pt	%icc,.L900000106
/* 0x0028	  23 */		ld	[%o4],%o5

			/* Return; the delay-slot instruction sets %o0 to 0. */
			.L77000028:
/* 0x002c	  22 */		retl	! Result =  %o0
/* 0x0030	     */		or	%g0,0,%o0
/* 0x0034	   0 */		.type	main,2
/* 0x0034	   0 */		.size	main,(.-main)
/* 0x0034	   0 */		.global	__fsr_init_value
/* 0x0034	     */		 __fsr_init_value=0
---

In the loop, both versions advance a pointer by sizeof(struct vif) (40 bytes) on each iteration.

So we can conclude that current compilers already optimize indexed access into pointer arithmetic.
:)

Ingo, if you have a different opinion, I would appreciate it if you shared it. :)

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html