Message-ID: <dc93d5299169a33e00fc35a4c5f29ea72764bce0.camel@perches.com>
Date:   Sun, 23 Feb 2020 18:48:38 -0800
From:   Joe Perches <joe@...ches.com>
To:     Alexey Dobriyan <adobriyan@...il.com>
Cc:     akpm@...ux-foundation.org, linux-kernel@...r.kernel.org,
        linux-fsdevel@...r.kernel.org
Subject: Re: [PATCH v3] proc: faster open/read/close with "permanent" files

On Sun, 2020-02-23 at 14:30 +0300, Alexey Dobriyan wrote:
> On Sat, Feb 22, 2020 at 12:39:39PM -0800, Joe Perches wrote:
> > On Sat, 2020-02-22 at 23:15 +0300, Alexey Dobriyan wrote:
> > > Now that "struct proc_ops" exists, we can start putting things into it
> > > which could not fly with the VFS "struct file_operations"...
> > > 
> > > Most of the fs/proc/inode.c file is dedicated to making open/read/.../close
> > > reliable in the event of disappearing /proc entries, which usually happens
> > > when a module is being removed. Files like /proc/cpuinfo, which never
> > > disappear, simply do not need such protection.
> > > 
> > > Save 2 atomic ops, 1 allocation, and 1 free per open/read/close sequence
> > > for such "permanent" files.
> > > 
> > > Enable "permanent" flag for
> > > 
> > > 	/proc/cpuinfo
> > > 	/proc/kmsg
> > > 	/proc/modules
> > > 	/proc/slabinfo
> > > 	/proc/stat
> > > 	/proc/sysvipc/*
> > > 	/proc/swaps
> > > 
> > > More will come once I figure out a foolproof way to prevent module
> > > authors from marking their stuff "permanent" for performance reasons
> > > when it is not.
> > > 
> > > This should help with scalability: the benchmark is "read /proc/cpuinfo R
> > > times by N threads scattered over the system".
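
For orientation, the flag machinery implied by the pde_is_permanent() call
in the hunk quoted below might look roughly like this. This is a sketch
only: the PROC_ENTRY_PERMANENT name, the flags field, and pde_make_permanent()
are assumptions; all the patch itself shows is the pde_is_permanent() test
(and the single-bit testb $0x1 in the objdump at the end of this mail).

/* Sketch; names and the flags field are assumptions, not the patch.
 * Assumes fs/proc/internal.h's struct proc_dir_entry grows a flags word. */
#define PROC_ENTRY_PERMANENT	(1U << 0)

static inline bool pde_is_permanent(const struct proc_dir_entry *pde)
{
	return pde->flags & PROC_ENTRY_PERMANENT;
}

static inline void pde_make_permanent(struct proc_dir_entry *pde)
{
	pde->flags |= PROC_ENTRY_PERMANENT;
}
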
> > 
> > Is this an actual expected use-case?
> 
> Yes.
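
For reference, the benchmark described above reduces to something like the
following user-space sketch; N, R, and the buffer size here are illustrative
choices, not values from the submission (build with "cc -O2 -pthread"):

#include <fcntl.h>
#include <pthread.h>
#include <unistd.h>

#define N 8		/* threads scattered over the system (assumed) */
#define R 100000	/* reads per thread (assumed) */

static void *worker(void *arg)
{
	char buf[4096];

	for (int i = 0; i < R; i++) {
		int fd = open("/proc/cpuinfo", O_RDONLY);

		if (fd < 0)
			continue;
		/* Drain the file to exercise the full open/read/close path. */
		while (read(fd, buf, sizeof(buf)) > 0)
			;
		close(fd);
	}
	return NULL;
}

int main(void)
{
	pthread_t tid[N];

	for (int i = 0; i < N; i++)
		pthread_create(&tid[i], NULL, worker, NULL);
	for (int i = 0; i < N; i++)
		pthread_join(tid[i], NULL);
	return 0;
}
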
> 
> > Is there some additional unnecessary memory consumption
> > on the unscaled systems?
> 
> No, it's the opposite: less memory usage for everyone and a noticeable
> performance improvement for the contended case.
> 
> > >  static ssize_t proc_reg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
> > >  {
> > >  	struct proc_dir_entry *pde = PDE(file_inode(file));
> > >  	ssize_t rv = -EIO;
> > > -	if (use_pde(pde)) {
> > > -		typeof_member(struct proc_ops, proc_read) read;
> > >  
> > > -		read = pde->proc_ops->proc_read;
> > > -		if (read)
> > > -			rv = read(file, buf, count, ppos);
> > > +	if (pde_is_permanent(pde)) {
> > > +		return pde_read(pde, file, buf, count, ppos);
> > > +	} else if (use_pde(pde)) {
> > > +		rv = pde_read(pde, file, buf, count, ppos);
> > >  		unuse_pde(pde);
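
For context, use_pde()/unuse_pde() are the refcounting guards that the
permanent path skips entirely. The shape of use_pde() can be read off the
objdump further down (the lock cmpxchg loop is an inlined
atomic_inc_unless_negative()); a simplified sketch, with the removal-side
bias value and likely/unlikely annotations elided:

static int use_pde(struct proc_dir_entry *pde)
{
	/* Fails once removal has driven the use count negative. */
	return atomic_inc_unless_negative(&pde->in_use);
}

static void unuse_pde(struct proc_dir_entry *pde)
{
	/* Wake the remover once the last user is gone (BIAS is the
	 * negative value removal parks the counter at). */
	if (atomic_dec_return(&pde->in_use) == BIAS)
		complete(pde->pde_unload_completion);
}
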
> > 
> > Perhaps all the function call duplication could be minimized
> > by using code without direct returns like:
> > 
> > 	rv = pde_read(pde, file, buf, count, pos);
> > 	if (!pde_is_permanent(pde))
> > 		unuse_pde(pde);
> > 
> > 	return rv;
> 
> Function call non-duplication is a false goal.

It depends; copy/paste errors are common, and object code
size generally increases.

> Surprisingly, it makes the code bigger:

Not so far as I can tell.  Are you sure?

> 	$ ./scripts/bloat-o-meter ../vmlinux-000 ../obj/vmlinux
> 	add/remove: 0/0 grow/shrink: 1/0 up/down: 10/0 (10)
> 	Function                                     old     new   delta
> 	proc_reg_read                                108     118     +10
> 
> and worse too: "rv" is carried on the stack through the "unuse_pde" call.

With gcc 9.2.1 x86-64 defconfig:

Changing just proc_reg_read to:

static ssize_t proc_reg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
{
	struct proc_dir_entry *pde = PDE(file_inode(file));
	ssize_t rv;

	rv = pde_read(pde, file, buf, count, ppos);
	if (use_pde(pde))
		unuse_pde(pde);

	return rv;
}

gives:

$ size fs/proc/inode.o*
   text	   data	    bss	    dec	    hex	filename
   5448	     28	      0	   5476	   1564	fs/proc/inode.o.suggested
   5526	     28	      0	   5554	   15b2	fs/proc/inode.o.alexey

$ objdump -d fs/proc/inode.o.suggested (grep for proc_reg_read)
0000000000000410 <proc_reg_read>:
 410:	41 54                	push   %r12
 412:	55                   	push   %rbp
 413:	48 8b 47 20          	mov    0x20(%rdi),%rax
 417:	48 8b 68 d0          	mov    -0x30(%rax),%rbp
 41b:	48 8b 45 30          	mov    0x30(%rbp),%rax
 41f:	48 8b 40 10          	mov    0x10(%rax),%rax
 423:	48 85 c0             	test   %rax,%rax
 426:	74 28                	je     450 <proc_reg_read+0x40>
 428:	e8 00 00 00 00       	callq  42d <proc_reg_read+0x1d>
 42d:	49 89 c4             	mov    %rax,%r12
 430:	8b 45 00             	mov    0x0(%rbp),%eax
 433:	85 c0                	test   %eax,%eax
 435:	78 12                	js     449 <proc_reg_read+0x39>
 437:	8d 50 01             	lea    0x1(%rax),%edx
 43a:	f0 0f b1 55 00       	lock cmpxchg %edx,0x0(%rbp)
 43f:	75 f2                	jne    433 <proc_reg_read+0x23>
 441:	48 89 ef             	mov    %rbp,%rdi
 444:	e8 e7 fc ff ff       	callq  130 <unuse_pde>
 449:	4c 89 e0             	mov    %r12,%rax
 44c:	5d                   	pop    %rbp
 44d:	41 5c                	pop    %r12
 44f:	c3                   	retq   
 450:	49 c7 c4 fb ff ff ff 	mov    $0xfffffffffffffffb,%r12
 457:	eb d7                	jmp    430 <proc_reg_read+0x20>
 459:	0f 1f 80 00 00 00 00 	nopl   0x0(%rax)

vs
 
$ objdump -d fs/proc/inode.o.alexey (grep for proc_reg_read)

00000000000006e0 <proc_reg_read>:
 6e0:	41 54                	push   %r12
 6e2:	55                   	push   %rbp
 6e3:	48 8b 47 20          	mov    0x20(%rdi),%rax
 6e7:	48 8b 68 d0          	mov    -0x30(%rax),%rbp
 6eb:	f6 85 aa 00 00 00 01 	testb  $0x1,0xaa(%rbp)
 6f2:	74 15                	je     709 <proc_reg_read+0x29>
 6f4:	48 8b 45 30          	mov    0x30(%rbp),%rax
 6f8:	48 8b 40 10          	mov    0x10(%rax),%rax
 6fc:	48 85 c0             	test   %rax,%rax
 6ff:	74 3f                	je     740 <proc_reg_read+0x60>
 701:	5d                   	pop    %rbp
 702:	41 5c                	pop    %r12
 704:	e9 00 00 00 00       	jmpq   709 <proc_reg_read+0x29>
 709:	8b 45 00             	mov    0x0(%rbp),%eax
 70c:	85 c0                	test   %eax,%eax
 70e:	78 30                	js     740 <proc_reg_read+0x60>
 710:	44 8d 40 01          	lea    0x1(%rax),%r8d
 714:	f0 44 0f b1 45 00    	lock cmpxchg %r8d,0x0(%rbp)
 71a:	75 f0                	jne    70c <proc_reg_read+0x2c>
 71c:	48 8b 45 30          	mov    0x30(%rbp),%rax
 720:	48 8b 40 10          	mov    0x10(%rax),%rax
 724:	48 85 c0             	test   %rax,%rax
 727:	74 20                	je     749 <proc_reg_read+0x69>
 729:	e8 00 00 00 00       	callq  72e <proc_reg_read+0x4e>
 72e:	49 89 c4             	mov    %rax,%r12
 731:	48 89 ef             	mov    %rbp,%rdi
 734:	e8 f7 f9 ff ff       	callq  130 <unuse_pde>
 739:	4c 89 e0             	mov    %r12,%rax
 73c:	5d                   	pop    %rbp
 73d:	41 5c                	pop    %r12
 73f:	c3                   	retq   
 740:	49 c7 c4 fb ff ff ff 	mov    $0xfffffffffffffffb,%r12
 747:	eb f0                	jmp    739 <proc_reg_read+0x59>
 749:	49 c7 c4 fb ff ff ff 	mov    $0xfffffffffffffffb,%r12
 750:	eb df                	jmp    731 <proc_reg_read+0x51>
 752:	66 66 2e 0f 1f 84 00 	data16 nopw %cs:0x0(%rax,%rax,1)
 759:	00 00 00 00 
 75d:	0f 1f 00             	nopl   (%rax)

