lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Thu, 08 May 2008 11:24:59 +0800
From:	"Zhang, Yanmin" <yanmin_zhang@...ux.intel.com>
To:	Matthew Wilcox <matthew@....cx>
Cc:	Ingo Molnar <mingo@...e.hu>,
	"J. Bruce Fields" <bfields@...i.umich.edu>,
	LKML <linux-kernel@...r.kernel.org>,
	Alexander Viro <viro@....linux.org.uk>,
	Linus Torvalds <torvalds@...ux-foundation.org>,
	Andrew Morton <akpm@...ux-foundation.org>,
	linux-fsdevel@...r.kernel.org
Subject: Re: AIM7 40% regression with 2.6.26-rc1


On Tue, 2008-05-06 at 10:23 -0600, Matthew Wilcox wrote:
> On Tue, May 06, 2008 at 06:09:34AM -0600, Matthew Wilcox wrote:
> > So the only likely things I can see are:
> > 
> >  - file locks
> >  - fasync
> 
> I've wanted to fix file locks for a while.  Here's a first attempt.
> It was done quickly, so I concede that it may well have bugs in it.
> I found (and fixed) one with LTP.
> 
> It takes *no account* of nfsd, nor remote filesystems.  We need to have
> a serious discussion about their requirements.
I tested it on 8-core stoakley. aim7 result becomes 23% worse than the one of
pure 2.6.26-rc1.

I replied this email in case you have many patches and I might test what you don't
expect.

-yanmin

> 
> diff --git a/fs/locks.c b/fs/locks.c
> index 663c069..cb09765 100644
> --- a/fs/locks.c
> +++ b/fs/locks.c
> @@ -140,6 +140,8 @@ int lease_break_time = 45;
>  #define for_each_lock(inode, lockp) \
>  	for (lockp = &inode->i_flock; *lockp != NULL; lockp = &(*lockp)->fl_next)
>  
> +static DEFINE_SPINLOCK(file_lock_lock);
> +
>  static LIST_HEAD(file_lock_list);
>  static LIST_HEAD(blocked_list);
>  
> @@ -510,9 +512,9 @@ static void __locks_delete_block(struct file_lock *waiter)
>   */
>  static void locks_delete_block(struct file_lock *waiter)
>  {
> -	lock_kernel();
> +	spin_lock(&file_lock_lock);
>  	__locks_delete_block(waiter);
> -	unlock_kernel();
> +	spin_unlock(&file_lock_lock);
>  }
>  
>  /* Insert waiter into blocker's block list.
> @@ -649,7 +651,7 @@ posix_test_lock(struct file *filp, struct file_lock *fl)
>  {
>  	struct file_lock *cfl;
>  
> -	lock_kernel();
> +	spin_lock(&file_lock_lock);
>  	for (cfl = filp->f_path.dentry->d_inode->i_flock; cfl; cfl = cfl->fl_next) {
>  		if (!IS_POSIX(cfl))
>  			continue;
> @@ -662,7 +664,7 @@ posix_test_lock(struct file *filp, struct file_lock *fl)
>  			fl->fl_pid = pid_vnr(cfl->fl_nspid);
>  	} else
>  		fl->fl_type = F_UNLCK;
> -	unlock_kernel();
> +	spin_unlock(&file_lock_lock);
>  	return;
>  }
>  EXPORT_SYMBOL(posix_test_lock);
> @@ -735,18 +737,21 @@ static int flock_lock_file(struct file *filp, struct file_lock *request)
>  	int error = 0;
>  	int found = 0;
>  
> -	lock_kernel();
> -	if (request->fl_flags & FL_ACCESS)
> +	if (request->fl_flags & FL_ACCESS) {
> +		spin_lock(&file_lock_lock);
>  		goto find_conflict;
> +	}
>  
>  	if (request->fl_type != F_UNLCK) {
>  		error = -ENOMEM;
> +		
>  		new_fl = locks_alloc_lock();
>  		if (new_fl == NULL)
> -			goto out;
> +			goto out_unlocked;
>  		error = 0;
>  	}
>  
> +	spin_lock(&file_lock_lock);
>  	for_each_lock(inode, before) {
>  		struct file_lock *fl = *before;
>  		if (IS_POSIX(fl))
> @@ -772,10 +777,13 @@ static int flock_lock_file(struct file *filp, struct file_lock *request)
>  	 * If a higher-priority process was blocked on the old file lock,
>  	 * give it the opportunity to lock the file.
>  	 */
> -	if (found)
> +	if (found) {
> +		spin_unlock(&file_lock_lock);
>  		cond_resched();
> +		spin_lock(&file_lock_lock);
> +	}
>  
> -find_conflict:
> + find_conflict:
>  	for_each_lock(inode, before) {
>  		struct file_lock *fl = *before;
>  		if (IS_POSIX(fl))
> @@ -796,8 +804,9 @@ find_conflict:
>  	new_fl = NULL;
>  	error = 0;
>  
> -out:
> -	unlock_kernel();
> + out:
> +	spin_unlock(&file_lock_lock);
> + out_unlocked:
>  	if (new_fl)
>  		locks_free_lock(new_fl);
>  	return error;
> @@ -826,7 +835,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
>  		new_fl2 = locks_alloc_lock();
>  	}
>  
> -	lock_kernel();
> +	spin_lock(&file_lock_lock);
>  	if (request->fl_type != F_UNLCK) {
>  		for_each_lock(inode, before) {
>  			fl = *before;
> @@ -994,7 +1003,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
>  		locks_wake_up_blocks(left);
>  	}
>   out:
> -	unlock_kernel();
> +	spin_unlock(&file_lock_lock);
>  	/*
>  	 * Free any unused locks.
>  	 */
> @@ -1069,14 +1078,14 @@ int locks_mandatory_locked(struct inode *inode)
>  	/*
>  	 * Search the lock list for this inode for any POSIX locks.
>  	 */
> -	lock_kernel();
> +	spin_lock(&file_lock_lock);
>  	for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
>  		if (!IS_POSIX(fl))
>  			continue;
>  		if (fl->fl_owner != owner)
>  			break;
>  	}
> -	unlock_kernel();
> +	spin_unlock(&file_lock_lock);
>  	return fl ? -EAGAIN : 0;
>  }
>  
> @@ -1190,7 +1199,7 @@ int __break_lease(struct inode *inode, unsigned int mode)
>  
>  	new_fl = lease_alloc(NULL, mode & FMODE_WRITE ? F_WRLCK : F_RDLCK);
>  
> -	lock_kernel();
> +	spin_lock(&file_lock_lock);
>  
>  	time_out_leases(inode);
>  
> @@ -1251,8 +1260,10 @@ restart:
>  			break_time++;
>  	}
>  	locks_insert_block(flock, new_fl);
> +	spin_unlock(&file_lock_lock);
>  	error = wait_event_interruptible_timeout(new_fl->fl_wait,
>  						!new_fl->fl_next, break_time);
> +	spin_lock(&file_lock_lock);
>  	__locks_delete_block(new_fl);
>  	if (error >= 0) {
>  		if (error == 0)
> @@ -1266,8 +1277,8 @@ restart:
>  		error = 0;
>  	}
>  
> -out:
> -	unlock_kernel();
> + out:
> +	spin_unlock(&file_lock_lock);
>  	if (!IS_ERR(new_fl))
>  		locks_free_lock(new_fl);
>  	return error;
> @@ -1323,7 +1334,7 @@ int fcntl_getlease(struct file *filp)
>  	struct file_lock *fl;
>  	int type = F_UNLCK;
>  
> -	lock_kernel();
> +	spin_lock(&file_lock_lock);
>  	time_out_leases(filp->f_path.dentry->d_inode);
>  	for (fl = filp->f_path.dentry->d_inode->i_flock; fl && IS_LEASE(fl);
>  			fl = fl->fl_next) {
> @@ -1332,7 +1343,7 @@ int fcntl_getlease(struct file *filp)
>  			break;
>  		}
>  	}
> -	unlock_kernel();
> +	spin_unlock(&file_lock_lock);
>  	return type;
>  }
>  
> @@ -1363,6 +1374,7 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp)
>  	if (error)
>  		return error;
>  
> +	spin_lock(&file_lock_lock);
>  	time_out_leases(inode);
>  
>  	BUG_ON(!(*flp)->fl_lmops->fl_break);
> @@ -1370,10 +1382,11 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp)
>  	lease = *flp;
>  
>  	if (arg != F_UNLCK) {
> +		spin_unlock(&file_lock_lock);
>  		error = -ENOMEM;
>  		new_fl = locks_alloc_lock();
>  		if (new_fl == NULL)
> -			goto out;
> +			goto out_unlocked;
>  
>  		error = -EAGAIN;
>  		if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0))
> @@ -1382,6 +1395,7 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp)
>  		    && ((atomic_read(&dentry->d_count) > 1)
>  			|| (atomic_read(&inode->i_count) > 1)))
>  			goto out;
> +		spin_lock(&file_lock_lock);
>  	}
>  
>  	/*
> @@ -1429,11 +1443,14 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp)
>  
>  	locks_copy_lock(new_fl, lease);
>  	locks_insert_lock(before, new_fl);
> +	spin_unlock(&file_lock_lock);
>  
>  	*flp = new_fl;
>  	return 0;
>  
> -out:
> + out:
> +	spin_unlock(&file_lock_lock);
> + out_unlocked:
>  	if (new_fl != NULL)
>  		locks_free_lock(new_fl);
>  	return error;
> @@ -1471,12 +1488,10 @@ int vfs_setlease(struct file *filp, long arg, struct file_lock **lease)
>  {
>  	int error;
>  
> -	lock_kernel();
>  	if (filp->f_op && filp->f_op->setlease)
>  		error = filp->f_op->setlease(filp, arg, lease);
>  	else
>  		error = generic_setlease(filp, arg, lease);
> -	unlock_kernel();
>  
>  	return error;
>  }
> @@ -1503,12 +1518,11 @@ int fcntl_setlease(unsigned int fd, struct file *filp, long arg)
>  	if (error)
>  		return error;
>  
> -	lock_kernel();
> -
>  	error = vfs_setlease(filp, arg, &flp);
>  	if (error || arg == F_UNLCK)
> -		goto out_unlock;
> +		return error;
>  
> +	lock_kernel();
>  	error = fasync_helper(fd, filp, 1, &flp->fl_fasync);
>  	if (error < 0) {
>  		/* remove lease just inserted by setlease */
> @@ -1519,7 +1533,7 @@ int fcntl_setlease(unsigned int fd, struct file *filp, long arg)
>  	}
>  
>  	error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0);
> -out_unlock:
> + out_unlock:
>  	unlock_kernel();
>  	return error;
>  }
> @@ -2024,7 +2038,7 @@ void locks_remove_flock(struct file *filp)
>  			fl.fl_ops->fl_release_private(&fl);
>  	}
>  
> -	lock_kernel();
> +	spin_lock(&file_lock_lock);
>  	before = &inode->i_flock;
>  
>  	while ((fl = *before) != NULL) {
> @@ -2042,7 +2056,7 @@ void locks_remove_flock(struct file *filp)
>   		}
>  		before = &fl->fl_next;
>  	}
> -	unlock_kernel();
> +	spin_unlock(&file_lock_lock);
>  }
>  
>  /**
> @@ -2057,12 +2071,12 @@ posix_unblock_lock(struct file *filp, struct file_lock *waiter)
>  {
>  	int status = 0;
>  
> -	lock_kernel();
> +	spin_lock(&file_lock_lock);
>  	if (waiter->fl_next)
>  		__locks_delete_block(waiter);
>  	else
>  		status = -ENOENT;
> -	unlock_kernel();
> +	spin_unlock(&file_lock_lock);
>  	return status;
>  }
>  
> @@ -2175,7 +2189,7 @@ static int locks_show(struct seq_file *f, void *v)
>  
>  static void *locks_start(struct seq_file *f, loff_t *pos)
>  {
> -	lock_kernel();
> +	spin_lock(&file_lock_lock);
>  	f->private = (void *)1;
>  	return seq_list_start(&file_lock_list, *pos);
>  }
> @@ -2187,7 +2201,7 @@ static void *locks_next(struct seq_file *f, void *v, loff_t *pos)
>  
>  static void locks_stop(struct seq_file *f, void *v)
>  {
> -	unlock_kernel();
> +	spin_unlock(&file_lock_lock);
>  }
>  
>  struct seq_operations locks_seq_operations = {
> @@ -2215,7 +2229,7 @@ int lock_may_read(struct inode *inode, loff_t start, unsigned long len)
>  {
>  	struct file_lock *fl;
>  	int result = 1;
> -	lock_kernel();
> +	spin_lock(&file_lock_lock);
>  	for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
>  		if (IS_POSIX(fl)) {
>  			if (fl->fl_type == F_RDLCK)
> @@ -2232,7 +2246,7 @@ int lock_may_read(struct inode *inode, loff_t start, unsigned long len)
>  		result = 0;
>  		break;
>  	}
> -	unlock_kernel();
> +	spin_unlock(&file_lock_lock);
>  	return result;
>  }
>  
> @@ -2255,7 +2269,7 @@ int lock_may_write(struct inode *inode, loff_t start, unsigned long len)
>  {
>  	struct file_lock *fl;
>  	int result = 1;
> -	lock_kernel();
> +	spin_lock(&file_lock_lock);
>  	for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
>  		if (IS_POSIX(fl)) {
>  			if ((fl->fl_end < start) || (fl->fl_start > (start + len)))
> @@ -2270,7 +2284,7 @@ int lock_may_write(struct inode *inode, loff_t start, unsigned long len)
>  		result = 0;
>  		break;
>  	}
> -	unlock_kernel();
> +	spin_unlock(&file_lock_lock);
>  	return result;
>  }
>  
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ