[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <Pine.LNX.4.64.1207281242000.30415@file.rdu.redhat.com>
Date: Sat, 28 Jul 2012 12:42:34 -0400 (EDT)
From: Mikulas Patocka <mpatocka@...hat.com>
To: Jeff Moyer <jmoyer@...hat.com>
cc: Jan Kara <jack@...e.cz>, Alexander Viro <viro@...iv.linux.org.uk>,
Jens Axboe <axboe@...nel.dk>,
"Alasdair G. Kergon" <agk@...hat.com>,
linux-kernel@...r.kernel.org, linux-fsdevel@...r.kernel.org,
dm-devel@...hat.com, lwoodman@...hat.com,
Andrea Arcangeli <aarcange@...hat.com>,
kosaki.motohiro@...fujitsu.com
Subject: [PATCH 3/3] blockdev: turn a rw semaphore into a percpu rw semaphore
blockdev: turn a rw semaphore into a percpu rw semaphore
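This avoids cache line bouncing when many processes lock the semaphore
for read concurrently: with a percpu rw semaphore, readers no longer all
write to the same shared cache line.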
Partially based on a patch by Jeff Moyer <jmoyer@...hat.com>.
Signed-off-by: Mikulas Patocka <mpatocka@...hat.com>
---
fs/block_dev.c | 30 ++++++++++++++++++++----------
include/linux/fs.h | 3 ++-
2 files changed, 22 insertions(+), 11 deletions(-)
Index: linux-3.5-fast/fs/block_dev.c
===================================================================
--- linux-3.5-fast.orig/fs/block_dev.c 2012-07-28 18:32:10.000000000 +0200
+++ linux-3.5-fast/fs/block_dev.c 2012-07-28 18:32:12.000000000 +0200
@@ -127,7 +127,7 @@ int set_blocksize(struct block_device *b
return -EINVAL;
/* Prevent starting I/O or mapping the device */
- down_write(&bdev->bd_block_size_semaphore);
+ percpu_down_write(&bdev->bd_block_size_semaphore);
/* Check that the block device is not memory mapped */
mapping = bdev->bd_inode->i_mapping;
@@ -135,7 +135,7 @@ int set_blocksize(struct block_device *b
if (!prio_tree_empty(&mapping->i_mmap) ||
!list_empty(&mapping->i_mmap_nonlinear)) {
mutex_unlock(&mapping->i_mmap_mutex);
- up_write(&bdev->bd_block_size_semaphore);
+ percpu_up_write(&bdev->bd_block_size_semaphore);
return -EBUSY;
}
mutex_unlock(&mapping->i_mmap_mutex);
@@ -148,7 +148,7 @@ int set_blocksize(struct block_device *b
kill_bdev(bdev);
}
- up_write(&bdev->bd_block_size_semaphore);
+ percpu_up_write(&bdev->bd_block_size_semaphore);
return 0;
}
@@ -460,6 +460,12 @@ static struct inode *bdev_alloc_inode(st
struct bdev_inode *ei = kmem_cache_alloc(bdev_cachep, GFP_KERNEL);
if (!ei)
return NULL;
+
+ if (unlikely(percpu_init_rwsem(&ei->bdev.bd_block_size_semaphore))) {
+ kmem_cache_free(bdev_cachep, ei);
+ return NULL;
+ }
+
return &ei->vfs_inode;
}
@@ -468,6 +474,8 @@ static void bdev_i_callback(struct rcu_h
struct inode *inode = container_of(head, struct inode, i_rcu);
struct bdev_inode *bdi = BDEV_I(inode);
+ percpu_free_rwsem(&bdi->bdev.bd_block_size_semaphore);
+
kmem_cache_free(bdev_cachep, bdi);
}
@@ -491,7 +499,6 @@ static void init_once(void *foo)
inode_init_once(&ei->vfs_inode);
/* Initialize mutex for freeze. */
mutex_init(&bdev->bd_fsfreeze_mutex);
- init_rwsem(&bdev->bd_block_size_semaphore);
}
static inline void __bd_forget(struct inode *inode)
@@ -1592,12 +1599,13 @@ ssize_t blkdev_aio_read(struct kiocb *io
{
ssize_t ret;
struct block_device *bdev = I_BDEV(iocb->ki_filp->f_mapping->host);
+ percpu_rwsem_ptr p;
- down_read(&bdev->bd_block_size_semaphore);
+ p = percpu_down_read(&bdev->bd_block_size_semaphore);
ret = generic_file_aio_read(iocb, iov, nr_segs, pos);
- up_read(&bdev->bd_block_size_semaphore);
+ percpu_up_read(&bdev->bd_block_size_semaphore, p);
return ret;
}
@@ -1616,10 +1624,11 @@ ssize_t blkdev_aio_write(struct kiocb *i
struct file *file = iocb->ki_filp;
struct block_device *bdev = I_BDEV(file->f_mapping->host);
ssize_t ret;
+ percpu_rwsem_ptr p;
BUG_ON(iocb->ki_pos != pos);
- down_read(&bdev->bd_block_size_semaphore);
+ p = percpu_down_read(&bdev->bd_block_size_semaphore);
ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
if (ret > 0 || ret == -EIOCBQUEUED) {
@@ -1630,7 +1639,7 @@ ssize_t blkdev_aio_write(struct kiocb *i
ret = err;
}
- up_read(&bdev->bd_block_size_semaphore);
+ percpu_up_read(&bdev->bd_block_size_semaphore, p);
return ret;
}
@@ -1640,12 +1649,13 @@ int blkdev_mmap(struct file *file, struc
{
int ret;
struct block_device *bdev = I_BDEV(file->f_mapping->host);
+ percpu_rwsem_ptr p;
- down_read(&bdev->bd_block_size_semaphore);
+ p = percpu_down_read(&bdev->bd_block_size_semaphore);
ret = generic_file_mmap(file, vma);
- up_read(&bdev->bd_block_size_semaphore);
+ percpu_up_read(&bdev->bd_block_size_semaphore, p);
return ret;
}
Index: linux-3.5-fast/include/linux/fs.h
===================================================================
--- linux-3.5-fast.orig/include/linux/fs.h 2012-07-28 18:32:10.000000000 +0200
+++ linux-3.5-fast/include/linux/fs.h 2012-07-28 18:32:12.000000000 +0200
@@ -10,6 +10,7 @@
#include <linux/ioctl.h>
#include <linux/blk_types.h>
#include <linux/types.h>
+#include <linux/percpu-rwsem.h>
/*
* It's silly to have NR_OPEN bigger than NR_FILE, but you can change
@@ -714,7 +715,7 @@ struct block_device {
/* Mutex for freeze */
struct mutex bd_fsfreeze_mutex;
/* A semaphore that prevents I/O while block size is being changed */
- struct rw_semaphore bd_block_size_semaphore;
+ struct percpu_rw_semaphore bd_block_size_semaphore;
};
/*
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists