[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20120208162925.GA19392@google.com>
Date: Wed, 8 Feb 2012 08:29:25 -0800
From: Tejun Heo <tj@...nel.org>
To: Shaohua Li <shaohua.li@...el.com>
Cc: Linus Torvalds <torvalds@...ux-foundation.org>,
Jens Axboe <axboe@...nel.dk>, Vivek Goyal <vgoyal@...hat.com>,
lkml <linux-kernel@...r.kernel.org>,
Knut Petersen <Knut_Petersen@...nline.de>, mroos@...ux.ee
Subject: Re: [PATCH] block: strip out locking optimization in
put_io_context()
Hello, Shaohua.
On Wed, Feb 08, 2012 at 04:29:53PM +0800, Shaohua Li wrote:
> > the test adds mem=4G in a 2 sockets 16 CPU machine.
> > just make several copy of kernel source in tmpfs (so there is swap
> > depending on your
> > memory size) and run kernel build in the kernel source in the meantime.
> > there is only one swap device which is a rotating disk.
> >
> > I'll test both patches soon.
>
> Tried all the options, the regression still exists. Any new idea?
> I'll spend some time on it to see if I can find anything.
Can you please try the following one? Thanks a lot!
---
block/blk-cgroup.c | 2
block/blk-core.c | 2
block/blk-ioc.c | 99 +++++++++++++---------------------------------
block/cfq-iosched.c | 2
fs/ioprio.c | 2
include/linux/iocontext.h | 8 +--
kernel/fork.c | 2
7 files changed, 37 insertions(+), 80 deletions(-)
Index: work/block/blk-ioc.c
===================================================================
--- work.orig/block/blk-ioc.c
+++ work/block/blk-ioc.c
@@ -8,9 +8,12 @@
#include <linux/blkdev.h>
#include <linux/bootmem.h> /* for max_pfn/max_low_pfn */
#include <linux/slab.h>
+#include <linux/rwlock.h>
#include "blk.h"
+static DEFINE_RWLOCK(ioc_clear_rwlock);
+
/*
* For io context allocations
*/
@@ -45,7 +48,7 @@ static void ioc_exit_icq(struct io_cq *i
struct request_queue *q = icq->q;
struct elevator_type *et = q->elevator->type;
- lockdep_assert_held(&ioc->lock);
+ //lockdep_assert_held(&ioc->lock);
lockdep_assert_held(q->queue_lock);
radix_tree_delete(&ioc->icq_tree, icq->q->id);
@@ -71,63 +74,6 @@ static void ioc_exit_icq(struct io_cq *i
call_rcu(&icq->__rcu_head, icq_free_icq_rcu);
}
-/*
- * Slow path for ioc release in put_io_context(). Performs double-lock
- * dancing to unlink all icq's and then frees ioc.
- */
-static void ioc_release_fn(struct work_struct *work)
-{
- struct io_context *ioc = container_of(work, struct io_context,
- release_work);
- struct request_queue *last_q = NULL;
-
- spin_lock_irq(&ioc->lock);
-
- while (!hlist_empty(&ioc->icq_list)) {
- struct io_cq *icq = hlist_entry(ioc->icq_list.first,
- struct io_cq, ioc_node);
- struct request_queue *this_q = icq->q;
-
- if (this_q != last_q) {
- /*
- * Need to switch to @this_q. Once we release
- * @ioc->lock, it can go away along with @cic.
- * Hold on to it.
- */
- __blk_get_queue(this_q);
-
- /*
- * blk_put_queue() might sleep thanks to kobject
- * idiocy. Always release both locks, put and
- * restart.
- */
- if (last_q) {
- spin_unlock(last_q->queue_lock);
- spin_unlock_irq(&ioc->lock);
- blk_put_queue(last_q);
- } else {
- spin_unlock_irq(&ioc->lock);
- }
-
- last_q = this_q;
- spin_lock_irq(this_q->queue_lock);
- spin_lock(&ioc->lock);
- continue;
- }
- ioc_exit_icq(icq);
- }
-
- if (last_q) {
- spin_unlock(last_q->queue_lock);
- spin_unlock_irq(&ioc->lock);
- blk_put_queue(last_q);
- } else {
- spin_unlock_irq(&ioc->lock);
- }
-
- kmem_cache_free(iocontext_cachep, ioc);
-}
-
/**
* put_io_context - put a reference of io_context
* @ioc: io_context to put
@@ -135,7 +81,7 @@ static void ioc_release_fn(struct work_s
* Decrement reference count of @ioc and release it if the count reaches
* zero.
*/
-void put_io_context(struct io_context *ioc)
+void put_io_context(struct io_context *ioc, struct request_queue *locked_q)
{
unsigned long flags;
@@ -144,16 +90,26 @@ void put_io_context(struct io_context *i
BUG_ON(atomic_long_read(&ioc->refcount) <= 0);
- /*
- * Releasing ioc requires reverse order double locking and we may
- * already be holding a queue_lock. Do it asynchronously from wq.
- */
- if (atomic_long_dec_and_test(&ioc->refcount)) {
- spin_lock_irqsave(&ioc->lock, flags);
- if (!hlist_empty(&ioc->icq_list))
- schedule_work(&ioc->release_work);
- spin_unlock_irqrestore(&ioc->lock, flags);
+ if (!atomic_long_dec_and_test(&ioc->refcount))
+ return;
+
+ read_lock_irqsave(&ioc_clear_rwlock, flags);
+
+ while (!hlist_empty(&ioc->icq_list)) {
+ struct io_cq *icq = hlist_entry(ioc->icq_list.first,
+ struct io_cq, ioc_node);
+ struct request_queue *q = icq->q;
+
+ if (q != locked_q)
+ spin_lock_nested(q->queue_lock, 1);
+
+ ioc_exit_icq(icq);
+
+ if (q != locked_q)
+ spin_unlock(q->queue_lock);
}
+
+ read_unlock_irqrestore(&ioc_clear_rwlock, flags);
}
EXPORT_SYMBOL(put_io_context);
@@ -168,7 +124,7 @@ void exit_io_context(struct task_struct
task_unlock(task);
atomic_dec(&ioc->nr_tasks);
- put_io_context(ioc);
+ put_io_context(ioc, NULL);
}
/**
@@ -181,6 +137,8 @@ void ioc_clear_queue(struct request_queu
{
lockdep_assert_held(q->queue_lock);
+ write_lock(&ioc_clear_rwlock);
+
while (!list_empty(&q->icq_list)) {
struct io_cq *icq = list_entry(q->icq_list.next,
struct io_cq, q_node);
@@ -190,6 +148,8 @@ void ioc_clear_queue(struct request_queu
ioc_exit_icq(icq);
spin_unlock(&ioc->lock);
}
+
+ write_unlock(&ioc_clear_rwlock);
}
void create_io_context_slowpath(struct task_struct *task, gfp_t gfp_flags,
@@ -208,7 +168,6 @@ void create_io_context_slowpath(struct t
spin_lock_init(&ioc->lock);
INIT_RADIX_TREE(&ioc->icq_tree, GFP_ATOMIC | __GFP_HIGH);
INIT_HLIST_HEAD(&ioc->icq_list);
- INIT_WORK(&ioc->release_work, ioc_release_fn);
/*
* Try to install. ioc shouldn't be installed if someone else
Index: work/include/linux/iocontext.h
===================================================================
--- work.orig/include/linux/iocontext.h
+++ work/include/linux/iocontext.h
@@ -3,7 +3,6 @@
#include <linux/radix-tree.h>
#include <linux/rcupdate.h>
-#include <linux/workqueue.h>
enum {
ICQ_IOPRIO_CHANGED,
@@ -113,8 +112,6 @@ struct io_context {
struct radix_tree_root icq_tree;
struct io_cq __rcu *icq_hint;
struct hlist_head icq_list;
-
- struct work_struct release_work;
};
static inline struct io_context *ioc_task_link(struct io_context *ioc)
@@ -133,7 +130,7 @@ static inline struct io_context *ioc_tas
struct task_struct;
#ifdef CONFIG_BLOCK
-void put_io_context(struct io_context *ioc);
+void put_io_context(struct io_context *ioc, struct request_queue *locked_q);
void exit_io_context(struct task_struct *task);
struct io_context *get_task_io_context(struct task_struct *task,
gfp_t gfp_flags, int node);
@@ -141,7 +138,8 @@ void ioc_ioprio_changed(struct io_contex
void ioc_cgroup_changed(struct io_context *ioc);
#else
struct io_context;
-static inline void put_io_context(struct io_context *ioc) { }
+static inline void put_io_context(struct io_context *ioc,
+ struct request_queue *locked_q) { }
static inline void exit_io_context(struct task_struct *task) { }
#endif
Index: work/block/blk-cgroup.c
===================================================================
--- work.orig/block/blk-cgroup.c
+++ work/block/blk-cgroup.c
@@ -1659,7 +1659,7 @@ static void blkiocg_attach(struct cgroup
ioc = get_task_io_context(task, GFP_ATOMIC, NUMA_NO_NODE);
if (ioc) {
ioc_cgroup_changed(ioc);
- put_io_context(ioc);
+ put_io_context(ioc, NULL);
}
}
}
Index: work/block/blk-core.c
===================================================================
--- work.orig/block/blk-core.c
+++ work/block/blk-core.c
@@ -642,7 +642,7 @@ static inline void blk_free_request(stru
if (rq->cmd_flags & REQ_ELVPRIV) {
elv_put_request(q, rq);
if (rq->elv.icq)
- put_io_context(rq->elv.icq->ioc);
+ put_io_context(rq->elv.icq->ioc, q);
}
mempool_free(rq, q->rq.rq_pool);
Index: work/block/cfq-iosched.c
===================================================================
--- work.orig/block/cfq-iosched.c
+++ work/block/cfq-iosched.c
@@ -1787,7 +1787,7 @@ __cfq_slice_expired(struct cfq_data *cfq
cfqd->active_queue = NULL;
if (cfqd->active_cic) {
- put_io_context(cfqd->active_cic->icq.ioc);
+ put_io_context(cfqd->active_cic->icq.ioc, cfqd->queue);
cfqd->active_cic = NULL;
}
}
Index: work/fs/ioprio.c
===================================================================
--- work.orig/fs/ioprio.c
+++ work/fs/ioprio.c
@@ -51,7 +51,7 @@ int set_task_ioprio(struct task_struct *
ioc = get_task_io_context(task, GFP_ATOMIC, NUMA_NO_NODE);
if (ioc) {
ioc_ioprio_changed(ioc, ioprio);
- put_io_context(ioc);
+ put_io_context(ioc, NULL);
}
return err;
Index: work/kernel/fork.c
===================================================================
--- work.orig/kernel/fork.c
+++ work/kernel/fork.c
@@ -890,7 +890,7 @@ static int copy_io(unsigned long clone_f
return -ENOMEM;
new_ioc->ioprio = ioc->ioprio;
- put_io_context(new_ioc);
+ put_io_context(new_ioc, NULL);
}
#endif
return 0;
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists