[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <4C65B285.4050800@ds.jp.nec.com>
Date: Fri, 13 Aug 2010 17:00:53 -0400
From: Munehiro Ikeda <m-ikeda@...jp.nec.com>
To: Nauman Rafique <nauman@...gle.com>
CC: linux-kernel@...r.kernel.org, Vivek Goyal <vgoyal@...hat.com>,
Ryo Tsuruta <ryov@...inux.co.jp>, taka@...inux.co.jp,
kamezawa.hiroyu@...fujitsu.com,
Andrea Righi <righi.andrea@...il.com>,
Gui Jianfeng <guijianfeng@...fujitsu.com>,
akpm@...ux-foundation.org, balbir@...ux.vnet.ibm.com
Subject: Re: [RFC][PATCH 10/11] blkiocg async: Async queue per cfq_group
Nauman Rafique wrote, on 08/12/2010 09:24 PM:
> Muuhh,
> I do not understand the motivation behind making cfq_io_context per
> cgroup. We can extract cgroup id from bio, use that to lookup cgroup
> and its async queues. What am I missing?
What cgroup ID can suggest is only cgroup to which the thread belongs,
but not the thread itself. This means that IO priority and IO prio-class
can't be determined by cgroup ID.
The pointers of async queues are held in cfqg->async_cfqq[class][prio].
It is impossible to find out which queue is appropriate by only cgroup
ID if considering IO priority.
Frankly speaking, I'm not 100% confident if IO priority should be
applied on async write IOs, but anyway, I made up my mind to keep the
current scheme.
Do I make sense? If you have any other idea, I am glad to hear.
Thanks,
Muuhh
>
> On Thu, Jul 8, 2010 at 8:22 PM, Munehiro Ikeda<m-ikeda@...jp.nec.com> wrote:
>> This is the main body for async capability of blkio controller.
>> The basic ideas are
>> - To move async queues from cfq_data to cfq_group, and
>> - To associate cfq_io_context with cfq_group
>>
>> Each cfq_io_context, which was created per an io_context
>> per a device, is now created per an io_context per a cfq_group.
>> Each cfq_group is created per a cgroup per a device, so
>> cfq_io_context is now resulted in to be created per
>> an io_context per a cgroup per a device.
>>
>> To protect link between cfq_io_context and cfq_group,
>> locking code is modified in several parts.
>>
>> ToDo:
>> - Lock protection still might be imperfect and more investigation
>> is needed.
>>
>> - cic_index of root cfq_group (cfqd->root_group.cic_index) is not
>> removed and lasts beyond elevator switching.
>> This issues should be solved.
>>
>> Signed-off-by: Munehiro "Muuhh" Ikeda<m-ikeda@...jp.nec.com>
>> ---
>> block/cfq-iosched.c | 277 ++++++++++++++++++++++++++++-----------------
>> include/linux/iocontext.h | 2 +-
>> 2 files changed, 175 insertions(+), 104 deletions(-)
>>
>> diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
>> index 68f76e9..4186c30 100644
>> --- a/block/cfq-iosched.c
>> +++ b/block/cfq-iosched.c
>> @@ -191,10 +191,23 @@ struct cfq_group {
>> struct cfq_rb_root service_trees[2][3];
>> struct cfq_rb_root service_tree_idle;
>>
>> + /*
>> + * async queue for each priority case
>> + */
>> + struct cfq_queue *async_cfqq[2][IOPRIO_BE_NR];
>> + struct cfq_queue *async_idle_cfqq;
>> +
>> unsigned long saved_workload_slice;
>> enum wl_type_t saved_workload;
>> enum wl_prio_t saved_serving_prio;
>> struct blkio_group blkg;
>> + struct cfq_data *cfqd;
>> +
>> + /* lock for cic_list */
>> + spinlock_t lock;
>> + unsigned int cic_index;
>> + struct list_head cic_list;
>> +
>> #ifdef CONFIG_CFQ_GROUP_IOSCHED
>> struct hlist_node cfqd_node;
>> atomic_t ref;
>> @@ -254,12 +267,6 @@ struct cfq_data {
>> struct cfq_queue *active_queue;
>> struct cfq_io_context *active_cic;
>>
>> - /*
>> - * async queue for each priority case
>> - */
>> - struct cfq_queue *async_cfqq[2][IOPRIO_BE_NR];
>> - struct cfq_queue *async_idle_cfqq;
>> -
>> sector_t last_position;
>>
>> /*
>> @@ -275,8 +282,6 @@ struct cfq_data {
>> unsigned int cfq_latency;
>> unsigned int cfq_group_isolation;
>>
>> - unsigned int cic_index;
>> - struct list_head cic_list;
>>
>> /*
>> * Fallback dummy cfqq for extreme OOM conditions
>> @@ -418,10 +423,16 @@ static inline int cfqg_busy_async_queues(struct cfq_data *cfqd,
>> }
>>
>> static void cfq_dispatch_insert(struct request_queue *, struct request *);
>> +static void __cfq_exit_single_io_context(struct cfq_data *,
>> + struct cfq_group *,
>> + struct cfq_io_context *);
>> static struct cfq_queue *cfq_get_queue(struct cfq_data *, bool,
>> - struct io_context *, gfp_t);
>> + struct cfq_io_context *, gfp_t);
>> static struct cfq_io_context *cfq_cic_lookup(struct cfq_data *,
>> - struct io_context *);
>> + struct cfq_group *,
>> + struct io_context *);
>> +static void cfq_put_async_queues(struct cfq_group *);
>> +static int cfq_alloc_cic_index(void);
>>
>> static inline struct cfq_queue *cic_to_cfqq(struct cfq_io_context *cic,
>> bool is_sync)
>> @@ -438,17 +449,28 @@ static inline void cic_set_cfqq(struct cfq_io_context *cic,
>> #define CIC_DEAD_KEY 1ul
>> #define CIC_DEAD_INDEX_SHIFT 1
>>
>> -static inline void *cfqd_dead_key(struct cfq_data *cfqd)
>> +static inline void *cfqg_dead_key(struct cfq_group *cfqg)
>> {
>> - return (void *)(cfqd->cic_index<< CIC_DEAD_INDEX_SHIFT | CIC_DEAD_KEY);
>> + return (void *)(cfqg->cic_index<< CIC_DEAD_INDEX_SHIFT | CIC_DEAD_KEY);
>> +}
>> +
>> +static inline struct cfq_group *cic_to_cfqg(struct cfq_io_context *cic)
>> +{
>> + struct cfq_group *cfqg = cic->key;
>> +
>> + if (unlikely((unsigned long) cfqg& CIC_DEAD_KEY))
>> + cfqg = NULL;
>> +
>> + return cfqg;
>> }
>>
>> static inline struct cfq_data *cic_to_cfqd(struct cfq_io_context *cic)
>> {
>> - struct cfq_data *cfqd = cic->key;
>> + struct cfq_data *cfqd = NULL;
>> + struct cfq_group *cfqg = cic_to_cfqg(cic);
>>
>> - if (unlikely((unsigned long) cfqd& CIC_DEAD_KEY))
>> - return NULL;
>> + if (likely(cfqg))
>> + cfqd = cfqg->cfqd;
>>
>> return cfqd;
>> }
>> @@ -959,12 +981,12 @@ cfq_update_blkio_group_weight(struct blkio_group *blkg, unsigned int weight)
>> }
>>
>> static struct cfq_group *
>> -cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create)
>> +cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct blkio_cgroup *blkcg,
>> + int create)
>> {
>> - struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);
>> struct cfq_group *cfqg = NULL;
>> void *key = cfqd;
>> - int i, j;
>> + int i, j, idx;
>> struct cfq_rb_root *st;
>> struct backing_dev_info *bdi =&cfqd->queue->backing_dev_info;
>> unsigned int major, minor;
>> @@ -978,14 +1000,21 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create)
>> if (cfqg || !create)
>> goto done;
>>
>> + idx = cfq_alloc_cic_index();
>> + if (idx< 0)
>> + goto done;
>> +
>> cfqg = kzalloc_node(sizeof(*cfqg), GFP_ATOMIC, cfqd->queue->node);
>> if (!cfqg)
>> - goto done;
>> + goto err;
>>
>> for_each_cfqg_st(cfqg, i, j, st)
>> *st = CFQ_RB_ROOT;
>> RB_CLEAR_NODE(&cfqg->rb_node);
>>
>> + spin_lock_init(&cfqg->lock);
>> + INIT_LIST_HEAD(&cfqg->cic_list);
>> +
>> /*
>> * Take the initial reference that will be released on destroy
>> * This can be thought of a joint reference by cgroup and
>> @@ -1002,7 +1031,14 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create)
>>
>> /* Add group on cfqd list */
>> hlist_add_head(&cfqg->cfqd_node,&cfqd->cfqg_list);
>> + cfqg->cfqd = cfqd;
>> + cfqg->cic_index = idx;
>> + goto done;
>>
>> +err:
>> + spin_lock(&cic_index_lock);
>> + ida_remove(&cic_index_ida, idx);
>> + spin_unlock(&cic_index_lock);
>> done:
>> return cfqg;
>> }
>> @@ -1095,10 +1131,6 @@ static inline struct cfq_group *cfq_ref_get_cfqg(struct cfq_group *cfqg)
>>
>> static void cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg)
>> {
>> - /* Currently, all async queues are mapped to root group */
>> - if (!cfq_cfqq_sync(cfqq))
>> - cfqg =&cfqq->cfqd->root_group;
>> -
>> cfqq->cfqg = cfqg;
>> /* cfqq reference on cfqg */
>> atomic_inc(&cfqq->cfqg->ref);
>> @@ -1114,6 +1146,16 @@ static void cfq_put_cfqg(struct cfq_group *cfqg)
>> return;
>> for_each_cfqg_st(cfqg, i, j, st)
>> BUG_ON(!RB_EMPTY_ROOT(&st->rb) || st->active != NULL);
>> +
>> + /**
>> + * ToDo:
>> + * root_group never reaches here and cic_index is never
>> + * removed. Unused cic_index lasts by elevator switching.
>> + */
>> + spin_lock(&cic_index_lock);
>> + ida_remove(&cic_index_ida, cfqg->cic_index);
>> + spin_unlock(&cic_index_lock);
>> +
>> kfree(cfqg);
>> }
>>
>> @@ -1122,6 +1164,15 @@ static void cfq_destroy_cfqg(struct cfq_data *cfqd, struct cfq_group *cfqg)
>> /* Something wrong if we are trying to remove same group twice */
>> BUG_ON(hlist_unhashed(&cfqg->cfqd_node));
>>
>> + while (!list_empty(&cfqg->cic_list)) {
>> + struct cfq_io_context *cic = list_entry(cfqg->cic_list.next,
>> + struct cfq_io_context,
>> + group_list);
>> +
>> + __cfq_exit_single_io_context(cfqd, cfqg, cic);
>> + }
>> +
>> + cfq_put_async_queues(cfqg);
>> hlist_del_init(&cfqg->cfqd_node);
>>
>> /*
>> @@ -1497,10 +1548,12 @@ static struct request *
>> cfq_find_rq_fmerge(struct cfq_data *cfqd, struct bio *bio)
>> {
>> struct task_struct *tsk = current;
>> + struct cfq_group *cfqg;
>> struct cfq_io_context *cic;
>> struct cfq_queue *cfqq;
>>
>> - cic = cfq_cic_lookup(cfqd, tsk->io_context);
>> + cfqg = cfq_get_cfqg(cfqd, bio, 0);
>> + cic = cfq_cic_lookup(cfqd, cfqg, tsk->io_context);
>> if (!cic)
>> return NULL;
>>
>> @@ -1611,6 +1664,7 @@ static int cfq_allow_merge(struct request_queue *q, struct request *rq,
>> struct bio *bio)
>> {
>> struct cfq_data *cfqd = q->elevator->elevator_data;
>> + struct cfq_group *cfqg;
>> struct cfq_io_context *cic;
>> struct cfq_queue *cfqq;
>>
>> @@ -1624,7 +1678,8 @@ static int cfq_allow_merge(struct request_queue *q, struct request *rq,
>> * Lookup the cfqq that this bio will be queued with. Allow
>> * merge only if rq is queued there.
>> */
>> - cic = cfq_cic_lookup(cfqd, current->io_context);
>> + cfqg = cfq_get_cfqg(cfqd, bio, 0);
>> + cic = cfq_cic_lookup(cfqd, cfqg, current->io_context);
>> if (!cic)
>> return false;
>>
>> @@ -2667,17 +2722,18 @@ static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
>> }
>>
>> static void __cfq_exit_single_io_context(struct cfq_data *cfqd,
>> + struct cfq_group *cfqg,
>> struct cfq_io_context *cic)
>> {
>> struct io_context *ioc = cic->ioc;
>>
>> - list_del_init(&cic->queue_list);
>> + list_del_init(&cic->group_list);
>>
>> /*
>> * Make sure dead mark is seen for dead queues
>> */
>> smp_wmb();
>> - cic->key = cfqd_dead_key(cfqd);
>> + cic->key = cfqg_dead_key(cfqg);
>>
>> if (ioc->ioc_data == cic)
>> rcu_assign_pointer(ioc->ioc_data, NULL);
>> @@ -2696,23 +2752,23 @@ static void __cfq_exit_single_io_context(struct cfq_data *cfqd,
>> static void cfq_exit_single_io_context(struct io_context *ioc,
>> struct cfq_io_context *cic)
>> {
>> - struct cfq_data *cfqd = cic_to_cfqd(cic);
>> + struct cfq_group *cfqg = cic_to_cfqg(cic);
>>
>> - if (cfqd) {
>> - struct request_queue *q = cfqd->queue;
>> + if (cfqg) {
>> + struct cfq_data *cfqd = cfqg->cfqd;
>> unsigned long flags;
>>
>> - spin_lock_irqsave(q->queue_lock, flags);
>> + spin_lock_irqsave(&cfqg->lock, flags);
>>
>> /*
>> * Ensure we get a fresh copy of the ->key to prevent
>> * race between exiting task and queue
>> */
>> smp_read_barrier_depends();
>> - if (cic->key == cfqd)
>> - __cfq_exit_single_io_context(cfqd, cic);
>> + if (cic->key == cfqg)
>> + __cfq_exit_single_io_context(cfqd, cfqg, cic);
>>
>> - spin_unlock_irqrestore(q->queue_lock, flags);
>> + spin_unlock_irqrestore(&cfqg->lock, flags);
>> }
>> }
>>
>> @@ -2734,7 +2790,7 @@ cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
>> cfqd->queue->node);
>> if (cic) {
>> cic->last_end_request = jiffies;
>> - INIT_LIST_HEAD(&cic->queue_list);
>> + INIT_LIST_HEAD(&cic->group_list);
>> INIT_HLIST_NODE(&cic->cic_list);
>> cic->dtor = cfq_free_io_context;
>> cic->exit = cfq_exit_io_context;
>> @@ -2801,8 +2857,7 @@ static void changed_ioprio(struct io_context *ioc, struct cfq_io_context *cic)
>> cfqq = cic->cfqq[BLK_RW_ASYNC];
>> if (cfqq) {
>> struct cfq_queue *new_cfqq;
>> - new_cfqq = cfq_get_queue(cfqd, BLK_RW_ASYNC, cic->ioc,
>> - GFP_ATOMIC);
>> + new_cfqq = cfq_get_queue(cfqd, BLK_RW_ASYNC, cic, GFP_ATOMIC);
>> if (new_cfqq) {
>> cic->cfqq[BLK_RW_ASYNC] = new_cfqq;
>> cfq_put_queue(cfqq);
>> @@ -2879,16 +2934,14 @@ static void cfq_ioc_set_cgroup(struct io_context *ioc)
>>
>> static struct cfq_queue *
>> cfq_find_alloc_queue(struct cfq_data *cfqd, bool is_sync,
>> - struct io_context *ioc, gfp_t gfp_mask)
>> + struct cfq_io_context *cic, gfp_t gfp_mask)
>> {
>> struct cfq_queue *cfqq, *new_cfqq = NULL;
>> - struct cfq_io_context *cic;
>> + struct io_context *ioc = cic->ioc;
>> struct cfq_group *cfqg;
>>
>> retry:
>> - cfqg = cfq_get_cfqg(cfqd, NULL, 1);
>> - cic = cfq_cic_lookup(cfqd, ioc);
>> - /* cic always exists here */
>> + cfqg = cic_to_cfqg(cic);
>> cfqq = cic_to_cfqq(cic, is_sync);
>>
>> /*
>> @@ -2930,36 +2983,38 @@ retry:
>> }
>>
>> static struct cfq_queue **
>> -cfq_async_queue_prio(struct cfq_data *cfqd, int ioprio_class, int ioprio)
>> +cfq_async_queue_prio(struct cfq_group *cfqg, int ioprio_class, int ioprio)
>> {
>> switch (ioprio_class) {
>> case IOPRIO_CLASS_RT:
>> - return&cfqd->async_cfqq[0][ioprio];
>> + return&cfqg->async_cfqq[0][ioprio];
>> case IOPRIO_CLASS_BE:
>> - return&cfqd->async_cfqq[1][ioprio];
>> + return&cfqg->async_cfqq[1][ioprio];
>> case IOPRIO_CLASS_IDLE:
>> - return&cfqd->async_idle_cfqq;
>> + return&cfqg->async_idle_cfqq;
>> default:
>> BUG();
>> }
>> }
>>
>> static struct cfq_queue *
>> -cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct io_context *ioc,
>> +cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct cfq_io_context *cic,
>> gfp_t gfp_mask)
>> {
>> + struct io_context *ioc = cic->ioc;
>> const int ioprio = task_ioprio(ioc);
>> const int ioprio_class = task_ioprio_class(ioc);
>> + struct cfq_group *cfqg = cic_to_cfqg(cic);
>> struct cfq_queue **async_cfqq = NULL;
>> struct cfq_queue *cfqq = NULL;
>>
>> if (!is_sync) {
>> - async_cfqq = cfq_async_queue_prio(cfqd, ioprio_class, ioprio);
>> + async_cfqq = cfq_async_queue_prio(cfqg, ioprio_class, ioprio);
>> cfqq = *async_cfqq;
>> }
>>
>> if (!cfqq)
>> - cfqq = cfq_find_alloc_queue(cfqd, is_sync, ioc, gfp_mask);
>> + cfqq = cfq_find_alloc_queue(cfqd, is_sync, cic, gfp_mask);
>>
>> /*
>> * pin the queue now that it's allocated, scheduler exit will prune it
>> @@ -2977,19 +3032,19 @@ cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct io_context *ioc,
>> * We drop cfq io contexts lazily, so we may find a dead one.
>> */
>> static void
>> -cfq_drop_dead_cic(struct cfq_data *cfqd, struct io_context *ioc,
>> - struct cfq_io_context *cic)
>> +cfq_drop_dead_cic(struct cfq_data *cfqd, struct cfq_group *cfqg,
>> + struct io_context *ioc, struct cfq_io_context *cic)
>> {
>> unsigned long flags;
>>
>> - WARN_ON(!list_empty(&cic->queue_list));
>> - BUG_ON(cic->key != cfqd_dead_key(cfqd));
>> + WARN_ON(!list_empty(&cic->group_list));
>> + BUG_ON(cic->key != cfqg_dead_key(cfqg));
>>
>> spin_lock_irqsave(&ioc->lock, flags);
>>
>> BUG_ON(ioc->ioc_data == cic);
>>
>> - radix_tree_delete(&ioc->radix_root, cfqd->cic_index);
>> + radix_tree_delete(&ioc->radix_root, cfqg->cic_index);
>> hlist_del_rcu(&cic->cic_list);
>> spin_unlock_irqrestore(&ioc->lock, flags);
>>
>> @@ -2997,11 +3052,14 @@ cfq_drop_dead_cic(struct cfq_data *cfqd, struct io_context *ioc,
>> }
>>
>> static struct cfq_io_context *
>> -cfq_cic_lookup(struct cfq_data *cfqd, struct io_context *ioc)
>> +cfq_cic_lookup(struct cfq_data *cfqd, struct cfq_group *cfqg,
>> + struct io_context *ioc)
>> {
>> struct cfq_io_context *cic;
>> unsigned long flags;
>>
>> + if (!cfqg)
>> + return NULL;
>> if (unlikely(!ioc))
>> return NULL;
>>
>> @@ -3011,18 +3069,18 @@ cfq_cic_lookup(struct cfq_data *cfqd, struct io_context *ioc)
>> * we maintain a last-hit cache, to avoid browsing over the tree
>> */
>> cic = rcu_dereference(ioc->ioc_data);
>> - if (cic&& cic->key == cfqd) {
>> + if (cic&& cic->key == cfqg) {
>> rcu_read_unlock();
>> return cic;
>> }
>>
>> do {
>> - cic = radix_tree_lookup(&ioc->radix_root, cfqd->cic_index);
>> + cic = radix_tree_lookup(&ioc->radix_root, cfqg->cic_index);
>> rcu_read_unlock();
>> if (!cic)
>> break;
>> - if (unlikely(cic->key != cfqd)) {
>> - cfq_drop_dead_cic(cfqd, ioc, cic);
>> + if (unlikely(cic->key != cfqg)) {
>> + cfq_drop_dead_cic(cfqd, cfqg, ioc, cic);
>> rcu_read_lock();
>> continue;
>> }
>> @@ -3037,24 +3095,25 @@ cfq_cic_lookup(struct cfq_data *cfqd, struct io_context *ioc)
>> }
>>
>> /*
>> - * Add cic into ioc, using cfqd as the search key. This enables us to lookup
>> + * Add cic into ioc, using cfqg as the search key. This enables us to lookup
>> * the process specific cfq io context when entered from the block layer.
>> - * Also adds the cic to a per-cfqd list, used when this queue is removed.
>> + * Also adds the cic to a per-cfqg list, used when the group is removed.
>> + * request_queue lock must be held.
>> */
>> -static int cfq_cic_link(struct cfq_data *cfqd, struct io_context *ioc,
>> - struct cfq_io_context *cic, gfp_t gfp_mask)
>> +static int cfq_cic_link(struct cfq_data *cfqd, struct cfq_group *cfqg,
>> + struct io_context *ioc, struct cfq_io_context *cic)
>> {
>> unsigned long flags;
>> int ret;
>>
>> - ret = radix_tree_preload(gfp_mask);
>> + ret = radix_tree_preload(GFP_ATOMIC);
>> if (!ret) {
>> cic->ioc = ioc;
>> - cic->key = cfqd;
>> + cic->key = cfqg;
>>
>> spin_lock_irqsave(&ioc->lock, flags);
>> ret = radix_tree_insert(&ioc->radix_root,
>> - cfqd->cic_index, cic);
>> + cfqg->cic_index, cic);
>> if (!ret)
>> hlist_add_head_rcu(&cic->cic_list,&ioc->cic_list);
>> spin_unlock_irqrestore(&ioc->lock, flags);
>> @@ -3062,9 +3121,9 @@ static int cfq_cic_link(struct cfq_data *cfqd, struct io_context *ioc,
>> radix_tree_preload_end();
>>
>> if (!ret) {
>> - spin_lock_irqsave(cfqd->queue->queue_lock, flags);
>> - list_add(&cic->queue_list,&cfqd->cic_list);
>> - spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
>> + spin_lock_irqsave(&cfqg->lock, flags);
>> + list_add(&cic->group_list,&cfqg->cic_list);
>> + spin_unlock_irqrestore(&cfqg->lock, flags);
>> }
>> }
>>
>> @@ -3080,10 +3139,12 @@ static int cfq_cic_link(struct cfq_data *cfqd, struct io_context *ioc,
>> * than one device managed by cfq.
>> */
>> static struct cfq_io_context *
>> -cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
>> +cfq_get_io_context(struct cfq_data *cfqd, struct bio *bio, gfp_t gfp_mask)
>> {
>> struct io_context *ioc = NULL;
>> struct cfq_io_context *cic;
>> + struct cfq_group *cfqg, *cfqg2;
>> + unsigned long flags;
>>
>> might_sleep_if(gfp_mask& __GFP_WAIT);
>>
>> @@ -3091,18 +3152,38 @@ cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
>> if (!ioc)
>> return NULL;
>>
>> - cic = cfq_cic_lookup(cfqd, ioc);
>> + spin_lock_irqsave(cfqd->queue->queue_lock, flags);
>> +retry_cfqg:
>> + cfqg = cfq_get_cfqg(cfqd, bio, 1);
>> +retry_cic:
>> + cic = cfq_cic_lookup(cfqd, cfqg, ioc);
>> if (cic)
>> goto out;
>> + spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
>>
>> cic = cfq_alloc_io_context(cfqd, gfp_mask);
>> if (cic == NULL)
>> goto err;
>>
>> - if (cfq_cic_link(cfqd, ioc, cic, gfp_mask))
>> + spin_lock_irqsave(cfqd->queue->queue_lock, flags);
>> +
>> + /* check the consistency breakage during unlock period */
>> + cfqg2 = cfq_get_cfqg(cfqd, bio, 0);
>> + if (cfqg != cfqg2) {
>> + cfq_cic_free(cic);
>> + if (!cfqg2)
>> + goto retry_cfqg;
>> + else {
>> + cfqg = cfqg2;
>> + goto retry_cic;
>> + }
>> + }
>> +
>> + if (cfq_cic_link(cfqd, cfqg, ioc, cic))
>> goto err_free;
>>
>> out:
>> + spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
>> smp_read_barrier_depends();
>> if (unlikely(ioc->ioprio_changed))
>> cfq_ioc_set_ioprio(ioc);
>> @@ -3113,6 +3194,7 @@ out:
>> #endif
>> return cic;
>> err_free:
>> + spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
>> cfq_cic_free(cic);
>> err:
>> put_io_context(ioc);
>> @@ -3537,6 +3619,7 @@ static inline int __cfq_may_queue(struct cfq_queue *cfqq)
>> static int cfq_may_queue(struct request_queue *q, struct bio *bio, int rw)
>> {
>> struct cfq_data *cfqd = q->elevator->elevator_data;
>> + struct cfq_group *cfqg;
>> struct task_struct *tsk = current;
>> struct cfq_io_context *cic;
>> struct cfq_queue *cfqq;
>> @@ -3547,7 +3630,8 @@ static int cfq_may_queue(struct request_queue *q, struct bio *bio, int rw)
>> * so just lookup a possibly existing queue, or return 'may queue'
>> * if that fails
>> */
>> - cic = cfq_cic_lookup(cfqd, tsk->io_context);
>> + cfqg = cfq_get_cfqg(cfqd, bio, 0);
>> + cic = cfq_cic_lookup(cfqd, cfqg, tsk->io_context);
>> if (!cic)
>> return ELV_MQUEUE_MAY;
>>
>> @@ -3636,7 +3720,7 @@ cfq_set_request(struct request_queue *q, struct request *rq, struct bio *bio,
>>
>> might_sleep_if(gfp_mask& __GFP_WAIT);
>>
>> - cic = cfq_get_io_context(cfqd, gfp_mask);
>> + cic = cfq_get_io_context(cfqd, bio, gfp_mask);
>>
>> spin_lock_irqsave(q->queue_lock, flags);
>>
>> @@ -3646,7 +3730,7 @@ cfq_set_request(struct request_queue *q, struct request *rq, struct bio *bio,
>> new_queue:
>> cfqq = cic_to_cfqq(cic, is_sync);
>> if (!cfqq || cfqq ==&cfqd->oom_cfqq) {
>> - cfqq = cfq_get_queue(cfqd, is_sync, cic->ioc, gfp_mask);
>> + cfqq = cfq_get_queue(cfqd, is_sync, cic, gfp_mask);
>> cic_set_cfqq(cic, cfqq, is_sync);
>> } else {
>> /*
>> @@ -3762,19 +3846,19 @@ static void cfq_shutdown_timer_wq(struct cfq_data *cfqd)
>> cancel_work_sync(&cfqd->unplug_work);
>> }
>>
>> -static void cfq_put_async_queues(struct cfq_data *cfqd)
>> +static void cfq_put_async_queues(struct cfq_group *cfqg)
>> {
>> int i;
>>
>> for (i = 0; i< IOPRIO_BE_NR; i++) {
>> - if (cfqd->async_cfqq[0][i])
>> - cfq_put_queue(cfqd->async_cfqq[0][i]);
>> - if (cfqd->async_cfqq[1][i])
>> - cfq_put_queue(cfqd->async_cfqq[1][i]);
>> + if (cfqg->async_cfqq[0][i])
>> + cfq_put_queue(cfqg->async_cfqq[0][i]);
>> + if (cfqg->async_cfqq[1][i])
>> + cfq_put_queue(cfqg->async_cfqq[1][i]);
>> }
>>
>> - if (cfqd->async_idle_cfqq)
>> - cfq_put_queue(cfqd->async_idle_cfqq);
>> + if (cfqg->async_idle_cfqq)
>> + cfq_put_queue(cfqg->async_idle_cfqq);
>> }
>>
>> static void cfq_cfqd_free(struct rcu_head *head)
>> @@ -3794,15 +3878,6 @@ static void cfq_exit_queue(struct elevator_queue *e)
>> if (cfqd->active_queue)
>> __cfq_slice_expired(cfqd, cfqd->active_queue, 0);
>>
>> - while (!list_empty(&cfqd->cic_list)) {
>> - struct cfq_io_context *cic = list_entry(cfqd->cic_list.next,
>> - struct cfq_io_context,
>> - queue_list);
>> -
>> - __cfq_exit_single_io_context(cfqd, cic);
>> - }
>> -
>> - cfq_put_async_queues(cfqd);
>> cfq_release_cfq_groups(cfqd);
>> cfq_blkiocg_del_blkio_group(&cfqd->root_group.blkg);
>>
>> @@ -3810,10 +3885,6 @@ static void cfq_exit_queue(struct elevator_queue *e)
>>
>> cfq_shutdown_timer_wq(cfqd);
>>
>> - spin_lock(&cic_index_lock);
>> - ida_remove(&cic_index_ida, cfqd->cic_index);
>> - spin_unlock(&cic_index_lock);
>> -
>> /* Wait for cfqg->blkg->key accessors to exit their grace periods. */
>> call_rcu(&cfqd->rcu, cfq_cfqd_free);
>> }
>> @@ -3823,7 +3894,7 @@ static int cfq_alloc_cic_index(void)
>> int index, error;
>>
>> do {
>> - if (!ida_pre_get(&cic_index_ida, GFP_KERNEL))
>> + if (!ida_pre_get(&cic_index_ida, GFP_ATOMIC))
>> return -ENOMEM;
>>
>> spin_lock(&cic_index_lock);
>> @@ -3839,20 +3910,18 @@ static int cfq_alloc_cic_index(void)
>> static void *cfq_init_queue(struct request_queue *q)
>> {
>> struct cfq_data *cfqd;
>> - int i, j;
>> + int i, j, idx;
>> struct cfq_group *cfqg;
>> struct cfq_rb_root *st;
>>
>> - i = cfq_alloc_cic_index();
>> - if (i< 0)
>> + idx = cfq_alloc_cic_index();
>> + if (idx< 0)
>> return NULL;
>>
>> cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL | __GFP_ZERO, q->node);
>> if (!cfqd)
>> return NULL;
>>
>> - cfqd->cic_index = i;
>> -
>> /* Init root service tree */
>> cfqd->grp_service_tree = CFQ_RB_ROOT;
>>
>> @@ -3861,6 +3930,9 @@ static void *cfq_init_queue(struct request_queue *q)
>> for_each_cfqg_st(cfqg, i, j, st)
>> *st = CFQ_RB_ROOT;
>> RB_CLEAR_NODE(&cfqg->rb_node);
>> + cfqg->cfqd = cfqd;
>> + cfqg->cic_index = idx;
>> + INIT_LIST_HEAD(&cfqg->cic_list);
>>
>> /* Give preference to root group over other groups */
>> cfqg->weight = 2*BLKIO_WEIGHT_DEFAULT;
>> @@ -3874,6 +3946,7 @@ static void *cfq_init_queue(struct request_queue *q)
>> rcu_read_lock();
>> cfq_blkiocg_add_blkio_group(&blkio_root_cgroup,&cfqg->blkg,
>> (void *)cfqd, 0);
>> + hlist_add_head(&cfqg->cfqd_node,&cfqd->cfqg_list);
>> rcu_read_unlock();
>> #endif
>> /*
>> @@ -3893,8 +3966,6 @@ static void *cfq_init_queue(struct request_queue *q)
>> atomic_inc(&cfqd->oom_cfqq.ref);
>> cfq_link_cfqq_cfqg(&cfqd->oom_cfqq,&cfqd->root_group);
>>
>> - INIT_LIST_HEAD(&cfqd->cic_list);
>> -
>> cfqd->queue = q;
>>
>> init_timer(&cfqd->idle_slice_timer);
>> diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
>> index 64d5291..6c05b54 100644
>> --- a/include/linux/iocontext.h
>> +++ b/include/linux/iocontext.h
>> @@ -18,7 +18,7 @@ struct cfq_io_context {
>> unsigned long ttime_samples;
>> unsigned long ttime_mean;
>>
>> - struct list_head queue_list;
>> + struct list_head group_list;
>> struct hlist_node cic_list;
>>
>> void (*dtor)(struct io_context *); /* destructor */
>> --
>> 1.6.2.5
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
>> the body of a message to majordomo@...r.kernel.org
>> More majordomo info at http://vger.kernel.org/majordomo-info.html
>> Please read the FAQ at http://www.tux.org/lkml/
>>
>
--
IKEDA, Munehiro
NEC Corporation of America
m-ikeda@...jp.nec.com
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists