[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <AANLkTinFZ12kS7p4zigKRBg2f66L+CcBgec6w8qFGXKH@mail.gmail.com>
Date:	Thu, 12 Aug 2010 18:24:49 -0700
From:	Nauman Rafique <nauman@...gle.com>
To:	Munehiro Ikeda <m-ikeda@...jp.nec.com>
Cc:	linux-kernel@...r.kernel.org, jens.axboe@...cle.com,
	Vivek Goyal <vgoyal@...hat.com>,
	Ryo Tsuruta <ryov@...inux.co.jp>, taka@...inux.co.jp,
	kamezawa.hiroyu@...fujitsu.com,
	Andrea Righi <righi.andrea@...il.com>,
	Gui Jianfeng <guijianfeng@...fujitsu.com>,
	akpm@...ux-foundation.org, balbir@...ux.vnet.ibm.com
Subject: Re: [RFC][PATCH 10/11] blkiocg async: Async queue per cfq_group
Muuhh,
I do not understand the motivation behind making cfq_io_context per
cgroup. We can extract the cgroup id from the bio and use that to look up
the cgroup and its async queues. What am I missing?
On Thu, Jul 8, 2010 at 8:22 PM, Munehiro Ikeda <m-ikeda@...jp.nec.com> wrote:
> This is the main body for async capability of blkio controller.
> The basic ideas are
>  - To move async queues from cfq_data to cfq_group, and
>  - To associate cfq_io_context with cfq_group
>
> Each cfq_io_context, which used to be created per io_context
> per device, is now created per io_context per cfq_group.
> Each cfq_group is created per cgroup per device, so
> a cfq_io_context is now effectively created per
> io_context per cgroup per device.
>
> To protect the link between cfq_io_context and cfq_group,
> the locking code is modified in several places.
>
> ToDo:
> - Lock protection still might be imperfect and more investigation
>  is needed.
>
> - The cic_index of the root cfq_group (cfqd->root_group.cic_index) is
>  never removed and lasts beyond elevator switching.
>  This issue should be solved.
>
> Signed-off-by: Munehiro "Muuhh" Ikeda <m-ikeda@...jp.nec.com>
> ---
>  block/cfq-iosched.c       |  277 ++++++++++++++++++++++++++++-----------------
>  include/linux/iocontext.h |    2 +-
>  2 files changed, 175 insertions(+), 104 deletions(-)
>
> diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
> index 68f76e9..4186c30 100644
> --- a/block/cfq-iosched.c
> +++ b/block/cfq-iosched.c
> @@ -191,10 +191,23 @@ struct cfq_group {
>        struct cfq_rb_root service_trees[2][3];
>        struct cfq_rb_root service_tree_idle;
>
> +       /*
> +        * async queue for each priority case
> +        */
> +       struct cfq_queue *async_cfqq[2][IOPRIO_BE_NR];
> +       struct cfq_queue *async_idle_cfqq;
> +
>        unsigned long saved_workload_slice;
>        enum wl_type_t saved_workload;
>        enum wl_prio_t saved_serving_prio;
>        struct blkio_group blkg;
> +       struct cfq_data *cfqd;
> +
> +       /* lock for cic_list */
> +       spinlock_t lock;
> +       unsigned int cic_index;
> +       struct list_head cic_list;
> +
>  #ifdef CONFIG_CFQ_GROUP_IOSCHED
>        struct hlist_node cfqd_node;
>        atomic_t ref;
> @@ -254,12 +267,6 @@ struct cfq_data {
>        struct cfq_queue *active_queue;
>        struct cfq_io_context *active_cic;
>
> -       /*
> -        * async queue for each priority case
> -        */
> -       struct cfq_queue *async_cfqq[2][IOPRIO_BE_NR];
> -       struct cfq_queue *async_idle_cfqq;
> -
>        sector_t last_position;
>
>        /*
> @@ -275,8 +282,6 @@ struct cfq_data {
>        unsigned int cfq_latency;
>        unsigned int cfq_group_isolation;
>
> -       unsigned int cic_index;
> -       struct list_head cic_list;
>
>        /*
>         * Fallback dummy cfqq for extreme OOM conditions
> @@ -418,10 +423,16 @@ static inline int cfqg_busy_async_queues(struct cfq_data *cfqd,
>  }
>
>  static void cfq_dispatch_insert(struct request_queue *, struct request *);
> +static void __cfq_exit_single_io_context(struct cfq_data *,
> +                                        struct cfq_group *,
> +                                        struct cfq_io_context *);
>  static struct cfq_queue *cfq_get_queue(struct cfq_data *, bool,
> -                                      struct io_context *, gfp_t);
> +                                      struct cfq_io_context *, gfp_t);
>  static struct cfq_io_context *cfq_cic_lookup(struct cfq_data *,
> -                                               struct io_context *);
> +                                            struct cfq_group *,
> +                                            struct io_context *);
> +static void cfq_put_async_queues(struct cfq_group *);
> +static int cfq_alloc_cic_index(void);
>
>  static inline struct cfq_queue *cic_to_cfqq(struct cfq_io_context *cic,
>                                            bool is_sync)
> @@ -438,17 +449,28 @@ static inline void cic_set_cfqq(struct cfq_io_context *cic,
>  #define CIC_DEAD_KEY   1ul
>  #define CIC_DEAD_INDEX_SHIFT   1
>
> -static inline void *cfqd_dead_key(struct cfq_data *cfqd)
> +static inline void *cfqg_dead_key(struct cfq_group *cfqg)
>  {
> -       return (void *)(cfqd->cic_index << CIC_DEAD_INDEX_SHIFT | CIC_DEAD_KEY);
> +       return (void *)(cfqg->cic_index << CIC_DEAD_INDEX_SHIFT | CIC_DEAD_KEY);
> +}
> +
> +static inline struct cfq_group *cic_to_cfqg(struct cfq_io_context *cic)
> +{
> +       struct cfq_group *cfqg = cic->key;
> +
> +       if (unlikely((unsigned long) cfqg & CIC_DEAD_KEY))
> +               cfqg = NULL;
> +
> +       return cfqg;
>  }
>
>  static inline struct cfq_data *cic_to_cfqd(struct cfq_io_context *cic)
>  {
> -       struct cfq_data *cfqd = cic->key;
> +       struct cfq_data *cfqd = NULL;
> +       struct cfq_group *cfqg = cic_to_cfqg(cic);
>
> -       if (unlikely((unsigned long) cfqd & CIC_DEAD_KEY))
> -               return NULL;
> +       if (likely(cfqg))
> +               cfqd =  cfqg->cfqd;
>
>        return cfqd;
>  }
> @@ -959,12 +981,12 @@ cfq_update_blkio_group_weight(struct blkio_group *blkg, unsigned int weight)
>  }
>
>  static struct cfq_group *
> -cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create)
> +cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct blkio_cgroup *blkcg,
> +                   int create)
>  {
> -       struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);
>        struct cfq_group *cfqg = NULL;
>        void *key = cfqd;
> -       int i, j;
> +       int i, j, idx;
>        struct cfq_rb_root *st;
>        struct backing_dev_info *bdi = &cfqd->queue->backing_dev_info;
>        unsigned int major, minor;
> @@ -978,14 +1000,21 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create)
>        if (cfqg || !create)
>                goto done;
>
> +       idx = cfq_alloc_cic_index();
> +       if (idx < 0)
> +               goto done;
> +
>        cfqg = kzalloc_node(sizeof(*cfqg), GFP_ATOMIC, cfqd->queue->node);
>        if (!cfqg)
> -               goto done;
> +               goto err;
>
>        for_each_cfqg_st(cfqg, i, j, st)
>                *st = CFQ_RB_ROOT;
>        RB_CLEAR_NODE(&cfqg->rb_node);
>
> +       spin_lock_init(&cfqg->lock);
> +       INIT_LIST_HEAD(&cfqg->cic_list);
> +
>        /*
>         * Take the initial reference that will be released on destroy
>         * This can be thought of a joint reference by cgroup and
> @@ -1002,7 +1031,14 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create)
>
>        /* Add group on cfqd list */
>        hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list);
> +       cfqg->cfqd = cfqd;
> +       cfqg->cic_index = idx;
> +       goto done;
>
> +err:
> +       spin_lock(&cic_index_lock);
> +       ida_remove(&cic_index_ida, idx);
> +       spin_unlock(&cic_index_lock);
>  done:
>        return cfqg;
>  }
> @@ -1095,10 +1131,6 @@ static inline struct cfq_group *cfq_ref_get_cfqg(struct cfq_group *cfqg)
>
>  static void cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg)
>  {
> -       /* Currently, all async queues are mapped to root group */
> -       if (!cfq_cfqq_sync(cfqq))
> -               cfqg = &cfqq->cfqd->root_group;
> -
>        cfqq->cfqg = cfqg;
>        /* cfqq reference on cfqg */
>        atomic_inc(&cfqq->cfqg->ref);
> @@ -1114,6 +1146,16 @@ static void cfq_put_cfqg(struct cfq_group *cfqg)
>                return;
>        for_each_cfqg_st(cfqg, i, j, st)
>                BUG_ON(!RB_EMPTY_ROOT(&st->rb) || st->active != NULL);
> +
> +       /**
> +        * ToDo:
> +        * root_group never reaches here and cic_index is never
> +        * removed.  Unused cic_index lasts by elevator switching.
> +        */
> +       spin_lock(&cic_index_lock);
> +       ida_remove(&cic_index_ida, cfqg->cic_index);
> +       spin_unlock(&cic_index_lock);
> +
>        kfree(cfqg);
>  }
>
> @@ -1122,6 +1164,15 @@ static void cfq_destroy_cfqg(struct cfq_data *cfqd, struct cfq_group *cfqg)
>        /* Something wrong if we are trying to remove same group twice */
>        BUG_ON(hlist_unhashed(&cfqg->cfqd_node));
>
> +       while (!list_empty(&cfqg->cic_list)) {
> +               struct cfq_io_context *cic = list_entry(cfqg->cic_list.next,
> +                                                       struct cfq_io_context,
> +                                                       group_list);
> +
> +               __cfq_exit_single_io_context(cfqd, cfqg, cic);
> +       }
> +
> +       cfq_put_async_queues(cfqg);
>        hlist_del_init(&cfqg->cfqd_node);
>
>        /*
> @@ -1497,10 +1548,12 @@ static struct request *
>  cfq_find_rq_fmerge(struct cfq_data *cfqd, struct bio *bio)
>  {
>        struct task_struct *tsk = current;
> +       struct cfq_group *cfqg;
>        struct cfq_io_context *cic;
>        struct cfq_queue *cfqq;
>
> -       cic = cfq_cic_lookup(cfqd, tsk->io_context);
> +       cfqg = cfq_get_cfqg(cfqd, bio, 0);
> +       cic = cfq_cic_lookup(cfqd, cfqg, tsk->io_context);
>        if (!cic)
>                return NULL;
>
> @@ -1611,6 +1664,7 @@ static int cfq_allow_merge(struct request_queue *q, struct request *rq,
>                           struct bio *bio)
>  {
>        struct cfq_data *cfqd = q->elevator->elevator_data;
> +       struct cfq_group *cfqg;
>        struct cfq_io_context *cic;
>        struct cfq_queue *cfqq;
>
> @@ -1624,7 +1678,8 @@ static int cfq_allow_merge(struct request_queue *q, struct request *rq,
>         * Lookup the cfqq that this bio will be queued with. Allow
>         * merge only if rq is queued there.
>         */
> -       cic = cfq_cic_lookup(cfqd, current->io_context);
> +       cfqg = cfq_get_cfqg(cfqd, bio, 0);
> +       cic = cfq_cic_lookup(cfqd, cfqg, current->io_context);
>        if (!cic)
>                return false;
>
> @@ -2667,17 +2722,18 @@ static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
>  }
>
>  static void __cfq_exit_single_io_context(struct cfq_data *cfqd,
> +                                        struct cfq_group *cfqg,
>                                         struct cfq_io_context *cic)
>  {
>        struct io_context *ioc = cic->ioc;
>
> -       list_del_init(&cic->queue_list);
> +       list_del_init(&cic->group_list);
>
>        /*
>         * Make sure dead mark is seen for dead queues
>         */
>        smp_wmb();
> -       cic->key = cfqd_dead_key(cfqd);
> +       cic->key = cfqg_dead_key(cfqg);
>
>        if (ioc->ioc_data == cic)
>                rcu_assign_pointer(ioc->ioc_data, NULL);
> @@ -2696,23 +2752,23 @@ static void __cfq_exit_single_io_context(struct cfq_data *cfqd,
>  static void cfq_exit_single_io_context(struct io_context *ioc,
>                                       struct cfq_io_context *cic)
>  {
> -       struct cfq_data *cfqd = cic_to_cfqd(cic);
> +       struct cfq_group *cfqg = cic_to_cfqg(cic);
>
> -       if (cfqd) {
> -               struct request_queue *q = cfqd->queue;
> +       if (cfqg) {
> +               struct cfq_data *cfqd = cfqg->cfqd;
>                unsigned long flags;
>
> -               spin_lock_irqsave(q->queue_lock, flags);
> +               spin_lock_irqsave(&cfqg->lock, flags);
>
>                /*
>                 * Ensure we get a fresh copy of the ->key to prevent
>                 * race between exiting task and queue
>                 */
>                smp_read_barrier_depends();
> -               if (cic->key == cfqd)
> -                       __cfq_exit_single_io_context(cfqd, cic);
> +               if (cic->key == cfqg)
> +                       __cfq_exit_single_io_context(cfqd, cfqg, cic);
>
> -               spin_unlock_irqrestore(q->queue_lock, flags);
> +               spin_unlock_irqrestore(&cfqg->lock, flags);
>        }
>  }
>
> @@ -2734,7 +2790,7 @@ cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
>                                                        cfqd->queue->node);
>        if (cic) {
>                cic->last_end_request = jiffies;
> -               INIT_LIST_HEAD(&cic->queue_list);
> +               INIT_LIST_HEAD(&cic->group_list);
>                INIT_HLIST_NODE(&cic->cic_list);
>                cic->dtor = cfq_free_io_context;
>                cic->exit = cfq_exit_io_context;
> @@ -2801,8 +2857,7 @@ static void changed_ioprio(struct io_context *ioc, struct cfq_io_context *cic)
>        cfqq = cic->cfqq[BLK_RW_ASYNC];
>        if (cfqq) {
>                struct cfq_queue *new_cfqq;
> -               new_cfqq = cfq_get_queue(cfqd, BLK_RW_ASYNC, cic->ioc,
> -                                               GFP_ATOMIC);
> +               new_cfqq = cfq_get_queue(cfqd, BLK_RW_ASYNC, cic, GFP_ATOMIC);
>                if (new_cfqq) {
>                        cic->cfqq[BLK_RW_ASYNC] = new_cfqq;
>                        cfq_put_queue(cfqq);
> @@ -2879,16 +2934,14 @@ static void cfq_ioc_set_cgroup(struct io_context *ioc)
>
>  static struct cfq_queue *
>  cfq_find_alloc_queue(struct cfq_data *cfqd, bool is_sync,
> -                    struct io_context *ioc, gfp_t gfp_mask)
> +                    struct cfq_io_context *cic, gfp_t gfp_mask)
>  {
>        struct cfq_queue *cfqq, *new_cfqq = NULL;
> -       struct cfq_io_context *cic;
> +       struct io_context *ioc = cic->ioc;
>        struct cfq_group *cfqg;
>
>  retry:
> -       cfqg = cfq_get_cfqg(cfqd, NULL, 1);
> -       cic = cfq_cic_lookup(cfqd, ioc);
> -       /* cic always exists here */
> +       cfqg = cic_to_cfqg(cic);
>        cfqq = cic_to_cfqq(cic, is_sync);
>
>        /*
> @@ -2930,36 +2983,38 @@ retry:
>  }
>
>  static struct cfq_queue **
> -cfq_async_queue_prio(struct cfq_data *cfqd, int ioprio_class, int ioprio)
> +cfq_async_queue_prio(struct cfq_group *cfqg, int ioprio_class, int ioprio)
>  {
>        switch (ioprio_class) {
>        case IOPRIO_CLASS_RT:
> -               return &cfqd->async_cfqq[0][ioprio];
> +               return &cfqg->async_cfqq[0][ioprio];
>        case IOPRIO_CLASS_BE:
> -               return &cfqd->async_cfqq[1][ioprio];
> +               return &cfqg->async_cfqq[1][ioprio];
>        case IOPRIO_CLASS_IDLE:
> -               return &cfqd->async_idle_cfqq;
> +               return &cfqg->async_idle_cfqq;
>        default:
>                BUG();
>        }
>  }
>
>  static struct cfq_queue *
> -cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct io_context *ioc,
> +cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct cfq_io_context *cic,
>              gfp_t gfp_mask)
>  {
> +       struct io_context *ioc = cic->ioc;
>        const int ioprio = task_ioprio(ioc);
>        const int ioprio_class = task_ioprio_class(ioc);
> +       struct cfq_group *cfqg = cic_to_cfqg(cic);
>        struct cfq_queue **async_cfqq = NULL;
>        struct cfq_queue *cfqq = NULL;
>
>        if (!is_sync) {
> -               async_cfqq = cfq_async_queue_prio(cfqd, ioprio_class, ioprio);
> +               async_cfqq = cfq_async_queue_prio(cfqg, ioprio_class, ioprio);
>                cfqq = *async_cfqq;
>        }
>
>        if (!cfqq)
> -               cfqq = cfq_find_alloc_queue(cfqd, is_sync, ioc, gfp_mask);
> +               cfqq = cfq_find_alloc_queue(cfqd, is_sync, cic, gfp_mask);
>
>        /*
>         * pin the queue now that it's allocated, scheduler exit will prune it
> @@ -2977,19 +3032,19 @@ cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct io_context *ioc,
>  * We drop cfq io contexts lazily, so we may find a dead one.
>  */
>  static void
> -cfq_drop_dead_cic(struct cfq_data *cfqd, struct io_context *ioc,
> -                 struct cfq_io_context *cic)
> +cfq_drop_dead_cic(struct cfq_data *cfqd, struct cfq_group *cfqg,
> +                 struct io_context *ioc, struct cfq_io_context *cic)
>  {
>        unsigned long flags;
>
> -       WARN_ON(!list_empty(&cic->queue_list));
> -       BUG_ON(cic->key != cfqd_dead_key(cfqd));
> +       WARN_ON(!list_empty(&cic->group_list));
> +       BUG_ON(cic->key != cfqg_dead_key(cfqg));
>
>        spin_lock_irqsave(&ioc->lock, flags);
>
>        BUG_ON(ioc->ioc_data == cic);
>
> -       radix_tree_delete(&ioc->radix_root, cfqd->cic_index);
> +       radix_tree_delete(&ioc->radix_root, cfqg->cic_index);
>        hlist_del_rcu(&cic->cic_list);
>        spin_unlock_irqrestore(&ioc->lock, flags);
>
> @@ -2997,11 +3052,14 @@ cfq_drop_dead_cic(struct cfq_data *cfqd, struct io_context *ioc,
>  }
>
>  static struct cfq_io_context *
> -cfq_cic_lookup(struct cfq_data *cfqd, struct io_context *ioc)
> +cfq_cic_lookup(struct cfq_data *cfqd, struct cfq_group *cfqg,
> +              struct io_context *ioc)
>  {
>        struct cfq_io_context *cic;
>        unsigned long flags;
>
> +       if (!cfqg)
> +               return NULL;
>        if (unlikely(!ioc))
>                return NULL;
>
> @@ -3011,18 +3069,18 @@ cfq_cic_lookup(struct cfq_data *cfqd, struct io_context *ioc)
>         * we maintain a last-hit cache, to avoid browsing over the tree
>         */
>        cic = rcu_dereference(ioc->ioc_data);
> -       if (cic && cic->key == cfqd) {
> +       if (cic && cic->key == cfqg) {
>                rcu_read_unlock();
>                return cic;
>        }
>
>        do {
> -               cic = radix_tree_lookup(&ioc->radix_root, cfqd->cic_index);
> +               cic = radix_tree_lookup(&ioc->radix_root, cfqg->cic_index);
>                rcu_read_unlock();
>                if (!cic)
>                        break;
> -               if (unlikely(cic->key != cfqd)) {
> -                       cfq_drop_dead_cic(cfqd, ioc, cic);
> +               if (unlikely(cic->key != cfqg)) {
> +                       cfq_drop_dead_cic(cfqd, cfqg, ioc, cic);
>                        rcu_read_lock();
>                        continue;
>                }
> @@ -3037,24 +3095,25 @@ cfq_cic_lookup(struct cfq_data *cfqd, struct io_context *ioc)
>  }
>
>  /*
> - * Add cic into ioc, using cfqd as the search key. This enables us to lookup
> + * Add cic into ioc, using cfqg as the search key. This enables us to lookup
>  * the process specific cfq io context when entered from the block layer.
> - * Also adds the cic to a per-cfqd list, used when this queue is removed.
> + * Also adds the cic to a per-cfqg list, used when the group is removed.
> + * request_queue lock must be held.
>  */
> -static int cfq_cic_link(struct cfq_data *cfqd, struct io_context *ioc,
> -                       struct cfq_io_context *cic, gfp_t gfp_mask)
> +static int cfq_cic_link(struct cfq_data *cfqd, struct cfq_group *cfqg,
> +                       struct io_context *ioc, struct cfq_io_context *cic)
>  {
>        unsigned long flags;
>        int ret;
>
> -       ret = radix_tree_preload(gfp_mask);
> +       ret = radix_tree_preload(GFP_ATOMIC);
>        if (!ret) {
>                cic->ioc = ioc;
> -               cic->key = cfqd;
> +               cic->key = cfqg;
>
>                spin_lock_irqsave(&ioc->lock, flags);
>                ret = radix_tree_insert(&ioc->radix_root,
> -                                               cfqd->cic_index, cic);
> +                                               cfqg->cic_index, cic);
>                if (!ret)
>                        hlist_add_head_rcu(&cic->cic_list, &ioc->cic_list);
>                spin_unlock_irqrestore(&ioc->lock, flags);
> @@ -3062,9 +3121,9 @@ static int cfq_cic_link(struct cfq_data *cfqd, struct io_context *ioc,
>                radix_tree_preload_end();
>
>                if (!ret) {
> -                       spin_lock_irqsave(cfqd->queue->queue_lock, flags);
> -                       list_add(&cic->queue_list, &cfqd->cic_list);
> -                       spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
> +                       spin_lock_irqsave(&cfqg->lock, flags);
> +                       list_add(&cic->group_list, &cfqg->cic_list);
> +                       spin_unlock_irqrestore(&cfqg->lock, flags);
>                }
>        }
>
> @@ -3080,10 +3139,12 @@ static int cfq_cic_link(struct cfq_data *cfqd, struct io_context *ioc,
>  * than one device managed by cfq.
>  */
>  static struct cfq_io_context *
> -cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
> +cfq_get_io_context(struct cfq_data *cfqd, struct bio *bio, gfp_t gfp_mask)
>  {
>        struct io_context *ioc = NULL;
>        struct cfq_io_context *cic;
> +       struct cfq_group *cfqg, *cfqg2;
> +       unsigned long flags;
>
>        might_sleep_if(gfp_mask & __GFP_WAIT);
>
> @@ -3091,18 +3152,38 @@ cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
>        if (!ioc)
>                return NULL;
>
> -       cic = cfq_cic_lookup(cfqd, ioc);
> +       spin_lock_irqsave(cfqd->queue->queue_lock, flags);
> +retry_cfqg:
> +       cfqg = cfq_get_cfqg(cfqd, bio, 1);
> +retry_cic:
> +       cic = cfq_cic_lookup(cfqd, cfqg, ioc);
>        if (cic)
>                goto out;
> +       spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
>
>        cic = cfq_alloc_io_context(cfqd, gfp_mask);
>        if (cic == NULL)
>                goto err;
>
> -       if (cfq_cic_link(cfqd, ioc, cic, gfp_mask))
> +       spin_lock_irqsave(cfqd->queue->queue_lock, flags);
> +
> +       /* check the consistency breakage during unlock period */
> +       cfqg2 = cfq_get_cfqg(cfqd, bio, 0);
> +       if (cfqg != cfqg2) {
> +               cfq_cic_free(cic);
> +               if (!cfqg2)
> +                       goto retry_cfqg;
> +               else {
> +                       cfqg = cfqg2;
> +                       goto retry_cic;
> +               }
> +       }
> +
> +       if (cfq_cic_link(cfqd, cfqg, ioc, cic))
>                goto err_free;
>
>  out:
> +       spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
>        smp_read_barrier_depends();
>        if (unlikely(ioc->ioprio_changed))
>                cfq_ioc_set_ioprio(ioc);
> @@ -3113,6 +3194,7 @@ out:
>  #endif
>        return cic;
>  err_free:
> +       spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
>        cfq_cic_free(cic);
>  err:
>        put_io_context(ioc);
> @@ -3537,6 +3619,7 @@ static inline int __cfq_may_queue(struct cfq_queue *cfqq)
>  static int cfq_may_queue(struct request_queue *q, struct bio *bio, int rw)
>  {
>        struct cfq_data *cfqd = q->elevator->elevator_data;
> +       struct cfq_group *cfqg;
>        struct task_struct *tsk = current;
>        struct cfq_io_context *cic;
>        struct cfq_queue *cfqq;
> @@ -3547,7 +3630,8 @@ static int cfq_may_queue(struct request_queue *q, struct bio *bio, int rw)
>         * so just lookup a possibly existing queue, or return 'may queue'
>         * if that fails
>         */
> -       cic = cfq_cic_lookup(cfqd, tsk->io_context);
> +       cfqg = cfq_get_cfqg(cfqd, bio, 0);
> +       cic = cfq_cic_lookup(cfqd, cfqg, tsk->io_context);
>        if (!cic)
>                return ELV_MQUEUE_MAY;
>
> @@ -3636,7 +3720,7 @@ cfq_set_request(struct request_queue *q, struct request *rq, struct bio *bio,
>
>        might_sleep_if(gfp_mask & __GFP_WAIT);
>
> -       cic = cfq_get_io_context(cfqd, gfp_mask);
> +       cic = cfq_get_io_context(cfqd, bio, gfp_mask);
>
>        spin_lock_irqsave(q->queue_lock, flags);
>
> @@ -3646,7 +3730,7 @@ cfq_set_request(struct request_queue *q, struct request *rq, struct bio *bio,
>  new_queue:
>        cfqq = cic_to_cfqq(cic, is_sync);
>        if (!cfqq || cfqq == &cfqd->oom_cfqq) {
> -               cfqq = cfq_get_queue(cfqd, is_sync, cic->ioc, gfp_mask);
> +               cfqq = cfq_get_queue(cfqd, is_sync, cic, gfp_mask);
>                cic_set_cfqq(cic, cfqq, is_sync);
>        } else {
>                /*
> @@ -3762,19 +3846,19 @@ static void cfq_shutdown_timer_wq(struct cfq_data *cfqd)
>        cancel_work_sync(&cfqd->unplug_work);
>  }
>
> -static void cfq_put_async_queues(struct cfq_data *cfqd)
> +static void cfq_put_async_queues(struct cfq_group *cfqg)
>  {
>        int i;
>
>        for (i = 0; i < IOPRIO_BE_NR; i++) {
> -               if (cfqd->async_cfqq[0][i])
> -                       cfq_put_queue(cfqd->async_cfqq[0][i]);
> -               if (cfqd->async_cfqq[1][i])
> -                       cfq_put_queue(cfqd->async_cfqq[1][i]);
> +               if (cfqg->async_cfqq[0][i])
> +                       cfq_put_queue(cfqg->async_cfqq[0][i]);
> +               if (cfqg->async_cfqq[1][i])
> +                       cfq_put_queue(cfqg->async_cfqq[1][i]);
>        }
>
> -       if (cfqd->async_idle_cfqq)
> -               cfq_put_queue(cfqd->async_idle_cfqq);
> +       if (cfqg->async_idle_cfqq)
> +               cfq_put_queue(cfqg->async_idle_cfqq);
>  }
>
>  static void cfq_cfqd_free(struct rcu_head *head)
> @@ -3794,15 +3878,6 @@ static void cfq_exit_queue(struct elevator_queue *e)
>        if (cfqd->active_queue)
>                __cfq_slice_expired(cfqd, cfqd->active_queue, 0);
>
> -       while (!list_empty(&cfqd->cic_list)) {
> -               struct cfq_io_context *cic = list_entry(cfqd->cic_list.next,
> -                                                       struct cfq_io_context,
> -                                                       queue_list);
> -
> -               __cfq_exit_single_io_context(cfqd, cic);
> -       }
> -
> -       cfq_put_async_queues(cfqd);
>        cfq_release_cfq_groups(cfqd);
>        cfq_blkiocg_del_blkio_group(&cfqd->root_group.blkg);
>
> @@ -3810,10 +3885,6 @@ static void cfq_exit_queue(struct elevator_queue *e)
>
>        cfq_shutdown_timer_wq(cfqd);
>
> -       spin_lock(&cic_index_lock);
> -       ida_remove(&cic_index_ida, cfqd->cic_index);
> -       spin_unlock(&cic_index_lock);
> -
>        /* Wait for cfqg->blkg->key accessors to exit their grace periods. */
>        call_rcu(&cfqd->rcu, cfq_cfqd_free);
>  }
> @@ -3823,7 +3894,7 @@ static int cfq_alloc_cic_index(void)
>        int index, error;
>
>        do {
> -               if (!ida_pre_get(&cic_index_ida, GFP_KERNEL))
> +               if (!ida_pre_get(&cic_index_ida, GFP_ATOMIC))
>                        return -ENOMEM;
>
>                spin_lock(&cic_index_lock);
> @@ -3839,20 +3910,18 @@ static int cfq_alloc_cic_index(void)
>  static void *cfq_init_queue(struct request_queue *q)
>  {
>        struct cfq_data *cfqd;
> -       int i, j;
> +       int i, j, idx;
>        struct cfq_group *cfqg;
>        struct cfq_rb_root *st;
>
> -       i = cfq_alloc_cic_index();
> -       if (i < 0)
> +       idx = cfq_alloc_cic_index();
> +       if (idx < 0)
>                return NULL;
>
>        cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL | __GFP_ZERO, q->node);
>        if (!cfqd)
>                return NULL;
>
> -       cfqd->cic_index = i;
> -
>        /* Init root service tree */
>        cfqd->grp_service_tree = CFQ_RB_ROOT;
>
> @@ -3861,6 +3930,9 @@ static void *cfq_init_queue(struct request_queue *q)
>        for_each_cfqg_st(cfqg, i, j, st)
>                *st = CFQ_RB_ROOT;
>        RB_CLEAR_NODE(&cfqg->rb_node);
> +       cfqg->cfqd = cfqd;
> +       cfqg->cic_index = idx;
> +       INIT_LIST_HEAD(&cfqg->cic_list);
>
>        /* Give preference to root group over other groups */
>        cfqg->weight = 2*BLKIO_WEIGHT_DEFAULT;
> @@ -3874,6 +3946,7 @@ static void *cfq_init_queue(struct request_queue *q)
>        rcu_read_lock();
>        cfq_blkiocg_add_blkio_group(&blkio_root_cgroup, &cfqg->blkg,
>                                        (void *)cfqd, 0);
> +       hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list);
>        rcu_read_unlock();
>  #endif
>        /*
> @@ -3893,8 +3966,6 @@ static void *cfq_init_queue(struct request_queue *q)
>        atomic_inc(&cfqd->oom_cfqq.ref);
>        cfq_link_cfqq_cfqg(&cfqd->oom_cfqq, &cfqd->root_group);
>
> -       INIT_LIST_HEAD(&cfqd->cic_list);
> -
>        cfqd->queue = q;
>
>        init_timer(&cfqd->idle_slice_timer);
> diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
> index 64d5291..6c05b54 100644
> --- a/include/linux/iocontext.h
> +++ b/include/linux/iocontext.h
> @@ -18,7 +18,7 @@ struct cfq_io_context {
>        unsigned long ttime_samples;
>        unsigned long ttime_mean;
>
> -       struct list_head queue_list;
> +       struct list_head group_list;
>        struct hlist_node cic_list;
>
>        void (*dtor)(struct io_context *); /* destructor */
> --
> 1.6.2.5
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@...r.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/
Powered by blists - more mailing lists
 
