Message-ID: <AANLkTi=4V6XDbqjMBwNaLE9B+h1V-2hGPZFTXNsjbSpA@mail.gmail.com>
Date:	Fri, 13 Aug 2010 16:01:05 -0700
From:	Nauman Rafique <nauman@...gle.com>
To:	Munehiro Ikeda <m-ikeda@...jp.nec.com>
Cc:	linux-kernel@...r.kernel.org, Vivek Goyal <vgoyal@...hat.com>,
	Ryo Tsuruta <ryov@...inux.co.jp>, taka@...inux.co.jp,
	kamezawa.hiroyu@...fujitsu.com,
	Andrea Righi <righi.andrea@...il.com>,
	Gui Jianfeng <guijianfeng@...fujitsu.com>,
	akpm@...ux-foundation.org, balbir@...ux.vnet.ibm.com
Subject: Re: [RFC][PATCH 10/11] blkiocg async: Async queue per cfq_group

On Fri, Aug 13, 2010 at 2:00 PM, Munehiro Ikeda <m-ikeda@...jp.nec.com> wrote:
> Nauman Rafique wrote, on 08/12/2010 09:24 PM:
>>
>> Muuhh,
>> I do not understand the motivation behind making cfq_io_context per
>> cgroup. We can extract the cgroup id from the bio and use that to look
>> up the cgroup and its async queues. What am I missing?
>
> A cgroup ID only identifies the cgroup that the thread belongs to, not
> the thread itself.  This means that the IO priority and IO priority
> class cannot be determined from the cgroup ID alone.

One way to do it would be to get the ioprio and class from the context
that is used to submit the async request. IO priority and class are
tied to a thread anyway, and the io context of that thread can be used
to retrieve those values. Even if a thread is submitting IOs to
different cgroups, I don't see how you could apply different IO
priorities and classes to its async IOs for different cgroups. Please
let me know if this does not make sense.
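
Roughly what I have in mind (an untested sketch, not part of your patch;
it reuses cfq_get_cfqg(), task_ioprio()/task_ioprio_class() and the
per-group cfq_async_queue_prio() as they appear in patch 10/11):

/* untested sketch, not from the patch */
static struct cfq_queue *
sketch_lookup_async_cfqq(struct cfq_data *cfqd, struct bio *bio,
                         struct io_context *ioc)
{
        /* the group comes from the cgroup the bio is charged to ... */
        struct cfq_group *cfqg = cfq_get_cfqg(cfqd, bio, 0);
        /* ... while class/prio come from the submitting thread's ioc */
        const int ioprio = task_ioprio(ioc);
        const int ioprio_class = task_ioprio_class(ioc);

        if (!cfqg)
                return NULL;
        return *cfq_async_queue_prio(cfqg, ioprio_class, ioprio);
}

That way no per-cgroup cfq_io_context would be needed just to pick the
right slot in cfqg->async_cfqq[class][prio].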

> The pointers to the async queues are held in
> cfqg->async_cfqq[class][prio], so once IO priority is taken into
> account, it is impossible to pick the appropriate queue from the
> cgroup ID alone.
>
> Frankly speaking, I'm not 100% sure that IO priority should be applied
> to async write IOs at all, but for now I decided to keep the current
> scheme.
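
Just to make sure I'm reading the patch right: with this scheme the cic
lookup ends up keyed per group rather than per device, i.e. roughly
(simplified from cfq_cic_lookup() in patch 10/11, with the dead-key and
retry handling left out):

static struct cfq_io_context *
sketch_cic_lookup(struct io_context *ioc, struct cfq_group *cfqg)
{
        struct cfq_io_context *cic;

        /* one radix tree slot per (io_context, cfq_group) pair */
        rcu_read_lock();
        cic = radix_tree_lookup(&ioc->radix_root, cfqg->cic_index);
        rcu_read_unlock();
        return cic;
}

so every io_context ends up holding one cic per cgroup per device
instead of one per device.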
>
> Does that make sense?  If you have any other ideas, I'd be glad to hear them.
>
>
> Thanks,
> Muuhh
>
>
>>
>> On Thu, Jul 8, 2010 at 8:22 PM, Munehiro Ikeda <m-ikeda@...jp.nec.com> wrote:
>>>
>>> This is the main body of the async capability for the blkio controller.
>>> The basic ideas are
>>>  - To move async queues from cfq_data to cfq_group, and
>>>  - To associate cfq_io_context with cfq_group
>>>
>>> Each cfq_io_context, which used to be created per io_context per
>>> device, is now created per io_context per cfq_group.  Since each
>>> cfq_group is created per cgroup per device, cfq_io_context ends up
>>> being created per io_context per cgroup per device.
>>>
>>> To protect the link between cfq_io_context and cfq_group,
>>> the locking code is modified in several places.
>>>
>>> ToDo:
>>> - Lock protection might still be imperfect; more investigation
>>>  is needed.
>>>
>>> - The cic_index of the root cfq_group (cfqd->root_group.cic_index) is
>>>  not removed and persists beyond elevator switching.
>>>  This issue should be solved.
>>>
>>> Signed-off-by: Munehiro "Muuhh" Ikeda<m-ikeda@...jp.nec.com>
>>> ---
>>>  block/cfq-iosched.c       |  277 ++++++++++++++++++++++++++++-----------------
>>>  include/linux/iocontext.h |    2 +-
>>>  2 files changed, 175 insertions(+), 104 deletions(-)
>>>
>>> diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
>>> index 68f76e9..4186c30 100644
>>> --- a/block/cfq-iosched.c
>>> +++ b/block/cfq-iosched.c
>>> @@ -191,10 +191,23 @@ struct cfq_group {
>>>        struct cfq_rb_root service_trees[2][3];
>>>        struct cfq_rb_root service_tree_idle;
>>>
>>> +       /*
>>> +        * async queue for each priority case
>>> +        */
>>> +       struct cfq_queue *async_cfqq[2][IOPRIO_BE_NR];
>>> +       struct cfq_queue *async_idle_cfqq;
>>> +
>>>        unsigned long saved_workload_slice;
>>>        enum wl_type_t saved_workload;
>>>        enum wl_prio_t saved_serving_prio;
>>>        struct blkio_group blkg;
>>> +       struct cfq_data *cfqd;
>>> +
>>> +       /* lock for cic_list */
>>> +       spinlock_t lock;
>>> +       unsigned int cic_index;
>>> +       struct list_head cic_list;
>>> +
>>>  #ifdef CONFIG_CFQ_GROUP_IOSCHED
>>>        struct hlist_node cfqd_node;
>>>        atomic_t ref;
>>> @@ -254,12 +267,6 @@ struct cfq_data {
>>>        struct cfq_queue *active_queue;
>>>        struct cfq_io_context *active_cic;
>>>
>>> -       /*
>>> -        * async queue for each priority case
>>> -        */
>>> -       struct cfq_queue *async_cfqq[2][IOPRIO_BE_NR];
>>> -       struct cfq_queue *async_idle_cfqq;
>>> -
>>>        sector_t last_position;
>>>
>>>        /*
>>> @@ -275,8 +282,6 @@ struct cfq_data {
>>>        unsigned int cfq_latency;
>>>        unsigned int cfq_group_isolation;
>>>
>>> -       unsigned int cic_index;
>>> -       struct list_head cic_list;
>>>
>>>        /*
>>>         * Fallback dummy cfqq for extreme OOM conditions
>>> @@ -418,10 +423,16 @@ static inline int cfqg_busy_async_queues(struct
>>> cfq_data *cfqd,
>>>  }
>>>
>>>  static void cfq_dispatch_insert(struct request_queue *, struct request
>>> *);
>>> +static void __cfq_exit_single_io_context(struct cfq_data *,
>>> +                                        struct cfq_group *,
>>> +                                        struct cfq_io_context *);
>>>  static struct cfq_queue *cfq_get_queue(struct cfq_data *, bool,
>>> -                                      struct io_context *, gfp_t);
>>> +                                      struct cfq_io_context *, gfp_t);
>>>  static struct cfq_io_context *cfq_cic_lookup(struct cfq_data *,
>>> -                                               struct io_context *);
>>> +                                            struct cfq_group *,
>>> +                                            struct io_context *);
>>> +static void cfq_put_async_queues(struct cfq_group *);
>>> +static int cfq_alloc_cic_index(void);
>>>
>>>  static inline struct cfq_queue *cic_to_cfqq(struct cfq_io_context *cic,
>>>                                            bool is_sync)
>>> @@ -438,17 +449,28 @@ static inline void cic_set_cfqq(struct
>>> cfq_io_context *cic,
>>>  #define CIC_DEAD_KEY   1ul
>>>  #define CIC_DEAD_INDEX_SHIFT   1
>>>
>>> -static inline void *cfqd_dead_key(struct cfq_data *cfqd)
>>> +static inline void *cfqg_dead_key(struct cfq_group *cfqg)
>>>  {
>>> -       return (void *)(cfqd->cic_index << CIC_DEAD_INDEX_SHIFT | CIC_DEAD_KEY);
>>> +       return (void *)(cfqg->cic_index << CIC_DEAD_INDEX_SHIFT | CIC_DEAD_KEY);
>>> +}
>>> +
>>> +static inline struct cfq_group *cic_to_cfqg(struct cfq_io_context *cic)
>>> +{
>>> +       struct cfq_group *cfqg = cic->key;
>>> +
>>> +       if (unlikely((unsigned long) cfqg & CIC_DEAD_KEY))
>>> +               cfqg = NULL;
>>> +
>>> +       return cfqg;
>>>  }
>>>
>>>  static inline struct cfq_data *cic_to_cfqd(struct cfq_io_context *cic)
>>>  {
>>> -       struct cfq_data *cfqd = cic->key;
>>> +       struct cfq_data *cfqd = NULL;
>>> +       struct cfq_group *cfqg = cic_to_cfqg(cic);
>>>
>>> -       if (unlikely((unsigned long) cfqd & CIC_DEAD_KEY))
>>> -               return NULL;
>>> +       if (likely(cfqg))
>>> +               cfqd = cfqg->cfqd;
>>>
>>>        return cfqd;
>>>  }
>>> @@ -959,12 +981,12 @@ cfq_update_blkio_group_weight(struct blkio_group
>>> *blkg, unsigned int weight)
>>>  }
>>>
>>>  static struct cfq_group *
>>> -cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int
>>> create)
>>> +cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct blkio_cgroup *blkcg,
>>> +                   int create)
>>>  {
>>> -       struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);
>>>        struct cfq_group *cfqg = NULL;
>>>        void *key = cfqd;
>>> -       int i, j;
>>> +       int i, j, idx;
>>>        struct cfq_rb_root *st;
>>>        struct backing_dev_info *bdi = &cfqd->queue->backing_dev_info;
>>>        unsigned int major, minor;
>>> @@ -978,14 +1000,21 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct
>>> cgroup *cgroup, int create)
>>>        if (cfqg || !create)
>>>                goto done;
>>>
>>> +       idx = cfq_alloc_cic_index();
>>> +       if (idx < 0)
>>> +               goto done;
>>> +
>>>        cfqg = kzalloc_node(sizeof(*cfqg), GFP_ATOMIC, cfqd->queue->node);
>>>        if (!cfqg)
>>> -               goto done;
>>> +               goto err;
>>>
>>>        for_each_cfqg_st(cfqg, i, j, st)
>>>                *st = CFQ_RB_ROOT;
>>>        RB_CLEAR_NODE(&cfqg->rb_node);
>>>
>>> +       spin_lock_init(&cfqg->lock);
>>> +       INIT_LIST_HEAD(&cfqg->cic_list);
>>> +
>>>        /*
>>>         * Take the initial reference that will be released on destroy
>>>         * This can be thought of a joint reference by cgroup and
>>> @@ -1002,7 +1031,14 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct
>>> cgroup *cgroup, int create)
>>>
>>>        /* Add group on cfqd list */
>>>        hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list);
>>> +       cfqg->cfqd = cfqd;
>>> +       cfqg->cic_index = idx;
>>> +       goto done;
>>>
>>> +err:
>>> +       spin_lock(&cic_index_lock);
>>> +       ida_remove(&cic_index_ida, idx);
>>> +       spin_unlock(&cic_index_lock);
>>>  done:
>>>        return cfqg;
>>>  }
>>> @@ -1095,10 +1131,6 @@ static inline struct cfq_group
>>> *cfq_ref_get_cfqg(struct cfq_group *cfqg)
>>>
>>>  static void cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group
>>> *cfqg)
>>>  {
>>> -       /* Currently, all async queues are mapped to root group */
>>> -       if (!cfq_cfqq_sync(cfqq))
>>> -               cfqg = &cfqq->cfqd->root_group;
>>> -
>>>        cfqq->cfqg = cfqg;
>>>        /* cfqq reference on cfqg */
>>>        atomic_inc(&cfqq->cfqg->ref);
>>> @@ -1114,6 +1146,16 @@ static void cfq_put_cfqg(struct cfq_group *cfqg)
>>>                return;
>>>        for_each_cfqg_st(cfqg, i, j, st)
>>>                BUG_ON(!RB_EMPTY_ROOT(&st->rb) || st->active != NULL);
>>> +
>>> +       /**
>>> +        * ToDo:
>>> +        * root_group never reaches here and cic_index is never
>>> +        * removed.  Unused cic_index lasts by elevator switching.
>>> +        */
>>> +       spin_lock(&cic_index_lock);
>>> +       ida_remove(&cic_index_ida, cfqg->cic_index);
>>> +       spin_unlock(&cic_index_lock);
>>> +
>>>        kfree(cfqg);
>>>  }
>>>
>>> @@ -1122,6 +1164,15 @@ static void cfq_destroy_cfqg(struct cfq_data
>>> *cfqd, struct cfq_group *cfqg)
>>>        /* Something wrong if we are trying to remove same group twice */
>>>        BUG_ON(hlist_unhashed(&cfqg->cfqd_node));
>>>
>>> +       while (!list_empty(&cfqg->cic_list)) {
>>> +               struct cfq_io_context *cic = list_entry(cfqg->cic_list.next,
>>> +                                                       struct cfq_io_context,
>>> +                                                       group_list);
>>> +
>>> +               __cfq_exit_single_io_context(cfqd, cfqg, cic);
>>> +       }
>>> +
>>> +       cfq_put_async_queues(cfqg);
>>>        hlist_del_init(&cfqg->cfqd_node);
>>>
>>>        /*
>>> @@ -1497,10 +1548,12 @@ static struct request *
>>>  cfq_find_rq_fmerge(struct cfq_data *cfqd, struct bio *bio)
>>>  {
>>>        struct task_struct *tsk = current;
>>> +       struct cfq_group *cfqg;
>>>        struct cfq_io_context *cic;
>>>        struct cfq_queue *cfqq;
>>>
>>> -       cic = cfq_cic_lookup(cfqd, tsk->io_context);
>>> +       cfqg = cfq_get_cfqg(cfqd, bio, 0);
>>> +       cic = cfq_cic_lookup(cfqd, cfqg, tsk->io_context);
>>>        if (!cic)
>>>                return NULL;
>>>
>>> @@ -1611,6 +1664,7 @@ static int cfq_allow_merge(struct request_queue *q,
>>> struct request *rq,
>>>                           struct bio *bio)
>>>  {
>>>        struct cfq_data *cfqd = q->elevator->elevator_data;
>>> +       struct cfq_group *cfqg;
>>>        struct cfq_io_context *cic;
>>>        struct cfq_queue *cfqq;
>>>
>>> @@ -1624,7 +1678,8 @@ static int cfq_allow_merge(struct request_queue *q,
>>> struct request *rq,
>>>         * Lookup the cfqq that this bio will be queued with. Allow
>>>         * merge only if rq is queued there.
>>>         */
>>> -       cic = cfq_cic_lookup(cfqd, current->io_context);
>>> +       cfqg = cfq_get_cfqg(cfqd, bio, 0);
>>> +       cic = cfq_cic_lookup(cfqd, cfqg, current->io_context);
>>>        if (!cic)
>>>                return false;
>>>
>>> @@ -2667,17 +2722,18 @@ static void cfq_exit_cfqq(struct cfq_data *cfqd,
>>> struct cfq_queue *cfqq)
>>>  }
>>>
>>>  static void __cfq_exit_single_io_context(struct cfq_data *cfqd,
>>> +                                        struct cfq_group *cfqg,
>>>                                         struct cfq_io_context *cic)
>>>  {
>>>        struct io_context *ioc = cic->ioc;
>>>
>>> -       list_del_init(&cic->queue_list);
>>> +       list_del_init(&cic->group_list);
>>>
>>>        /*
>>>         * Make sure dead mark is seen for dead queues
>>>         */
>>>        smp_wmb();
>>> -       cic->key = cfqd_dead_key(cfqd);
>>> +       cic->key = cfqg_dead_key(cfqg);
>>>
>>>        if (ioc->ioc_data == cic)
>>>                rcu_assign_pointer(ioc->ioc_data, NULL);
>>> @@ -2696,23 +2752,23 @@ static void __cfq_exit_single_io_context(struct
>>> cfq_data *cfqd,
>>>  static void cfq_exit_single_io_context(struct io_context *ioc,
>>>                                       struct cfq_io_context *cic)
>>>  {
>>> -       struct cfq_data *cfqd = cic_to_cfqd(cic);
>>> +       struct cfq_group *cfqg = cic_to_cfqg(cic);
>>>
>>> -       if (cfqd) {
>>> -               struct request_queue *q = cfqd->queue;
>>> +       if (cfqg) {
>>> +               struct cfq_data *cfqd = cfqg->cfqd;
>>>                unsigned long flags;
>>>
>>> -               spin_lock_irqsave(q->queue_lock, flags);
>>> +               spin_lock_irqsave(&cfqg->lock, flags);
>>>
>>>                /*
>>>                 * Ensure we get a fresh copy of the ->key to prevent
>>>                 * race between exiting task and queue
>>>                 */
>>>                smp_read_barrier_depends();
>>> -               if (cic->key == cfqd)
>>> -                       __cfq_exit_single_io_context(cfqd, cic);
>>> +               if (cic->key == cfqg)
>>> +                       __cfq_exit_single_io_context(cfqd, cfqg, cic);
>>>
>>> -               spin_unlock_irqrestore(q->queue_lock, flags);
>>> +               spin_unlock_irqrestore(&cfqg->lock, flags);
>>>        }
>>>  }
>>>
>>> @@ -2734,7 +2790,7 @@ cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t
>>> gfp_mask)
>>>
>>>  cfqd->queue->node);
>>>        if (cic) {
>>>                cic->last_end_request = jiffies;
>>> -               INIT_LIST_HEAD(&cic->queue_list);
>>> +               INIT_LIST_HEAD(&cic->group_list);
>>>                INIT_HLIST_NODE(&cic->cic_list);
>>>                cic->dtor = cfq_free_io_context;
>>>                cic->exit = cfq_exit_io_context;
>>> @@ -2801,8 +2857,7 @@ static void changed_ioprio(struct io_context *ioc,
>>> struct cfq_io_context *cic)
>>>        cfqq = cic->cfqq[BLK_RW_ASYNC];
>>>        if (cfqq) {
>>>                struct cfq_queue *new_cfqq;
>>> -               new_cfqq = cfq_get_queue(cfqd, BLK_RW_ASYNC, cic->ioc,
>>> -                                               GFP_ATOMIC);
>>> +               new_cfqq = cfq_get_queue(cfqd, BLK_RW_ASYNC, cic, GFP_ATOMIC);
>>>                if (new_cfqq) {
>>>                        cic->cfqq[BLK_RW_ASYNC] = new_cfqq;
>>>                        cfq_put_queue(cfqq);
>>> @@ -2879,16 +2934,14 @@ static void cfq_ioc_set_cgroup(struct io_context
>>> *ioc)
>>>
>>>  static struct cfq_queue *
>>>  cfq_find_alloc_queue(struct cfq_data *cfqd, bool is_sync,
>>> -                    struct io_context *ioc, gfp_t gfp_mask)
>>> +                    struct cfq_io_context *cic, gfp_t gfp_mask)
>>>  {
>>>        struct cfq_queue *cfqq, *new_cfqq = NULL;
>>> -       struct cfq_io_context *cic;
>>> +       struct io_context *ioc = cic->ioc;
>>>        struct cfq_group *cfqg;
>>>
>>>  retry:
>>> -       cfqg = cfq_get_cfqg(cfqd, NULL, 1);
>>> -       cic = cfq_cic_lookup(cfqd, ioc);
>>> -       /* cic always exists here */
>>> +       cfqg = cic_to_cfqg(cic);
>>>        cfqq = cic_to_cfqq(cic, is_sync);
>>>
>>>        /*
>>> @@ -2930,36 +2983,38 @@ retry:
>>>  }
>>>
>>>  static struct cfq_queue **
>>> -cfq_async_queue_prio(struct cfq_data *cfqd, int ioprio_class, int ioprio)
>>> +cfq_async_queue_prio(struct cfq_group *cfqg, int ioprio_class, int ioprio)
>>>  {
>>>        switch (ioprio_class) {
>>>        case IOPRIO_CLASS_RT:
>>> -               return &cfqd->async_cfqq[0][ioprio];
>>> +               return &cfqg->async_cfqq[0][ioprio];
>>>        case IOPRIO_CLASS_BE:
>>> -               return &cfqd->async_cfqq[1][ioprio];
>>> +               return &cfqg->async_cfqq[1][ioprio];
>>>        case IOPRIO_CLASS_IDLE:
>>> -               return &cfqd->async_idle_cfqq;
>>> +               return &cfqg->async_idle_cfqq;
>>>        default:
>>>                BUG();
>>>        }
>>>  }
>>>
>>>  static struct cfq_queue *
>>> -cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct io_context *ioc,
>>> +cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct cfq_io_context *cic,
>>>              gfp_t gfp_mask)
>>>  {
>>> +       struct io_context *ioc = cic->ioc;
>>>        const int ioprio = task_ioprio(ioc);
>>>        const int ioprio_class = task_ioprio_class(ioc);
>>> +       struct cfq_group *cfqg = cic_to_cfqg(cic);
>>>        struct cfq_queue **async_cfqq = NULL;
>>>        struct cfq_queue *cfqq = NULL;
>>>
>>>        if (!is_sync) {
>>> -               async_cfqq = cfq_async_queue_prio(cfqd, ioprio_class, ioprio);
>>> +               async_cfqq = cfq_async_queue_prio(cfqg, ioprio_class, ioprio);
>>>                cfqq = *async_cfqq;
>>>        }
>>>
>>>        if (!cfqq)
>>> -               cfqq = cfq_find_alloc_queue(cfqd, is_sync, ioc, gfp_mask);
>>> +               cfqq = cfq_find_alloc_queue(cfqd, is_sync, cic, gfp_mask);
>>>
>>>        /*
>>>         * pin the queue now that it's allocated, scheduler exit will
>>> prune it
>>> @@ -2977,19 +3032,19 @@ cfq_get_queue(struct cfq_data *cfqd, bool
>>> is_sync, struct io_context *ioc,
>>>  * We drop cfq io contexts lazily, so we may find a dead one.
>>>  */
>>>  static void
>>> -cfq_drop_dead_cic(struct cfq_data *cfqd, struct io_context *ioc,
>>> -                 struct cfq_io_context *cic)
>>> +cfq_drop_dead_cic(struct cfq_data *cfqd, struct cfq_group *cfqg,
>>> +                 struct io_context *ioc, struct cfq_io_context *cic)
>>>  {
>>>        unsigned long flags;
>>>
>>> -       WARN_ON(!list_empty(&cic->queue_list));
>>> -       BUG_ON(cic->key != cfqd_dead_key(cfqd));
>>> +       WARN_ON(!list_empty(&cic->group_list));
>>> +       BUG_ON(cic->key != cfqg_dead_key(cfqg));
>>>
>>>        spin_lock_irqsave(&ioc->lock, flags);
>>>
>>>        BUG_ON(ioc->ioc_data == cic);
>>>
>>> -       radix_tree_delete(&ioc->radix_root, cfqd->cic_index);
>>> +       radix_tree_delete(&ioc->radix_root, cfqg->cic_index);
>>>        hlist_del_rcu(&cic->cic_list);
>>>        spin_unlock_irqrestore(&ioc->lock, flags);
>>>
>>> @@ -2997,11 +3052,14 @@ cfq_drop_dead_cic(struct cfq_data *cfqd, struct
>>> io_context *ioc,
>>>  }
>>>
>>>  static struct cfq_io_context *
>>> -cfq_cic_lookup(struct cfq_data *cfqd, struct io_context *ioc)
>>> +cfq_cic_lookup(struct cfq_data *cfqd, struct cfq_group *cfqg,
>>> +              struct io_context *ioc)
>>>  {
>>>        struct cfq_io_context *cic;
>>>        unsigned long flags;
>>>
>>> +       if (!cfqg)
>>> +               return NULL;
>>>        if (unlikely(!ioc))
>>>                return NULL;
>>>
>>> @@ -3011,18 +3069,18 @@ cfq_cic_lookup(struct cfq_data *cfqd, struct
>>> io_context *ioc)
>>>         * we maintain a last-hit cache, to avoid browsing over the tree
>>>         */
>>>        cic = rcu_dereference(ioc->ioc_data);
>>> -       if (cic && cic->key == cfqd) {
>>> +       if (cic && cic->key == cfqg) {
>>>                rcu_read_unlock();
>>>                return cic;
>>>        }
>>>
>>>        do {
>>> -               cic = radix_tree_lookup(&ioc->radix_root, cfqd->cic_index);
>>> +               cic = radix_tree_lookup(&ioc->radix_root, cfqg->cic_index);
>>>                rcu_read_unlock();
>>>                if (!cic)
>>>                        break;
>>> -               if (unlikely(cic->key != cfqd)) {
>>> -                       cfq_drop_dead_cic(cfqd, ioc, cic);
>>> +               if (unlikely(cic->key != cfqg)) {
>>> +                       cfq_drop_dead_cic(cfqd, cfqg, ioc, cic);
>>>                        rcu_read_lock();
>>>                        continue;
>>>                }
>>> @@ -3037,24 +3095,25 @@ cfq_cic_lookup(struct cfq_data *cfqd, struct
>>> io_context *ioc)
>>>  }
>>>
>>>  /*
>>> - * Add cic into ioc, using cfqd as the search key. This enables us to lookup
>>> + * Add cic into ioc, using cfqg as the search key. This enables us to lookup
>>>  * the process specific cfq io context when entered from the block layer.
>>> - * Also adds the cic to a per-cfqd list, used when this queue is removed.
>>> + * Also adds the cic to a per-cfqg list, used when the group is removed.
>>> + * request_queue lock must be held.
>>>  */
>>> -static int cfq_cic_link(struct cfq_data *cfqd, struct io_context *ioc,
>>> -                       struct cfq_io_context *cic, gfp_t gfp_mask)
>>> +static int cfq_cic_link(struct cfq_data *cfqd, struct cfq_group *cfqg,
>>> +                       struct io_context *ioc, struct cfq_io_context *cic)
>>>  {
>>>        unsigned long flags;
>>>        int ret;
>>>
>>> -       ret = radix_tree_preload(gfp_mask);
>>> +       ret = radix_tree_preload(GFP_ATOMIC);
>>>        if (!ret) {
>>>                cic->ioc = ioc;
>>> -               cic->key = cfqd;
>>> +               cic->key = cfqg;
>>>
>>>                spin_lock_irqsave(&ioc->lock, flags);
>>>                ret = radix_tree_insert(&ioc->radix_root,
>>> -                                               cfqd->cic_index, cic);
>>> +                                               cfqg->cic_index, cic);
>>>                if (!ret)
>>>                        hlist_add_head_rcu(&cic->cic_list, &ioc->cic_list);
>>>                spin_unlock_irqrestore(&ioc->lock, flags);
>>> @@ -3062,9 +3121,9 @@ static int cfq_cic_link(struct cfq_data *cfqd,
>>> struct io_context *ioc,
>>>                radix_tree_preload_end();
>>>
>>>                if (!ret) {
>>> -                       spin_lock_irqsave(cfqd->queue->queue_lock, flags);
>>> -                       list_add(&cic->queue_list, &cfqd->cic_list);
>>> -                       spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
>>> +                       spin_lock_irqsave(&cfqg->lock, flags);
>>> +                       list_add(&cic->group_list, &cfqg->cic_list);
>>> +                       spin_unlock_irqrestore(&cfqg->lock, flags);
>>>                }
>>>        }
>>>
>>> @@ -3080,10 +3139,12 @@ static int cfq_cic_link(struct cfq_data *cfqd,
>>> struct io_context *ioc,
>>>  * than one device managed by cfq.
>>>  */
>>>  static struct cfq_io_context *
>>> -cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
>>> +cfq_get_io_context(struct cfq_data *cfqd, struct bio *bio, gfp_t gfp_mask)
>>>  {
>>>        struct io_context *ioc = NULL;
>>>        struct cfq_io_context *cic;
>>> +       struct cfq_group *cfqg, *cfqg2;
>>> +       unsigned long flags;
>>>
>>>        might_sleep_if(gfp_mask & __GFP_WAIT);
>>>
>>> @@ -3091,18 +3152,38 @@ cfq_get_io_context(struct cfq_data *cfqd, gfp_t
>>> gfp_mask)
>>>        if (!ioc)
>>>                return NULL;
>>>
>>> -       cic = cfq_cic_lookup(cfqd, ioc);
>>> +       spin_lock_irqsave(cfqd->queue->queue_lock, flags);
>>> +retry_cfqg:
>>> +       cfqg = cfq_get_cfqg(cfqd, bio, 1);
>>> +retry_cic:
>>> +       cic = cfq_cic_lookup(cfqd, cfqg, ioc);
>>>        if (cic)
>>>                goto out;
>>> +       spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
>>>
>>>        cic = cfq_alloc_io_context(cfqd, gfp_mask);
>>>        if (cic == NULL)
>>>                goto err;
>>>
>>> -       if (cfq_cic_link(cfqd, ioc, cic, gfp_mask))
>>> +       spin_lock_irqsave(cfqd->queue->queue_lock, flags);
>>> +
>>> +       /* check the consistency breakage during unlock period */
>>> +       cfqg2 = cfq_get_cfqg(cfqd, bio, 0);
>>> +       if (cfqg != cfqg2) {
>>> +               cfq_cic_free(cic);
>>> +               if (!cfqg2)
>>> +                       goto retry_cfqg;
>>> +               else {
>>> +                       cfqg = cfqg2;
>>> +                       goto retry_cic;
>>> +               }
>>> +       }
>>> +
>>> +       if (cfq_cic_link(cfqd, cfqg, ioc, cic))
>>>                goto err_free;
>>>
>>>  out:
>>> +       spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
>>>        smp_read_barrier_depends();
>>>        if (unlikely(ioc->ioprio_changed))
>>>                cfq_ioc_set_ioprio(ioc);
>>> @@ -3113,6 +3194,7 @@ out:
>>>  #endif
>>>        return cic;
>>>  err_free:
>>> +       spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
>>>        cfq_cic_free(cic);
>>>  err:
>>>        put_io_context(ioc);
>>> @@ -3537,6 +3619,7 @@ static inline int __cfq_may_queue(struct cfq_queue
>>> *cfqq)
>>>  static int cfq_may_queue(struct request_queue *q, struct bio *bio, int
>>> rw)
>>>  {
>>>        struct cfq_data *cfqd = q->elevator->elevator_data;
>>> +       struct cfq_group *cfqg;
>>>        struct task_struct *tsk = current;
>>>        struct cfq_io_context *cic;
>>>        struct cfq_queue *cfqq;
>>> @@ -3547,7 +3630,8 @@ static int cfq_may_queue(struct request_queue *q,
>>> struct bio *bio, int rw)
>>>         * so just lookup a possibly existing queue, or return 'may queue'
>>>         * if that fails
>>>         */
>>> -       cic = cfq_cic_lookup(cfqd, tsk->io_context);
>>> +       cfqg = cfq_get_cfqg(cfqd, bio, 0);
>>> +       cic = cfq_cic_lookup(cfqd, cfqg, tsk->io_context);
>>>        if (!cic)
>>>                return ELV_MQUEUE_MAY;
>>>
>>> @@ -3636,7 +3720,7 @@ cfq_set_request(struct request_queue *q, struct
>>> request *rq, struct bio *bio,
>>>
>>>        might_sleep_if(gfp_mask & __GFP_WAIT);
>>>
>>> -       cic = cfq_get_io_context(cfqd, gfp_mask);
>>> +       cic = cfq_get_io_context(cfqd, bio, gfp_mask);
>>>
>>>        spin_lock_irqsave(q->queue_lock, flags);
>>>
>>> @@ -3646,7 +3730,7 @@ cfq_set_request(struct request_queue *q, struct
>>> request *rq, struct bio *bio,
>>>  new_queue:
>>>        cfqq = cic_to_cfqq(cic, is_sync);
>>>        if (!cfqq || cfqq == &cfqd->oom_cfqq) {
>>> -               cfqq = cfq_get_queue(cfqd, is_sync, cic->ioc, gfp_mask);
>>> +               cfqq = cfq_get_queue(cfqd, is_sync, cic, gfp_mask);
>>>                cic_set_cfqq(cic, cfqq, is_sync);
>>>        } else {
>>>                /*
>>> @@ -3762,19 +3846,19 @@ static void cfq_shutdown_timer_wq(struct cfq_data
>>> *cfqd)
>>>        cancel_work_sync(&cfqd->unplug_work);
>>>  }
>>>
>>> -static void cfq_put_async_queues(struct cfq_data *cfqd)
>>> +static void cfq_put_async_queues(struct cfq_group *cfqg)
>>>  {
>>>        int i;
>>>
>>>        for (i = 0; i < IOPRIO_BE_NR; i++) {
>>> -               if (cfqd->async_cfqq[0][i])
>>> -                       cfq_put_queue(cfqd->async_cfqq[0][i]);
>>> -               if (cfqd->async_cfqq[1][i])
>>> -                       cfq_put_queue(cfqd->async_cfqq[1][i]);
>>> +               if (cfqg->async_cfqq[0][i])
>>> +                       cfq_put_queue(cfqg->async_cfqq[0][i]);
>>> +               if (cfqg->async_cfqq[1][i])
>>> +                       cfq_put_queue(cfqg->async_cfqq[1][i]);
>>>        }
>>>
>>> -       if (cfqd->async_idle_cfqq)
>>> -               cfq_put_queue(cfqd->async_idle_cfqq);
>>> +       if (cfqg->async_idle_cfqq)
>>> +               cfq_put_queue(cfqg->async_idle_cfqq);
>>>  }
>>>
>>>  static void cfq_cfqd_free(struct rcu_head *head)
>>> @@ -3794,15 +3878,6 @@ static void cfq_exit_queue(struct elevator_queue
>>> *e)
>>>        if (cfqd->active_queue)
>>>                __cfq_slice_expired(cfqd, cfqd->active_queue, 0);
>>>
>>> -       while (!list_empty(&cfqd->cic_list)) {
>>> -               struct cfq_io_context *cic = list_entry(cfqd->cic_list.next,
>>> -                                                       struct cfq_io_context,
>>> -                                                       queue_list);
>>> -
>>> -               __cfq_exit_single_io_context(cfqd, cic);
>>> -       }
>>> -
>>> -       cfq_put_async_queues(cfqd);
>>>        cfq_release_cfq_groups(cfqd);
>>>        cfq_blkiocg_del_blkio_group(&cfqd->root_group.blkg);
>>>
>>> @@ -3810,10 +3885,6 @@ static void cfq_exit_queue(struct elevator_queue
>>> *e)
>>>
>>>        cfq_shutdown_timer_wq(cfqd);
>>>
>>> -       spin_lock(&cic_index_lock);
>>> -       ida_remove(&cic_index_ida, cfqd->cic_index);
>>> -       spin_unlock(&cic_index_lock);
>>> -
>>>        /* Wait for cfqg->blkg->key accessors to exit their grace periods.
>>> */
>>>        call_rcu(&cfqd->rcu, cfq_cfqd_free);
>>>  }
>>> @@ -3823,7 +3894,7 @@ static int cfq_alloc_cic_index(void)
>>>        int index, error;
>>>
>>>        do {
>>> -               if (!ida_pre_get(&cic_index_ida, GFP_KERNEL))
>>> +               if (!ida_pre_get(&cic_index_ida, GFP_ATOMIC))
>>>                        return -ENOMEM;
>>>
>>>                spin_lock(&cic_index_lock);
>>> @@ -3839,20 +3910,18 @@ static int cfq_alloc_cic_index(void)
>>>  static void *cfq_init_queue(struct request_queue *q)
>>>  {
>>>        struct cfq_data *cfqd;
>>> -       int i, j;
>>> +       int i, j, idx;
>>>        struct cfq_group *cfqg;
>>>        struct cfq_rb_root *st;
>>>
>>> -       i = cfq_alloc_cic_index();
>>> -       if (i < 0)
>>> +       idx = cfq_alloc_cic_index();
>>> +       if (idx < 0)
>>>                return NULL;
>>>
>>>        cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL | __GFP_ZERO,
>>> q->node);
>>>        if (!cfqd)
>>>                return NULL;
>>>
>>> -       cfqd->cic_index = i;
>>> -
>>>        /* Init root service tree */
>>>        cfqd->grp_service_tree = CFQ_RB_ROOT;
>>>
>>> @@ -3861,6 +3930,9 @@ static void *cfq_init_queue(struct request_queue
>>> *q)
>>>        for_each_cfqg_st(cfqg, i, j, st)
>>>                *st = CFQ_RB_ROOT;
>>>        RB_CLEAR_NODE(&cfqg->rb_node);
>>> +       cfqg->cfqd = cfqd;
>>> +       cfqg->cic_index = idx;
>>> +       INIT_LIST_HEAD(&cfqg->cic_list);
>>>
>>>        /* Give preference to root group over other groups */
>>>        cfqg->weight = 2*BLKIO_WEIGHT_DEFAULT;
>>> @@ -3874,6 +3946,7 @@ static void *cfq_init_queue(struct request_queue
>>> *q)
>>>        rcu_read_lock();
>>>        cfq_blkiocg_add_blkio_group(&blkio_root_cgroup, &cfqg->blkg,
>>>                                        (void *)cfqd, 0);
>>> +       hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list);
>>>        rcu_read_unlock();
>>>  #endif
>>>        /*
>>> @@ -3893,8 +3966,6 @@ static void *cfq_init_queue(struct request_queue
>>> *q)
>>>        atomic_inc(&cfqd->oom_cfqq.ref);
>>>        cfq_link_cfqq_cfqg(&cfqd->oom_cfqq, &cfqd->root_group);
>>>
>>> -       INIT_LIST_HEAD(&cfqd->cic_list);
>>> -
>>>        cfqd->queue = q;
>>>
>>>        init_timer(&cfqd->idle_slice_timer);
>>> diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
>>> index 64d5291..6c05b54 100644
>>> --- a/include/linux/iocontext.h
>>> +++ b/include/linux/iocontext.h
>>> @@ -18,7 +18,7 @@ struct cfq_io_context {
>>>        unsigned long ttime_samples;
>>>        unsigned long ttime_mean;
>>>
>>> -       struct list_head queue_list;
>>> +       struct list_head group_list;
>>>        struct hlist_node cic_list;
>>>
>>>        void (*dtor)(struct io_context *); /* destructor */
>>> --
>>> 1.6.2.5
>>>
>>
>
> --
> IKEDA, Munehiro
>  NEC Corporation of America
>    m-ikeda@...jp.nec.com
>
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/
