lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20120217011907.GA15073@google.com>
Date:	Thu, 16 Feb 2012 17:19:07 -0800
From:	Kent Overstreet <koverstreet@...gle.com>
To:	Tejun Heo <tj@...nel.org>
Cc:	axboe@...nel.dk, vgoyal@...hat.com, ctalbott@...gle.com,
	rni@...gle.com, linux-kernel@...r.kernel.org
Subject: Re: [PATCH 7/9] block: implement bio_associate_current()

On Thu, Feb 16, 2012 at 02:37:56PM -0800, Tejun Heo wrote:
> This patch implements bio_associate_current() which associates the
> specified bio with %current.  The bio will record the associated ioc
> and blkcg at that point and block layer will use the recorded ones
> regardless of which task actually ends up issuing the bio.  bio
> release puts the associated ioc and blkcg.

Excellent.

Why not have bio_associate_current() called from submit_bio()? I would
expect that's what we want most of the time, and the places it's not
(mainly writeback) calling it before submit_bio() would do the right
thing.

It'd make things more consistent - rq_ioc() could be dropped, and
incorrect usage would be more obvious.

> It grabs and remembers ioc and blkcg instead of the task itself
> because task may already be dead by the time the bio is issued making
> ioc and blkcg inaccessible and those are all block layer cares about.
> 
> elevator_set_req_fn() is updated such that the bio elvdata is being
> allocated for is available to the elevator.
> 
> This doesn't update block cgroup policies yet.  Further patches will
> implement the support.
> 
> Signed-off-by: Tejun Heo <tj@...nel.org>
> Cc: Vivek Goyal <vgoyal@...hat.com>
> Cc: Kent Overstreet <koverstreet@...gle.com>
> ---
>  block/blk-core.c          |   30 +++++++++++++++++-----
>  block/cfq-iosched.c       |    3 +-
>  block/elevator.c          |    5 ++-
>  fs/bio.c                  |   61 +++++++++++++++++++++++++++++++++++++++++++++
>  include/linux/bio.h       |    8 ++++++
>  include/linux/blk_types.h |   10 +++++++
>  include/linux/elevator.h  |    6 +++-
>  7 files changed, 111 insertions(+), 12 deletions(-)
> 
> diff --git a/block/blk-core.c b/block/blk-core.c
> index 195c5f7..e6a4f90 100644
> --- a/block/blk-core.c
> +++ b/block/blk-core.c
> @@ -695,7 +695,7 @@ static inline void blk_free_request(struct request_queue *q, struct request *rq)
>  }
>  
>  static struct request *
> -blk_alloc_request(struct request_queue *q, struct io_cq *icq,
> +blk_alloc_request(struct request_queue *q, struct bio *bio, struct io_cq *icq,
>  		  unsigned int flags, gfp_t gfp_mask)
>  {
>  	struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
> @@ -709,7 +709,7 @@ blk_alloc_request(struct request_queue *q, struct io_cq *icq,
>  
>  	if (flags & REQ_ELVPRIV) {
>  		rq->elv.icq = icq;
> -		if (unlikely(elv_set_request(q, rq, gfp_mask))) {
> +		if (unlikely(elv_set_request(q, rq, bio, gfp_mask))) {
>  			mempool_free(rq, q->rq.rq_pool);
>  			return NULL;
>  		}
> @@ -809,6 +809,20 @@ static bool blk_rq_should_init_elevator(struct bio *bio)
>  }
>  
>  /**
> + * rq_ioc - determine io_context for request allocation
> + * @bio: request being allocated is for this bio (can be %NULL)
> + *
> + * Determine io_context to use for request allocation for @bio.  May return
> + * %NULL if %current->io_context doesn't exist.
> + */
> +static struct io_context *rq_ioc(struct bio *bio)
> +{
> +	if (bio && bio->bi_ioc)
> +		return bio->bi_ioc;
> +	return current->io_context;
> +}
> +
> +/**
>   * get_request - get a free request
>   * @q: request_queue to allocate request from
>   * @rw_flags: RW and SYNC flags
> @@ -835,7 +849,7 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
>  	int may_queue;
>  retry:
>  	et = q->elevator->type;
> -	ioc = current->io_context;
> +	ioc = rq_ioc(bio);
>  
>  	if (unlikely(blk_queue_dead(q)))
>  		return NULL;
> @@ -918,14 +932,16 @@ retry:
>  
>  	/* create icq if missing */
>  	if ((rw_flags & REQ_ELVPRIV) && unlikely(et->icq_cache && !icq)) {
> -		ioc = create_io_context(gfp_mask, q->node);
> -		if (ioc)
> -			icq = ioc_create_icq(ioc, q, gfp_mask);
> +		create_io_context(gfp_mask, q->node);
> +		ioc = rq_ioc(bio);
> +		if (!ioc)
> +			goto fail_alloc;
> +		icq = ioc_create_icq(ioc, q, gfp_mask);
>  		if (!icq)
>  			goto fail_alloc;
>  	}
>  
> -	rq = blk_alloc_request(q, icq, rw_flags, gfp_mask);
> +	rq = blk_alloc_request(q, bio, icq, rw_flags, gfp_mask);
>  	if (unlikely(!rq))
>  		goto fail_alloc;
>  
> diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
> index 00e28a3..b2aabe8 100644
> --- a/block/cfq-iosched.c
> +++ b/block/cfq-iosched.c
> @@ -3299,7 +3299,8 @@ split_cfqq(struct cfq_io_cq *cic, struct cfq_queue *cfqq)
>   * Allocate cfq data structures associated with this request.
>   */
>  static int
> -cfq_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask)
> +cfq_set_request(struct request_queue *q, struct request *rq, struct bio *bio,
> +		gfp_t gfp_mask)
>  {
>  	struct cfq_data *cfqd = q->elevator->elevator_data;
>  	struct cfq_io_cq *cic = icq_to_cic(rq->elv.icq);
> diff --git a/block/elevator.c b/block/elevator.c
> index 06d9869..6315a27 100644
> --- a/block/elevator.c
> +++ b/block/elevator.c
> @@ -663,12 +663,13 @@ struct request *elv_former_request(struct request_queue *q, struct request *rq)
>  	return NULL;
>  }
>  
> -int elv_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask)
> +int elv_set_request(struct request_queue *q, struct request *rq,
> +		    struct bio *bio, gfp_t gfp_mask)
>  {
>  	struct elevator_queue *e = q->elevator;
>  
>  	if (e->type->ops.elevator_set_req_fn)
> -		return e->type->ops.elevator_set_req_fn(q, rq, gfp_mask);
> +		return e->type->ops.elevator_set_req_fn(q, rq, bio, gfp_mask);
>  	return 0;
>  }
>  
> diff --git a/fs/bio.c b/fs/bio.c
> index b980ecd..142214b 100644
> --- a/fs/bio.c
> +++ b/fs/bio.c
> @@ -19,12 +19,14 @@
>  #include <linux/swap.h>
>  #include <linux/bio.h>
>  #include <linux/blkdev.h>
> +#include <linux/iocontext.h>
>  #include <linux/slab.h>
>  #include <linux/init.h>
>  #include <linux/kernel.h>
>  #include <linux/module.h>
>  #include <linux/mempool.h>
>  #include <linux/workqueue.h>
> +#include <linux/cgroup.h>
>  #include <scsi/sg.h>		/* for struct sg_iovec */
>  
>  #include <trace/events/block.h>
> @@ -418,6 +420,7 @@ void bio_put(struct bio *bio)
>  	 * last put frees it
>  	 */
>  	if (atomic_dec_and_test(&bio->bi_cnt)) {
> +		bio_disassociate_task(bio);
>  		bio->bi_next = NULL;
>  		bio->bi_destructor(bio);
>  	}
> @@ -1641,6 +1644,64 @@ bad:
>  }
>  EXPORT_SYMBOL(bioset_create);
>  
> +#ifdef CONFIG_BLK_CGROUP
> +/**
> + * bio_associate_current - associate a bio with %current
> + * @bio: target bio
> + *
> + * Associate @bio with %current if it hasn't been associated yet.  Block
> + * layer will treat @bio as if it were issued by %current no matter which
> + * task actually issues it.
> + *
> + * This function takes an extra reference of @task's io_context and blkcg
> + * which will be put when @bio is released.  The caller must own @bio,
> + * ensure %current->io_context exists, and is responsible for synchronizing
> + * calls to this function.
> + */
> +int bio_associate_current(struct bio *bio)
> +{
> +	struct io_context *ioc;
> +	struct cgroup_subsys_state *css;
> +
> +	if (bio->bi_ioc)
> +		return -EBUSY;
> +
> +	ioc = current->io_context;
> +	if (!ioc)
> +		return -ENOENT;
> +
> +	/* acquire active ref on @ioc and associate */
> +	get_io_context_active(ioc);
> +	bio->bi_ioc = ioc;
> +
> +	/* associate blkcg if exists */
> +	rcu_read_lock();
> +	css = task_subsys_state(current, blkio_subsys_id);
> +	if (css && css_tryget(css))
> +		bio->bi_css = css;
> +	rcu_read_unlock();
> +
> +	return 0;
> +}
> +
> +/**
> + * bio_disassociate_task - undo bio_associate_current()
> + * @bio: target bio
> + */
> +void bio_disassociate_task(struct bio *bio)
> +{
> +	if (bio->bi_ioc) {
> +		put_io_context(bio->bi_ioc);
> +		bio->bi_ioc = NULL;
> +	}
> +	if (bio->bi_css) {
> +		css_put(bio->bi_css);
> +		bio->bi_css = NULL;
> +	}
> +}
> +
> +#endif /* CONFIG_BLK_CGROUP */
> +
>  static void __init biovec_init_slabs(void)
>  {
>  	int i;
> diff --git a/include/linux/bio.h b/include/linux/bio.h
> index 129a9c0..692d3d5 100644
> --- a/include/linux/bio.h
> +++ b/include/linux/bio.h
> @@ -268,6 +268,14 @@ extern struct bio_vec *bvec_alloc_bs(gfp_t, int, unsigned long *, struct bio_set
>  extern void bvec_free_bs(struct bio_set *, struct bio_vec *, unsigned int);
>  extern unsigned int bvec_nr_vecs(unsigned short idx);
>  
> +#ifdef CONFIG_BLK_CGROUP
> +int bio_associate_current(struct bio *bio);
> +void bio_disassociate_task(struct bio *bio);
> +#else	/* CONFIG_BLK_CGROUP */
> +static inline int bio_associate_current(struct bio *bio) { return -ENOENT; }
> +static inline void bio_disassociate_task(struct bio *bio) { }
> +#endif	/* CONFIG_BLK_CGROUP */
> +
>  /*
>   * bio_set is used to allow other portions of the IO system to
>   * allocate their own private memory pools for bio and iovec structures.
> diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
> index 4053cbd..0edb65d 100644
> --- a/include/linux/blk_types.h
> +++ b/include/linux/blk_types.h
> @@ -14,6 +14,8 @@ struct bio;
>  struct bio_integrity_payload;
>  struct page;
>  struct block_device;
> +struct io_context;
> +struct cgroup_subsys_state;
>  typedef void (bio_end_io_t) (struct bio *, int);
>  typedef void (bio_destructor_t) (struct bio *);
>  
> @@ -66,6 +68,14 @@ struct bio {
>  	bio_end_io_t		*bi_end_io;
>  
>  	void			*bi_private;
> +#ifdef CONFIG_BLK_CGROUP
> +	/*
> +	 * Optional ioc and css associated with this bio.  Put on bio
> +	 * release.  Read comment on top of bio_associate_current().
> +	 */
> +	struct io_context	*bi_ioc;
> +	struct cgroup_subsys_state *bi_css;
> +#endif
>  #if defined(CONFIG_BLK_DEV_INTEGRITY)
>  	struct bio_integrity_payload *bi_integrity;  /* data integrity */
>  #endif
> diff --git a/include/linux/elevator.h b/include/linux/elevator.h
> index 97fb255..c03af76 100644
> --- a/include/linux/elevator.h
> +++ b/include/linux/elevator.h
> @@ -28,7 +28,8 @@ typedef int (elevator_may_queue_fn) (struct request_queue *, int);
>  
>  typedef void (elevator_init_icq_fn) (struct io_cq *);
>  typedef void (elevator_exit_icq_fn) (struct io_cq *);
> -typedef int (elevator_set_req_fn) (struct request_queue *, struct request *, gfp_t);
> +typedef int (elevator_set_req_fn) (struct request_queue *, struct request *,
> +				   struct bio *, gfp_t);
>  typedef void (elevator_put_req_fn) (struct request *);
>  typedef void (elevator_activate_req_fn) (struct request_queue *, struct request *);
>  typedef void (elevator_deactivate_req_fn) (struct request_queue *, struct request *);
> @@ -129,7 +130,8 @@ extern void elv_unregister_queue(struct request_queue *q);
>  extern int elv_may_queue(struct request_queue *, int);
>  extern void elv_abort_queue(struct request_queue *);
>  extern void elv_completed_request(struct request_queue *, struct request *);
> -extern int elv_set_request(struct request_queue *, struct request *, gfp_t);
> +extern int elv_set_request(struct request_queue *q, struct request *rq,
> +			   struct bio *bio, gfp_t gfp_mask);
>  extern void elv_put_request(struct request_queue *, struct request *);
>  extern void elv_drain_elevator(struct request_queue *);
>  
> -- 
> 1.7.7.3
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ