Date:	Sun, 12 Jul 2015 14:00:38 -0400
From:	Tejun Heo <tj@...nel.org>
To:	axboe@...nel.dk
Cc:	linux-kernel@...r.kernel.org, kernel-team@...com,
	vgoyal@...hat.com, avanzini.arianna@...il.com,
	Tejun Heo <tj@...nel.org>
Subject: [PATCH 05/10] blkcg: consolidate blkg creation in blkcg_bio_issue_check()

A blkg (blkcg_gq) is currently created by blkcg policies invoking
blkg_lookup_create(), which ends up repeating roughly the same code in
different policies.  Theoretically, this can avoid the overhead of
looking up and/or creating blkg's if blkcg is enabled but no policy is
in use; however, the cost of blkg lookup / creation is very low,
especially if only the root blkcg is in use, which is highly likely
when no blkcg policy is in active use - it boils down to a single,
very predictable conditional and the surrounding RCU protection.
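
For illustration, the fast path referred to above is roughly the
following (a simplified sketch of the inlined lookup helper from the
earlier patches in this series; the helper name and the slowpath
argument list are approximated, not verbatim):

	/* sketch only - the real helper is the inlined blkg lookup in blk-cgroup.h */
	static inline struct blkcg_gq *blkg_lookup_sketch(struct blkcg *blkcg,
							  struct request_queue *q)
	{
		/* callers hold rcu_read_lock() */
		if (blkcg == &blkcg_root)	/* the single predictable conditional */
			return q->root_blkg;

		/* uncommon case: consult the hint / radix tree */
		return blkg_lookup_slowpath(blkcg, q, false);
	}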

This patch consolidates blkg creation into a new function,
blkcg_bio_issue_check(), which is called during bio issue from
generic_make_request_checks().  blkcg_bio_issue_check() is now the
only function which tries to create missing blkg's.  The subsequent
policy and request_list operations just perform blkg_lookup() and, if
the blkg is missing, fall back to the root.
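
Condensed, the new entry point added to include/linux/blk-cgroup.h
below has the following shape (taken from the hunk further down, with
comments added and the !CONFIG_BLK_CGROUP stub omitted):

	static inline bool blkcg_bio_issue_check(struct request_queue *q,
						 struct bio *bio)
	{
		struct blkcg *blkcg;
		struct blkcg_gq *blkg;
		bool throtl;

		rcu_read_lock();
		blkcg = bio_blkcg(bio);

		/* from now on this is the only place a missing blkg is created */
		blkg = blkg_lookup(blkcg, q);
		if (unlikely(!blkg)) {
			spin_lock_irq(q->queue_lock);
			blkg = blkg_lookup_create(blkcg, q);
			if (IS_ERR(blkg))
				blkg = NULL;	/* policies fall back to the root */
			spin_unlock_irq(q->queue_lock);
		}

		/* throttle with rcu held and the blkg resolved (or NULL) */
		throtl = blk_throtl_bio(q, blkg, bio);

		rcu_read_unlock();
		return !throtl;		/* false: bio consumed by throttling */
	}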

* blk_get_rl() no longer tries to create a blkg.  It uses blkg_lookup()
  instead of blkg_lookup_create().

* blk_throtl_bio() is now called from blkcg_bio_issue_check() with the
  rcu read lock held and the blkg already looked up.  Both
  throtl_lookup_tg() and throtl_lookup_create_tg() are dropped.

* cfq is similarly updated.  cfq_lookup_create_cfqg() is dropped and
  cfq_get_queue() now directly uses blkg_lookup() and falls back to
  the oom_cfqq on failure.

This consolidates blkg handling and avoids unnecessary blkg creation
retries under memory pressure.  In addition, this provides a common
bio entry point into blkcg where things like common accounting can be
performed.

Signed-off-by: Tejun Heo <tj@...nel.org>
Cc: Vivek Goyal <vgoyal@...hat.com>
Cc: Arianna Avanzini <avanzini.arianna@...il.com>
---
 block/blk-cgroup.c         |  2 +-
 block/blk-core.c           |  4 +--
 block/blk-throttle.c       | 72 ++++------------------------------------------
 block/blk.h                |  5 ----
 block/cfq-iosched.c        | 37 ++++--------------------
 include/linux/blk-cgroup.h | 40 ++++++++++++++++++++++++--
 6 files changed, 52 insertions(+), 108 deletions(-)

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index c817da0..5bab2a0 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -153,6 +153,7 @@ struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg,
 
 	return NULL;
 }
+EXPORT_SYMBOL_GPL(blkg_lookup_slowpath);
 
 /*
  * If @new_blkg is %NULL, this function tries to allocate a new one as
@@ -295,7 +296,6 @@ struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
 			return blkg;
 	}
 }
-EXPORT_SYMBOL_GPL(blkg_lookup_create);
 
 static void blkg_destroy(struct blkcg_gq *blkg)
 {
diff --git a/block/blk-core.c b/block/blk-core.c
index 627ed0c..140094f 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1889,8 +1889,8 @@ generic_make_request_checks(struct bio *bio)
 	 */
 	create_io_context(GFP_ATOMIC, q->node);
 
-	if (blk_throtl_bio(q, bio))
-		return false;	/* throttled, will be resubmitted later */
+	if (!blkcg_bio_issue_check(q, bio))
+		return false;
 
 	trace_block_bio_queue(q, bio);
 	return true;
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 900a777..29c22ed 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -182,11 +182,6 @@ static inline struct blkcg_gq *tg_to_blkg(struct throtl_grp *tg)
 	return pd_to_blkg(&tg->pd);
 }
 
-static inline struct throtl_grp *td_root_tg(struct throtl_data *td)
-{
-	return blkg_to_tg(td->queue->root_blkg);
-}
-
 /**
  * sq_to_tg - return the throl_grp the specified service queue belongs to
  * @sq: the throtl_service_queue of interest
@@ -449,39 +444,6 @@ static void throtl_pd_reset_stats(struct blkg_policy_data *pd)
 	}
 }
 
-static struct throtl_grp *throtl_lookup_tg(struct throtl_data *td,
-					   struct blkcg *blkcg)
-{
-	return blkg_to_tg(blkg_lookup(blkcg, td->queue));
-}
-
-static struct throtl_grp *throtl_lookup_create_tg(struct throtl_data *td,
-						  struct blkcg *blkcg)
-{
-	struct request_queue *q = td->queue;
-	struct throtl_grp *tg = NULL;
-
-	/*
-	 * This is the common case when there are no blkcgs.  Avoid lookup
-	 * in this case
-	 */
-	if (blkcg == &blkcg_root) {
-		tg = td_root_tg(td);
-	} else {
-		struct blkcg_gq *blkg;
-
-		blkg = blkg_lookup_create(blkcg, q);
-
-		/* if %NULL and @q is alive, fall back to root_tg */
-		if (!IS_ERR(blkg))
-			tg = blkg_to_tg(blkg);
-		else
-			tg = td_root_tg(td);
-	}
-
-	return tg;
-}
-
 static struct throtl_grp *
 throtl_rb_first(struct throtl_service_queue *parent_sq)
 {
@@ -1403,46 +1365,26 @@ static struct blkcg_policy blkcg_policy_throtl = {
 	.pd_reset_stats_fn	= throtl_pd_reset_stats,
 };
 
-bool blk_throtl_bio(struct request_queue *q, struct bio *bio)
+bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
+		    struct bio *bio)
 {
-	struct throtl_data *td = q->td;
 	struct throtl_qnode *qn = NULL;
-	struct throtl_grp *tg;
+	struct throtl_grp *tg = blkg_to_tg(blkg ?: q->root_blkg);
 	struct throtl_service_queue *sq;
 	bool rw = bio_data_dir(bio);
-	struct blkcg *blkcg;
 	bool throttled = false;
 
+	WARN_ON_ONCE(!rcu_read_lock_held());
+
 	/* see throtl_charge_bio() */
-	if (bio->bi_rw & REQ_THROTTLED)
+	if ((bio->bi_rw & REQ_THROTTLED) || !tg->has_rules[rw])
 		goto out;
 
-	/*
-	 * A throtl_grp pointer retrieved under rcu can be used to access
-	 * basic fields like stats and io rates. If a group has no rules,
-	 * just update the dispatch stats in lockless manner and return.
-	 */
-	rcu_read_lock();
-	blkcg = bio_blkcg(bio);
-	tg = throtl_lookup_tg(td, blkcg);
-	if (tg) {
-		if (!tg->has_rules[rw]) {
-			throtl_update_dispatch_stats(tg_to_blkg(tg),
-					bio->bi_iter.bi_size, bio->bi_rw);
-			goto out_unlock_rcu;
-		}
-	}
-
-	/*
-	 * Either group has not been allocated yet or it is not an unlimited
-	 * IO group
-	 */
 	spin_lock_irq(q->queue_lock);
 
 	if (unlikely(blk_queue_bypass(q)))
 		goto out_unlock;
 
-	tg = throtl_lookup_create_tg(td, blkcg);
 	sq = &tg->service_queue;
 
 	while (true) {
@@ -1507,8 +1449,6 @@ bool blk_throtl_bio(struct request_queue *q, struct bio *bio)
 
 out_unlock:
 	spin_unlock_irq(q->queue_lock);
-out_unlock_rcu:
-	rcu_read_unlock();
 out:
 	/*
 	 * As multiple blk-throtls may stack in the same issue path, we
diff --git a/block/blk.h b/block/blk.h
index 026d959..d905b26 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -266,15 +266,10 @@ static inline struct io_context *create_io_context(gfp_t gfp_mask, int node)
  * Internal throttling interface
  */
 #ifdef CONFIG_BLK_DEV_THROTTLING
-extern bool blk_throtl_bio(struct request_queue *q, struct bio *bio);
 extern void blk_throtl_drain(struct request_queue *q);
 extern int blk_throtl_init(struct request_queue *q);
 extern void blk_throtl_exit(struct request_queue *q);
 #else /* CONFIG_BLK_DEV_THROTTLING */
-static inline bool blk_throtl_bio(struct request_queue *q, struct bio *bio)
-{
-	return false;
-}
 static inline void blk_throtl_drain(struct request_queue *q) { }
 static inline int blk_throtl_init(struct request_queue *q) { return 0; }
 static inline void blk_throtl_exit(struct request_queue *q) { }
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index a4429b3..b73e353 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -1683,30 +1683,6 @@ static void cfq_pd_reset_stats(struct blkg_policy_data *pd)
 	cfqg_stats_reset(&cfqg->dead_stats);
 }
 
-/*
- * Search for the cfq group current task belongs to. request_queue lock must
- * be held.
- */
-static struct cfq_group *cfq_lookup_create_cfqg(struct cfq_data *cfqd,
-						struct blkcg *blkcg)
-{
-	struct request_queue *q = cfqd->queue;
-	struct cfq_group *cfqg = NULL;
-
-	/* avoid lookup for the common case where there's no blkcg */
-	if (blkcg == &blkcg_root) {
-		cfqg = cfqd->root_group;
-	} else {
-		struct blkcg_gq *blkg;
-
-		blkg = blkg_lookup_create(blkcg, q);
-		if (!IS_ERR(blkg))
-			cfqg = blkg_to_cfqg(blkg);
-	}
-
-	return cfqg;
-}
-
 static void cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg)
 {
 	cfqq->cfqg = cfqg;
@@ -2108,12 +2084,6 @@ static struct cftype cfq_blkcg_files[] = {
 	{ }	/* terminate */
 };
 #else /* GROUP_IOSCHED */
-static struct cfq_group *cfq_lookup_create_cfqg(struct cfq_data *cfqd,
-						struct blkcg *blkcg)
-{
-	return cfqd->root_group;
-}
-
 static inline void
 cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg) {
 	cfqq->cfqg = cfqg;
@@ -3711,16 +3681,19 @@ cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct cfq_io_cq *cic,
 {
 	int ioprio_class = IOPRIO_PRIO_CLASS(cic->ioprio);
 	int ioprio = IOPRIO_PRIO_DATA(cic->ioprio);
+	struct blkcg_gq *blkg;
 	struct cfq_queue **async_cfqq = NULL;
 	struct cfq_queue *cfqq;
 	struct cfq_group *cfqg;
 
 	rcu_read_lock();
-	cfqg = cfq_lookup_create_cfqg(cfqd, bio_blkcg(bio));
-	if (!cfqg) {
+
+	blkg = blkg_lookup(bio_blkcg(bio), cfqd->queue);
+	if (!blkg) {
 		cfqq = &cfqd->oom_cfqq;
 		goto out;
 	}
+	cfqg = blkg_to_cfqg(blkg);
 
 	if (!is_sync) {
 		if (!ioprio_valid(cic->ioprio)) {
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index 0609bce..ca9432f 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -421,8 +421,8 @@ static inline struct request_list *blk_get_rl(struct request_queue *q,
 	 * or if either the blkcg or queue is going away.  Fall back to
 	 * root_rl in such cases.
 	 */
-	blkg = blkg_lookup_create(blkcg, q);
-	if (unlikely(IS_ERR(blkg)))
+	blkg = blkg_lookup(blkcg, q);
+	if (unlikely(!blkg))
 		goto root_rl;
 
 	blkg_get(blkg);
@@ -636,6 +636,39 @@ static inline void blkg_rwstat_merge(struct blkg_rwstat *to,
 	u64_stats_update_end(&to->syncp);
 }
 
+#ifdef CONFIG_BLK_DEV_THROTTLING
+extern bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
+			   struct bio *bio);
+#else
+static inline bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
+				  struct bio *bio) { return false; }
+#endif
+
+static inline bool blkcg_bio_issue_check(struct request_queue *q,
+					 struct bio *bio)
+{
+	struct blkcg *blkcg;
+	struct blkcg_gq *blkg;
+	bool throtl = false;
+
+	rcu_read_lock();
+	blkcg = bio_blkcg(bio);
+
+	blkg = blkg_lookup(blkcg, q);
+	if (unlikely(!blkg)) {
+		spin_lock_irq(q->queue_lock);
+		blkg = blkg_lookup_create(blkcg, q);
+		if (IS_ERR(blkg))
+			blkg = NULL;
+		spin_unlock_irq(q->queue_lock);
+	}
+
+	throtl = blk_throtl_bio(q, blkg, bio);
+
+	rcu_read_unlock();
+	return !throtl;
+}
+
 #else	/* CONFIG_BLK_CGROUP */
 
 struct blkcg {
@@ -689,6 +722,9 @@ static inline void blk_put_rl(struct request_list *rl) { }
 static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl) { }
 static inline struct request_list *blk_rq_rl(struct request *rq) { return &rq->q->root_rl; }
 
+static inline bool blkcg_bio_issue_check(struct request_queue *q,
+					 struct bio *bio) { return true; }
+
 #define blk_queue_for_each_rl(rl, q)	\
 	for ((rl) = &(q)->root_rl; (rl); (rl) = NULL)
 
-- 
2.4.3
