[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <Yi/Us896/ftt5l4f@slm.duckdns.org>
Date: Mon, 14 Mar 2022 13:50:11 -1000
From: Tejun Heo <tj@...nel.org>
To: Jens Axboe <axboe@...nel.dk>
Cc: linux-block@...r.kernel.org, kernel-team@...com,
linux-kernel@...r.kernel.org, Josef Bacik <josef@...icpanda.com>
Subject: [PATCH v2 for-5.18/block] block: don't merge across cgroup
boundaries if blkcg is enabled
blk-iocost and iolatency are cgroup aware rq-qos policies but they didn't
disable merges across different cgroups. This obviously can lead to
accounting and control errors but more importantly to priority inversions -
e.g. an IO which belongs to a higher priority cgroup or IO class may end up
getting throttled incorrectly because it gets merged to an IO issued from a
low priority cgroup.
Fix it by adding blk_cgroup_mergeable() which is called from merge paths and
rejects cross-cgroup and cross-issue_as_root merges.
v2: Dropped conditional enabling. Always disallow cross-blkcg merges for
simplicity. While this may spuriously prevent some merges for cases
where blkcg is enabled but no control is applied, that is a small cross
section.
Signed-off-by: Tejun Heo <tj@...nel.org>
Fixes: d70675121546 ("block: introduce blk-iolatency io controller")
Cc: stable@...r.kernel.org # v4.19+
Cc: Josef Bacik <jbacik@...com>
---
block/blk-merge.c | 11 +++++++++++
include/linux/blk-cgroup.h | 17 +++++++++++++++++
2 files changed, 28 insertions(+)
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -9,6 +9,7 @@
#include <linux/blk-integrity.h>
#include <linux/scatterlist.h>
#include <linux/part_stat.h>
+#include <linux/blk-cgroup.h>
#include <trace/events/block.h>
@@ -600,6 +601,9 @@ static inline unsigned int blk_rq_get_ma
static inline int ll_new_hw_segment(struct request *req, struct bio *bio,
unsigned int nr_phys_segs)
{
+ if (!blk_cgroup_mergeable(req, bio))
+ goto no_merge;
+
if (blk_integrity_merge_bio(req->q, req, bio) == false)
goto no_merge;
@@ -696,6 +700,9 @@ static int ll_merge_requests_fn(struct r
if (total_phys_segments > blk_rq_get_max_segments(req))
return 0;
+ if (!blk_cgroup_mergeable(req, next->bio))
+ return 0;
+
if (blk_integrity_merge_rq(q, req, next) == false)
return 0;
@@ -904,6 +911,10 @@ bool blk_rq_merge_ok(struct request *rq,
if (bio_data_dir(bio) != rq_data_dir(rq))
return false;
+ /* don't merge across cgroup boundaries */
+ if (!blk_cgroup_mergeable(rq, bio))
+ return false;
+
/* only merge integrity protected bio into ditto rq */
if (blk_integrity_merge_bio(rq->q, rq, bio) == false)
return false;
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -21,6 +21,7 @@
#include <linux/seq_file.h>
#include <linux/radix-tree.h>
#include <linux/blkdev.h>
+#include <linux/blk-mq.h>
#include <linux/atomic.h>
#include <linux/kthread.h>
#include <linux/fs.h>
@@ -604,6 +605,21 @@ static inline void blkcg_clear_delay(str
atomic_dec(&blkg->blkcg->css.cgroup->congestion_count);
}
+/**
+ * blk_cgroup_mergeable - Determine whether to allow or disallow merges
+ * @rq: request to merge into
+ * @bio: bio to merge
+ *
+ * @bio and @rq should belong to the same cgroup and their issue_as_root should
+ * match. The latter is necessary as we don't want to throttle e.g. a metadata
+ * update because it happens to be next to a regular IO.
+ *
+ * The cgroup association of @rq is read from @rq->bio, which is dereferenced
+ * without a NULL check, so @rq must have a bio attached.
+ *
+ * Return: %true if @rq->bio and @bio have the same bi_blkg and their
+ * bio_issue_as_root_blkg() results are equal, %false otherwise.
+ */
+static inline bool blk_cgroup_mergeable(struct request *rq, struct bio *bio)
+{
+ return rq->bio->bi_blkg == bio->bi_blkg &&
+ bio_issue_as_root_blkg(rq->bio) == bio_issue_as_root_blkg(bio);
+}
+
void blk_cgroup_bio_start(struct bio *bio);
void blkcg_add_delay(struct blkcg_gq *blkg, u64 now, u64 delta);
void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay);
@@ -659,6 +675,7 @@ static inline void blkg_put(struct blkcg
static inline bool blkcg_punt_bio_submit(struct bio *bio) { return false; }
static inline void blkcg_bio_issue_init(struct bio *bio) { }
static inline void blk_cgroup_bio_start(struct bio *bio) { }
+/* Stub: always allows the merge. Presumably the blkcg-disabled variant - confirm against the enclosing #ifdef. */
+static inline bool blk_cgroup_mergeable(struct request *rq, struct bio *bio) { return true; }
#define blk_queue_for_each_rl(rl, q) \
for ((rl) = &(q)->root_rl; (rl); (rl) = NULL)
Powered by blists - more mailing lists