Message-Id: <20250307090152.4095551-1-yukuai1@huaweicloud.com>
Date: Fri,  7 Mar 2025 17:01:52 +0800
From: Yu Kuai <yukuai1@...weicloud.com>
To: ming.lei@...hat.com,
	axboe@...nel.dk,
	tj@...nel.org,
	josef@...icpanda.com
Cc: linux-block@...r.kernel.org,
	linux-kernel@...r.kernel.org,
	cgroups@...r.kernel.org,
	yukuai3@...wei.com,
	yukuai1@...weicloud.com,
	yi.zhang@...wei.com,
	yangerkun@...wei.com
Subject: [PATCH] blk-throttle: support io merge over iops_limit

From: Yu Kuai <yukuai3@...wei.com>

Commit 9f5ede3c01f9 ("block: throttle split bio in case of iops limit")
added support for accounting split IOs against the iops limit, because
the block layer accounts split bios in its io statistics.

However, io merge is still not handled, even though the block layer does
not count merged bios as separate IOs. Fix this by decreasing io_disp
when a bio is merged, so that following IO can use the extra budget. If
a merge races with iops throttling, one bio more or less may be
dispatched; this is fine because, as long as a new slice has not
started, blk-throttle already preserves one extra slice of budget for
such deviation, and it is not worth handling the case where iops_limit
is less than one IO per slice.
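
For illustration only (not part of the patch): a minimal userspace
sketch of the charge/refund accounting described above, using a
hypothetical per-slice budget. The atomic counter also mirrors why
io_disp[] becomes atomic_t below: the merge path can run concurrently
with the charge path.

	/*
	 * Illustrative sketch (not kernel code): each bio is charged one
	 * IO against the per-slice budget when dispatched; if it is later
	 * merged into an existing request, the charge is handed back so
	 * following IO can use it.
	 */
	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stdio.h>

	static atomic_int io_disp;	/* IOs charged in the current slice */

	static bool within_iops_limit(int io_allowed)
	{
		/* mirrors tg_within_iops_limit(): room left in this slice? */
		return atomic_load(&io_disp) + 1 <= io_allowed;
	}

	static void charge_bio(void)
	{
		/* mirrors throtl_charge_bio() */
		atomic_fetch_add(&io_disp, 1);
	}

	static void bio_merged(void)
	{
		/* mirrors blk_throtl_bio_merge(): refund the charge */
		atomic_fetch_sub(&io_disp, 1);
	}

	int main(void)
	{
		int io_allowed = 2;	/* hypothetical iops budget per slice */

		charge_bio();		/* bio A dispatched */
		charge_bio();		/* bio B dispatched */
		bio_merged();		/* bio B merged into A's request */

		/* the refunded budget admits a third bio in the same slice */
		printf("third bio allowed: %s\n",
		       within_iops_limit(io_allowed) ? "yes" : "no");
		return 0;
	}

Built with any C11 compiler, the sketch prints "yes": the budget
refunded by the merge admits a third bio within the same slice.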

A regression test will be added for this case [1]. Before this patch,
the test fails:

+++ /root/blktests-mainline/results/nodev/throtl/007.out.bad
@@ -1,4 +1,4 @@
 Running throtl/007
 1
-1
+11
 Test complete

[1] https://lore.kernel.org/all/20250307080318.3860858-2-yukuai1@huaweicloud.com/

Signed-off-by: Yu Kuai <yukuai3@...wei.com>
---
 block/blk-merge.c    |  3 +++
 block/blk-throttle.c | 20 ++++++++++----------
 block/blk-throttle.h | 19 +++++++++++++++++--
 3 files changed, 30 insertions(+), 12 deletions(-)

diff --git a/block/blk-merge.c b/block/blk-merge.c
index 15cd231d560c..8dc7add7c31e 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -988,6 +988,7 @@ enum bio_merge_status bio_attempt_back_merge(struct request *req,
 
 	trace_block_bio_backmerge(bio);
 	rq_qos_merge(req->q, req, bio);
+	blk_throtl_bio_merge(req->q, bio);
 
 	if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
 		blk_rq_set_mixed_merge(req);
@@ -1025,6 +1026,7 @@ static enum bio_merge_status bio_attempt_front_merge(struct request *req,
 
 	trace_block_bio_frontmerge(bio);
 	rq_qos_merge(req->q, req, bio);
+	blk_throtl_bio_merge(req->q, bio);
 
 	if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
 		blk_rq_set_mixed_merge(req);
@@ -1055,6 +1057,7 @@ static enum bio_merge_status bio_attempt_discard_merge(struct request_queue *q,
 		goto no_merge;
 
 	rq_qos_merge(q, req, bio);
+	blk_throtl_bio_merge(q, bio);
 
 	req->biotail->bi_next = bio;
 	req->biotail = bio;
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 91dab43c65ab..9fd613c79caa 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -477,7 +477,7 @@ static inline void throtl_start_new_slice_with_credit(struct throtl_grp *tg,
 		bool rw, unsigned long start)
 {
 	tg->bytes_disp[rw] = 0;
-	tg->io_disp[rw] = 0;
+	atomic_set(&tg->io_disp[rw], 0);
 
 	/*
 	 * Previous slice has expired. We must have trimmed it after last
@@ -500,7 +500,7 @@ static inline void throtl_start_new_slice(struct throtl_grp *tg, bool rw,
 {
 	if (clear) {
 		tg->bytes_disp[rw] = 0;
-		tg->io_disp[rw] = 0;
+		atomic_set(&tg->io_disp[rw], 0);
 	}
 	tg->slice_start[rw] = jiffies;
 	tg->slice_end[rw] = jiffies + tg->td->throtl_slice;
@@ -623,10 +623,10 @@ static inline void throtl_trim_slice(struct throtl_grp *tg, bool rw)
 	else
 		tg->bytes_disp[rw] = 0;
 
-	if ((int)tg->io_disp[rw] >= io_trim)
-		tg->io_disp[rw] -= io_trim;
+	if (atomic_read(&tg->io_disp[rw]) >= io_trim)
+		atomic_sub(io_trim, &tg->io_disp[rw]);
 	else
-		tg->io_disp[rw] = 0;
+		atomic_set(&tg->io_disp[rw], 0);
 
 	tg->slice_start[rw] += time_elapsed;
 
@@ -655,9 +655,9 @@ static void __tg_update_carryover(struct throtl_grp *tg, bool rw,
 			tg->bytes_disp[rw];
 	if (iops_limit != UINT_MAX)
 		*ios = calculate_io_allowed(iops_limit, jiffy_elapsed) -
-			tg->io_disp[rw];
+			atomic_read(&tg->io_disp[rw]);
 	tg->bytes_disp[rw] -= *bytes;
-	tg->io_disp[rw] -= *ios;
+	atomic_sub(*ios, &tg->io_disp[rw]);
 }
 
 static void tg_update_carryover(struct throtl_grp *tg)
@@ -691,7 +691,7 @@ static unsigned long tg_within_iops_limit(struct throtl_grp *tg, struct bio *bio
 	/* Round up to the next throttle slice, wait time must be nonzero */
 	jiffy_elapsed_rnd = roundup(jiffy_elapsed + 1, tg->td->throtl_slice);
 	io_allowed = calculate_io_allowed(iops_limit, jiffy_elapsed_rnd);
-	if (io_allowed > 0 && tg->io_disp[rw] + 1 <= io_allowed)
+	if (io_allowed > 0 && atomic_read(&tg->io_disp[rw]) + 1 <= io_allowed)
 		return 0;
 
 	/* Calc approx time to dispatch */
@@ -815,7 +815,7 @@ static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio)
 	if (!bio_flagged(bio, BIO_BPS_THROTTLED))
 		tg->bytes_disp[rw] += bio_size;
 
-	tg->io_disp[rw]++;
+	atomic_inc(&tg->io_disp[rw]);
 }
 
 /**
@@ -1679,7 +1679,7 @@ bool __blk_throtl_bio(struct bio *bio)
 		   rw == READ ? 'R' : 'W',
 		   tg->bytes_disp[rw], bio->bi_iter.bi_size,
 		   tg_bps_limit(tg, rw),
-		   tg->io_disp[rw], tg_iops_limit(tg, rw),
+		   atomic_read(&tg->io_disp[rw]), tg_iops_limit(tg, rw),
 		   sq->nr_queued[READ], sq->nr_queued[WRITE]);
 
 	td->nr_queued[rw]++;
diff --git a/block/blk-throttle.h b/block/blk-throttle.h
index 7964cc041e06..7e5f50c6bb19 100644
--- a/block/blk-throttle.h
+++ b/block/blk-throttle.h
@@ -104,7 +104,7 @@ struct throtl_grp {
 	/* Number of bytes dispatched in current slice */
 	int64_t bytes_disp[2];
 	/* Number of bio's dispatched in current slice */
-	int io_disp[2];
+	atomic_t io_disp[2];
 
 	/*
 	 * The following two fields are updated when new configuration is
@@ -144,6 +144,8 @@ static inline struct throtl_grp *blkg_to_tg(struct blkcg_gq *blkg)
 static inline void blk_throtl_exit(struct gendisk *disk) { }
 static inline bool blk_throtl_bio(struct bio *bio) { return false; }
 static inline void blk_throtl_cancel_bios(struct gendisk *disk) { }
+static inline void blk_throtl_bio_merge(struct request_queue *q,
+					struct bio *bio) { }
 #else /* CONFIG_BLK_DEV_THROTTLING */
 void blk_throtl_exit(struct gendisk *disk);
 bool __blk_throtl_bio(struct bio *bio);
@@ -189,12 +191,25 @@ static inline bool blk_should_throtl(struct bio *bio)
 
 static inline bool blk_throtl_bio(struct bio *bio)
 {
-
 	if (!blk_should_throtl(bio))
 		return false;
 
 	return __blk_throtl_bio(bio);
 }
+
+static inline void blk_throtl_bio_merge(struct request_queue *q,
+					struct bio *bio)
+{
+	struct throtl_grp *tg;
+	int rw = bio_data_dir(bio);
+
+	if (!blk_throtl_activated(bio->bi_bdev->bd_queue))
+		return;
+
+	tg = blkg_to_tg(bio->bi_blkg);
+	if (tg->has_rules_iops[rw])
+		atomic_dec(&tg->io_disp[rw]);
+}
 #endif /* CONFIG_BLK_DEV_THROTTLING */
 
 #endif
-- 
2.39.2

