Message-Id: <20250418084443.7443-1-zhoujifeng@kylinos.com.cn>
Date: Fri, 18 Apr 2025 16:44:43 +0800
From: Zhou Jifeng <zhoujifeng@...inos.com.cn>
To: colyli@...nel.org,
kent.overstreet@...ux.dev
Cc: linux-bcache@...r.kernel.org,
linux-kernel@...r.kernel.org,
Zhou Jifeng <zhoujifeng@...inos.com.cn>
Subject: [PATCH] bcache: optimize dsync performance in writeback mode with deferred flush
Currently, in writeback mode, bcache unconditionally propagates PREFLUSH
requests from upper layers to the backing device. For dsync-heavy
workloads this results in excessive flushes of the backing device and
suboptimal performance.
This patch introduces a deferred flush handling mechanism to improve
dsync efficiency while maintaining data integrity:
1. Defer forwarding PREFLUSH requests to the backing device for as long
as possible
2. Keep data cached in the cache device
3. Utilize FUA writes during dirty data writeback to ensure persistence
Applicable scenarios:
- Workloads demanding low-latency dsync operations
- Systems with frequent small synchronous writes
Test case:
dd if=/dev/zero of=/dev/bcache0 bs=32k count=200000 oflag=dsync
Hardware setup:
Cache device: Intel Optane SSD 900P
Backing device: ST4000NM000B HDD
Comparison:
- deferred FLUSH enabled
- deferred FLUSH disabled with sequential_cutoff=0
Result:
Enabling deferred FLUSH achieved 5× the performance of the second
scenario.
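Usage example (a minimal sketch, not part of the patch: it assumes the
cached device is registered as bcache0; deferred_flush is the sysfs
attribute added below, cache_mode is the existing bcache attribute):
echo writeback > /sys/block/bcache0/bcache/cache_mode
echo 1 > /sys/block/bcache0/bcache/deferred_flush
dd if=/dev/zero of=/dev/bcache0 bs=32k count=200000 oflag=dsync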
Signed-off-by: Zhou Jifeng <zhoujifeng@...inos.com.cn>
---
drivers/md/bcache/bcache_ondisk.h | 1 +
drivers/md/bcache/request.c | 19 +++++++++++++++++--
drivers/md/bcache/sysfs.c | 29 +++++++++++++++++++++++++++++
drivers/md/bcache/writeback.c | 7 +++++++
drivers/md/bcache/writeback.h | 3 +++
5 files changed, 57 insertions(+), 2 deletions(-)
diff --git a/drivers/md/bcache/bcache_ondisk.h b/drivers/md/bcache/bcache_ondisk.h
index 6620a7f8fffc..4895217a7fa6 100644
--- a/drivers/md/bcache/bcache_ondisk.h
+++ b/drivers/md/bcache/bcache_ondisk.h
@@ -294,6 +294,7 @@ BITMASK(BDEV_CACHE_MODE, struct cache_sb, flags, 0, 4);
#define CACHE_MODE_WRITEBACK 1U
#define CACHE_MODE_WRITEAROUND 2U
#define CACHE_MODE_NONE 3U
+BITMASK(BDEV_DEFERRED_FLUSH, struct cache_sb, flags, 4, 1);
BITMASK(BDEV_STATE, struct cache_sb, flags, 61, 2);
#define BDEV_STATE_NONE 0U
#define BDEV_STATE_CLEAN 1U
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index af345dc6fde1..7fd76bd49237 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -1030,7 +1030,11 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s)
bch_writeback_add(dc);
s->iop.bio = bio;
- if (bio->bi_opf & REQ_PREFLUSH) {
+ /* When DEFERRED_FLUSH is enabled, REQ_PREFLUSH is not
* forwarded to the backing device; durability is ensured
* later by FUA writes during writeback.
*/
+ if ((bio->bi_opf & REQ_PREFLUSH) && !BDEV_DEFERRED_FLUSH(&dc->sb)) {
/*
* Also need to send a flush to the backing
* device.
@@ -1066,14 +1070,25 @@ static CLOSURE_CALLBACK(cached_dev_nodata)
{
closure_type(s, struct search, cl);
struct bio *bio = &s->bio.bio;
+ struct cached_dev *dc = container_of(s->d, struct cached_dev, disk);
- if (s->iop.flush_journal)
+ if (s->iop.flush_journal) {
bch_journal_meta(s->iop.c, cl);
+ /* When DEFERRED_FLUSH is enabled, the flush is not
* forwarded to the backing device.
*/
+ if (BDEV_DEFERRED_FLUSH(&dc->sb)) {
+ s->iop.status = BLK_STS_OK;
+ goto end;
+ }
+ }
+
/* If it's a flush, we send the flush to the backing device too */
bio->bi_end_io = backing_request_endio;
closure_bio_submit(s->iop.c, bio, cl);
+end:
continue_at(cl, cached_dev_bio_complete, NULL);
}
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index e8f696cb58c0..dff84f5bb184 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -151,6 +151,7 @@ rw_attribute(copy_gc_enabled);
rw_attribute(idle_max_writeback_rate);
rw_attribute(gc_after_writeback);
rw_attribute(size);
+rw_attribute(deferred_flush);
static ssize_t bch_snprint_string_list(char *buf,
size_t size,
@@ -283,6 +284,8 @@ SHOW(__bch_cached_dev)
return strlen(buf);
}
+ sysfs_print(deferred_flush, BDEV_DEFERRED_FLUSH(&dc->sb));
+
#undef var
return 0;
}
@@ -295,6 +298,7 @@ STORE(__cached_dev)
ssize_t v;
struct cache_set *c;
struct kobj_uevent_env *env;
+ struct bio flush;
/* no user space access if system is rebooting */
if (bcache_is_reboot)
@@ -383,6 +387,12 @@ STORE(__cached_dev)
SET_BDEV_CACHE_MODE(&dc->sb, v);
bch_write_bdev_super(dc, NULL);
}
+
+ /* deferred_flush is only valid in writeback mode; clear it otherwise */
+ if (BDEV_DEFERRED_FLUSH(&dc->sb) && ((unsigned int) v != CACHE_MODE_WRITEBACK)) {
+ SET_BDEV_DEFERRED_FLUSH(&dc->sb, 0);
+ bch_write_bdev_super(dc, NULL);
+ }
}
if (attr == &sysfs_readahead_cache_policy) {
@@ -451,6 +461,24 @@ STORE(__cached_dev)
if (attr == &sysfs_stop)
bcache_device_stop(&dc->disk);
+ if (attr == &sysfs_deferred_flush) {
+ bool deferred_flush = strtoul_or_return(buf);
+
+ if (deferred_flush != BDEV_DEFERRED_FLUSH(&dc->sb)) {
+ if (deferred_flush && (BDEV_CACHE_MODE(&dc->sb) != CACHE_MODE_WRITEBACK)) {
+ pr_err("It's not the writeback mode that can't enable deferred_flush.\n");
+ return size;
+ }
+ SET_BDEV_DEFERRED_FLUSH(&dc->sb, deferred_flush);
+ bch_write_bdev_super(dc, NULL);
+ if (deferred_flush) {
+ bio_init(&flush, dc->bdev, NULL, 0, REQ_OP_WRITE | REQ_PREFLUSH);
+ /* flush any data still in the backing device's volatile cache */
+ submit_bio_wait(&flush);
+ }
+ }
+ }
+
return size;
}
@@ -541,6 +569,7 @@ static struct attribute *bch_cached_dev_attrs[] = {
#endif
&sysfs_backing_dev_name,
&sysfs_backing_dev_uuid,
+ &sysfs_deferred_flush,
NULL
};
ATTRIBUTE_GROUPS(bch_cached_dev);
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 453efbbdc8ee..68bf655f3b96 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -435,6 +435,13 @@ static CLOSURE_CALLBACK(write_dirty)
if (KEY_DIRTY(&w->key)) {
dirty_init(w);
io->bio.bi_opf = REQ_OP_WRITE;
+
+ /* When DEFERRED_FLUSH is enabled, writeback itself must
* make the dirty data durable on the backing device.
*/
+ if (BDEV_DEFERRED_FLUSH(&dc->sb))
+ io->bio.bi_opf |= REQ_FUA | REQ_SYNC | REQ_PREFLUSH;
+
io->bio.bi_iter.bi_sector = KEY_START(&w->key);
bio_set_dev(&io->bio, io->dc->bdev);
io->bio.bi_end_io = dirty_endio;
diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h
index 31df716951f6..1dbecf89fdd3 100644
--- a/drivers/md/bcache/writeback.h
+++ b/drivers/md/bcache/writeback.h
@@ -117,6 +117,9 @@ static inline bool should_writeback(struct cached_dev *dc, struct bio *bio,
bio_sectors(bio)))
return true;
+ if (BDEV_DEFERRED_FLUSH(&dc->sb))
+ return true;
+
if (would_skip)
return false;
--
2.18.1