Message-ID: <20250307134933.1033872-8-kent.overstreet@linux.dev>
Date: Fri,  7 Mar 2025 08:49:31 -0500
From: Kent Overstreet <kent.overstreet@...ux.dev>
To: linux-bcachefs@...r.kernel.org,
	linux-kernel@...r.kernel.org
Cc: Kent Overstreet <kent.overstreet@...ux.dev>
Subject: [PATCH 7/7] bcachefs: Implement freeze/thaw

This fills out our blk_holder_ops with freeze and thaw callbacks for
shutting down IO (generally during system suspend).
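
For reference, a rough sketch of how these callbacks are expected to be
reached from outside bcachefs; the assumption here is that the block
layer entry points bdev_freeze()/bdev_thaw() dispatch to the registered
blk_holder_ops (that call chain is not part of this patch, and
example_quiesce_and_resume() is made up for illustration):

#include <linux/blkdev.h>

static int example_quiesce_and_resume(struct block_device *bdev)
{
	/* dispatches to bch2_fs_bdev_freeze() via bdev->bd_holder_ops */
	int ret = bdev_freeze(bdev);
	if (ret)
		return ret;

	/* ... IO to the device is shut down here, e.g. across suspend ... */

	/* dispatches to bch2_fs_bdev_thaw(), re-enabling IO */
	return bdev_thaw(bdev);
}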

This is implemented completely differently than on other filesystems,
since we have a low-level synchronization object that conveniently works
well for us: bch_dev.io_ref, normally used to guard against a device
being offlined while in use.

bch2_dev_get_ioref() now checks whether a freeze is in progress if it
fails to get ca->io_ref, and sleeps until the freeze is complete and
ca->io_ref is alive again.
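
For illustration, a minimal hypothetical caller following the rules in
the comment added to sb-members.c below (example_write_to_dev() is made
up; the interface it calls is the real one from this patch):

static int example_write_to_dev(struct bch_fs *c, unsigned dev)
{
	/*
	 * Tryget semantics: returns NULL if the device is missing,
	 * offline, or not writeable; if a freeze is in flight, this
	 * sleeps until thaw and retries internally.
	 */
	struct bch_dev *ca = bch2_dev_get_ioref(c, dev, WRITE);
	if (!ca)
		return -ENODEV;

	/*
	 * ... submit IO; the completion path must drop the ref with
	 * percpu_ref_put(&ca->io_ref) without blocking on locks that
	 * may be held around bch2_dev_get_ioref() ...
	 */

	percpu_ref_put(&ca->io_ref);
	return 0;
}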

We also need a bit of synchronization for freeze/suspend vs. device
online/offline, done with the new bch_dev.io_ref_statechange_lock.
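
The tryget path also relies on ordering that is only implicit in the
code; a sketch of the intended barrier pairing (inferred, not spelled
out anywhere in the patch):

/*
 * freeze (under io_ref_statechange_lock)   bch2_dev_get_ioref()
 * ---------------------------------------  ---------------------------
 * ca->frozen++;
 * smp_mb();
 * percpu_ref_kill(&ca->io_ref);             percpu_ref_tryget() fails
 *                                           smp_mb();
 *                                           if (ca->frozen)
 *                                                   wait_event(...);
 *
 * A reader that observes the killed ref cannot also miss ca->frozen, so
 * a failed tryget with !ca->frozen really does mean the device is
 * offline rather than frozen.
 */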

Signed-off-by: Kent Overstreet <kent.overstreet@...ux.dev>
---
 fs/bcachefs/bcachefs.h   |  3 ++
 fs/bcachefs/journal_io.c |  2 +-
 fs/bcachefs/sb-members.c | 49 ++++++++++++++++++++++
 fs/bcachefs/sb-members.h | 20 +--------
 fs/bcachefs/super.c      | 87 +++++++++++++++++++++++++++++++++++++---
 5 files changed, 136 insertions(+), 25 deletions(-)

diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index d2c3f59a668f..d03aa62907ad 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -526,6 +526,9 @@ struct bch_dev {
 	struct completion	ref_completion;
 	struct percpu_ref	io_ref;
 	struct completion	io_ref_completion;
+	struct mutex		io_ref_statechange_lock;
+	unsigned		frozen;
+	wait_queue_head_t	frozen_wait;
 
 	struct bch_fs		*fs;
 
diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c
index a510755a8364..6979fef5c128 100644
--- a/fs/bcachefs/journal_io.c
+++ b/fs/bcachefs/journal_io.c
@@ -1769,7 +1769,7 @@ static CLOSURE_CALLBACK(journal_write_submit)
 		struct bch_dev *ca = bch2_dev_get_ioref(c, ptr->dev, WRITE);
 		if (!ca) {
 			/* XXX: fix this */
-			bch_err(c, "missing device for journal write\n");
+			bch_err(c, "missing device for journal write");
 			continue;
 		}
 
diff --git a/fs/bcachefs/sb-members.c b/fs/bcachefs/sb-members.c
index 116131f95815..2363367cb32d 100644
--- a/fs/bcachefs/sb-members.c
+++ b/fs/bcachefs/sb-members.c
@@ -9,6 +9,55 @@
 #include "sb-members.h"
 #include "super-io.h"
 
+/*
+ * Use of bch2_dev_get_ioref() is subject to deadlocks if used incorrectly, and
+ * we cannot write asserts for correct usage, so: pay attention, because this is
+ * where we implement freeze.
+ *
+ * Waiting on an outstanding freeze to complete will indirectly wait on all
+ * other outstanding io_refs to be released. That means:
+ *
+ * - Don't use bch2_dev_get_ioref() if you already have an io_ref, use
+ *   percpu_ref_get(). Since dev_get_ioref() has tryget() semantics, that's what
+ *   you should be doing anyways.
+ *
+ * - All io_refs must be released without blocking on locks that might be held
+ *   while calling dev_get_ioref(). This is easy to obey since we generally
+ *   release io_refs from endio functions.
+ *
+ */
+struct bch_dev *bch2_dev_get_ioref(struct bch_fs *c, unsigned dev, int rw)
+{
+	might_sleep();
+again:
+	rcu_read_lock();
+	struct bch_dev *ca = bch2_dev_rcu(c, dev);
+	if (likely(ca)) {
+		if (unlikely(!percpu_ref_tryget(&ca->io_ref))) {
+			smp_mb();
+			if (ca->frozen) {
+				bch2_dev_get(ca);
+				rcu_read_unlock();
+
+				wait_event(ca->frozen_wait, !ca->frozen);
+				bch2_dev_put(ca);
+				goto again;
+			}
+			ca = NULL;
+		}
+	}
+	rcu_read_unlock();
+
+	if (ca &&
+	    (ca->mi.state == BCH_MEMBER_STATE_rw ||
+	    (ca->mi.state == BCH_MEMBER_STATE_ro && rw == READ)))
+		return ca;
+
+	if (ca)
+		percpu_ref_put(&ca->io_ref);
+	return NULL;
+}
+
 void bch2_dev_missing(struct bch_fs *c, unsigned dev)
 {
 	if (dev != BCH_SB_MEMBER_INVALID)
diff --git a/fs/bcachefs/sb-members.h b/fs/bcachefs/sb-members.h
index df91b02ce575..b3359ee63b0e 100644
--- a/fs/bcachefs/sb-members.h
+++ b/fs/bcachefs/sb-members.h
@@ -281,25 +281,7 @@ static inline struct bch_dev *bch2_dev_iterate(struct bch_fs *c, struct bch_dev
 	return bch2_dev_tryget(c, dev_idx);
 }
 
-static inline struct bch_dev *bch2_dev_get_ioref(struct bch_fs *c, unsigned dev, int rw)
-{
-	might_sleep();
-
-	rcu_read_lock();
-	struct bch_dev *ca = bch2_dev_rcu(c, dev);
-	if (ca && !percpu_ref_tryget(&ca->io_ref))
-		ca = NULL;
-	rcu_read_unlock();
-
-	if (ca &&
-	    (ca->mi.state == BCH_MEMBER_STATE_rw ||
-	    (ca->mi.state == BCH_MEMBER_STATE_ro && rw == READ)))
-		return ca;
-
-	if (ca)
-		percpu_ref_put(&ca->io_ref);
-	return NULL;
-}
+struct bch_dev *bch2_dev_get_ioref(struct bch_fs *, unsigned, int);
 
 /* XXX kill, move to struct bch_fs */
 static inline struct bch_devs_mask bch2_online_devs(struct bch_fs *c)
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index 05a2dc5ef513..dfdeab7d847c 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -1236,6 +1236,22 @@ static void bch2_dev_free(struct bch_dev *ca)
 	kobject_put(&ca->kobj);
 }
 
+static void bch2_dev_io_ref_stop(struct bch_dev *ca)
+{
+	lockdep_assert_held(&ca->io_ref_statechange_lock);
+
+	reinit_completion(&ca->io_ref_completion);
+	percpu_ref_kill(&ca->io_ref);
+	wait_for_completion(&ca->io_ref_completion);
+}
+
+static void bch2_dev_io_ref_start(struct bch_dev *ca)
+{
+	lockdep_assert_held(&ca->io_ref_statechange_lock);
+
+	percpu_ref_reinit(&ca->io_ref);
+}
+
 static void __bch2_dev_offline(struct bch_fs *c, struct bch_dev *ca)
 {
 
@@ -1246,13 +1262,14 @@ static void __bch2_dev_offline(struct bch_fs *c, struct bch_dev *ca)
 
 	__bch2_dev_read_only(c, ca);
 
-	reinit_completion(&ca->io_ref_completion);
-	percpu_ref_kill(&ca->io_ref);
-	wait_for_completion(&ca->io_ref_completion);
-
 	bch2_dev_unlink(ca);
 
+	mutex_lock(&ca->io_ref_statechange_lock);
+	bch2_dev_io_ref_stop(ca);
+
 	bch2_free_super(&ca->disk_sb);
+	mutex_unlock(&ca->io_ref_statechange_lock);
+
 	bch2_dev_journal_exit(ca);
 }
 
@@ -1334,6 +1351,8 @@ static struct bch_dev *__bch2_dev_alloc(struct bch_fs *c,
 	kobject_init(&ca->kobj, &bch2_dev_ktype);
 	init_completion(&ca->ref_completion);
 	init_completion(&ca->io_ref_completion);
+	mutex_init(&ca->io_ref_statechange_lock);
+	init_waitqueue_head(&ca->frozen_wait);
 
 	INIT_WORK(&ca->io_error_work, bch2_io_error_work);
 
@@ -1428,6 +1447,8 @@ static int __bch2_dev_attach_bdev(struct bch_dev *ca, struct bch_sb_handle *sb)
 	if (ret)
 		return ret;
 
+	mutex_lock(&ca->io_ref_statechange_lock);
+
 	/* Commit: */
 	ca->disk_sb = *sb;
 	memset(sb, 0, sizeof(*sb));
@@ -1441,7 +1462,9 @@ static int __bch2_dev_attach_bdev(struct bch_dev *ca, struct bch_sb_handle *sb)
 
 	ca->dev = ca->disk_sb.bdev->bd_dev;
 
-	percpu_ref_reinit(&ca->io_ref);
+	if (!ca->frozen)
+		bch2_dev_io_ref_start(ca);
+	mutex_unlock(&ca->io_ref_statechange_lock);
 
 	return 0;
 }
@@ -2115,9 +2138,63 @@ static void bch2_fs_bdev_sync(struct block_device *bdev)
 	bch2_ro_ref_put(c);
 }
 
+static int bch2_fs_bdev_freeze(struct block_device *bdev)
+{
+	int ret = -EINVAL;
+	struct bch_fs *c = bdev_get_fs(bdev);
+	if (!c)
+		return ret;
+
+	struct bch_dev *ca = bdev_to_bch_dev(c, bdev);
+	if (!ca)
+		goto err;
+
+	mutex_lock(&ca->io_ref_statechange_lock);
+	ca->frozen++;
+	smp_mb();
+	bch2_dev_io_ref_stop(ca);
+	mutex_unlock(&ca->io_ref_statechange_lock);
+
+	ret = sync_blockdev(bdev);
+
+	bch2_dev_put(ca);
+err:
+	bch2_ro_ref_put(c);
+	return ret;
+}
+
+static int bch2_fs_bdev_thaw(struct block_device *bdev)
+{
+	int ret = -EINVAL;
+	struct bch_fs *c = bdev_get_fs(bdev);
+	if (!c)
+		return ret;
+
+	struct bch_dev *ca = bdev_to_bch_dev(c, bdev);
+	if (!ca)
+		goto err;
+
+	mutex_lock(&ca->io_ref_statechange_lock);
+	if (ca->disk_sb.bdev &&
+	    ca->frozen == 1)
+		bch2_dev_io_ref_start(ca);
+	--ca->frozen;
+	wake_up(&ca->frozen_wait);
+	mutex_unlock(&ca->io_ref_statechange_lock);
+
+	ret = 0;
+
+	bch2_dev_put(ca);
+err:
+	bch2_ro_ref_put(c);
+	return ret;
+}
+
 const struct blk_holder_ops bch2_sb_handle_bdev_ops = {
 	.mark_dead		= bch2_fs_bdev_mark_dead,
 	.sync			= bch2_fs_bdev_sync,
+	.freeze			= bch2_fs_bdev_freeze,
+	.thaw			= bch2_fs_bdev_thaw,
 };
 
 /* Filesystem open: */
-- 
2.47.2

