Message-ID: <20250429165018.112999-3-kwolf@redhat.com>
Date: Tue, 29 Apr 2025 18:50:18 +0200
From: Kevin Wolf <kwolf@...hat.com>
To: dm-devel@...ts.linux.dev
Cc: kwolf@...hat.com,
	hreitz@...hat.com,
	mpatocka@...hat.com,
	snitzer@...nel.org,
	bmarzins@...hat.com,
	linux-kernel@...r.kernel.org
Subject: [PATCH 2/2] dm mpath: Interface for explicit probing of active paths

Multipath cannot directly provide failover for ioctls in the kernel
because it doesn't know what each ioctl means and which result could
indicate a path error. Userspace generally knows what the ioctl it
issued means and whether a failure might be a path error, but it
neither knows which path the ioctl took nor necessarily has the
privileges to fail a path using the control device.

In order to allow userspace to address this situation, implement a
DM_MPATH_PROBE_PATHS ioctl that prompts the dm-mpath driver to probe all
active paths in the current path group to see whether they still work,
and fail them if not. If this returns success, userspace can retry the
ioctl and expect that the bad path it hit before has now either been
failed or is working again.
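
For illustration only (not part of this patch), a minimal userspace
sketch of issuing the probe; the device path and the O_RDONLY open
mode are assumptions:

  #include <fcntl.h>
  #include <sys/ioctl.h>
  #include <unistd.h>
  #include <linux/dm-ioctl.h>

  /* Probe all active paths of a dm-mpath block device. Returns 0 on
   * success; fails with errno set to ENOTCONN if no valid path is
   * left. */
  int probe_mpath(const char *dev)    /* e.g. "/dev/mapper/mpatha" */
  {
      int fd = open(dev, O_RDONLY);
      if (fd < 0)
          return -1;
      int r = ioctl(fd, DM_MPATH_PROBE_PATHS);
      close(fd);
      return r;
  }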

The immediate motivation for this is the use of SG_IO in QEMU for SCSI
passthrough. Following a failed SG_IO ioctl, QEMU will trigger probing
to ensure that all active paths are actually alive, so that retrying
SG_IO at least has a lower chance of failing due to a path error.
However, the problem is broader than just SG_IO (it affects any ioctl),
and if applications need failover support for other ioctls, the same
probing can be used.
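
The resulting retry loop might look roughly like this (a sketch;
is_path_error() and MAX_ATTEMPTS are illustrative stand-ins, and real
classification of an SG_IO result as a path error is application
specific):

  #include <errno.h>
  #include <scsi/sg.h>
  #include <sys/ioctl.h>
  #include <linux/dm-ioctl.h>

  #define MAX_ATTEMPTS 3  /* arbitrary bound for this sketch */

  /* Hypothetical helper: did the SG_IO result look like a transport
   * (path) error rather than an error reported by the target? */
  static int is_path_error(const struct sg_io_hdr *hdr)
  {
      return hdr->host_status != 0;   /* simplistic; real checks vary */
  }

  static int sgio_with_failover(int fd, struct sg_io_hdr *hdr)
  {
      for (int i = 0; i < MAX_ATTEMPTS; i++) {
          if (ioctl(fd, SG_IO, hdr) < 0)
              return -1;              /* the ioctl itself failed */
          if (!is_path_error(hdr))
              return 0;               /* success, or a target error */
          /* Path error: fail broken paths, then retry */
          if (ioctl(fd, DM_MPATH_PROBE_PATHS) < 0)
              return -1;              /* e.g. ENOTCONN: no path left */
      }
      return -1;
  }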

This is not implemented on the DM control device, but on the DM mpath
block devices, to allow all users who have access to such a block device
to make use of this interface, specifically to implement failover for
ioctls. For the same reason, it is also unprivileged. Its implementation
is effectively just a bunch of reads, which could already be issued by
userspace, just without any guarantee that all the right paths are
selected.
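
For comparison, this is roughly the read that one kernel-side probe
performs, issued from userspace instead (a sketch assuming the device
was opened with O_DIRECT so the read actually reaches a path rather
than the page cache):

  #include <fcntl.h>
  #include <stdlib.h>
  #include <sys/ioctl.h>
  #include <unistd.h>
  #include <linux/fs.h>

  /* Read one logical block from sector 0, as the kernel-side probe
   * does per path. Which path services it is up to the path selector,
   * which is exactly why the new ioctl is needed. */
  static int probe_like_read(int fd)
  {
      int bs;
      void *buf;
      ssize_t r;

      if (ioctl(fd, BLKSSZGET, &bs) < 0)
          return -1;
      if (posix_memalign(&buf, bs, bs))   /* alignment for O_DIRECT */
          return -1;
      r = pread(fd, buf, bs, 0);
      free(buf);
      return r == bs ? 0 : -1;
  }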

The probing implemented here is done fully synchronously path by path;
probing all paths concurrently is left as an improvement for the future.

Co-developed-by: Hanna Czenczek <hreitz@...hat.com>
Signed-off-by: Hanna Czenczek <hreitz@...hat.com>
Signed-off-by: Kevin Wolf <kwolf@...hat.com>
---
 include/uapi/linux/dm-ioctl.h |  9 +++-
 drivers/md/dm-ioctl.c         |  1 +
 drivers/md/dm-mpath.c         | 91 ++++++++++++++++++++++++++++++++++-
 3 files changed, 98 insertions(+), 3 deletions(-)

diff --git a/include/uapi/linux/dm-ioctl.h b/include/uapi/linux/dm-ioctl.h
index b08c7378164d..3225e025e30e 100644
--- a/include/uapi/linux/dm-ioctl.h
+++ b/include/uapi/linux/dm-ioctl.h
@@ -258,10 +258,12 @@ enum {
 	DM_DEV_SET_GEOMETRY_CMD,
 	DM_DEV_ARM_POLL_CMD,
 	DM_GET_TARGET_VERSION_CMD,
+	DM_MPATH_PROBE_PATHS_CMD,
 };
 
 #define DM_IOCTL 0xfd
 
+/* Control device ioctls */
 #define DM_VERSION       _IOWR(DM_IOCTL, DM_VERSION_CMD, struct dm_ioctl)
 #define DM_REMOVE_ALL    _IOWR(DM_IOCTL, DM_REMOVE_ALL_CMD, struct dm_ioctl)
 #define DM_LIST_DEVICES  _IOWR(DM_IOCTL, DM_LIST_DEVICES_CMD, struct dm_ioctl)
@@ -285,10 +287,13 @@ enum {
 #define DM_TARGET_MSG	 _IOWR(DM_IOCTL, DM_TARGET_MSG_CMD, struct dm_ioctl)
 #define DM_DEV_SET_GEOMETRY	_IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl)
 
+/* Block device ioctls */
+#define DM_MPATH_PROBE_PATHS _IO(DM_IOCTL, DM_MPATH_PROBE_PATHS_CMD)
+
 #define DM_VERSION_MAJOR	4
-#define DM_VERSION_MINOR	49
+#define DM_VERSION_MINOR	50
 #define DM_VERSION_PATCHLEVEL	0
-#define DM_VERSION_EXTRA	"-ioctl (2025-01-17)"
+#define DM_VERSION_EXTRA	"-ioctl (2025-04-28)"
 
 /* Status bits */
 #define DM_READONLY_FLAG	(1 << 0) /* In/Out */
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index d42eac944eb5..4165fef4c170 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -1885,6 +1885,7 @@ static ioctl_fn lookup_ioctl(unsigned int cmd, int *ioctl_flags)
 		{DM_DEV_SET_GEOMETRY_CMD, 0, dev_set_geometry},
 		{DM_DEV_ARM_POLL_CMD, IOCTL_FLAGS_NO_PARAMS, dev_arm_poll},
 		{DM_GET_TARGET_VERSION_CMD, 0, get_target_version},
+		{DM_MPATH_PROBE_PATHS_CMD, 0, NULL}, /* block device ioctl */
 	};
 
 	if (unlikely(cmd >= ARRAY_SIZE(_ioctls)))
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 909ed6890ba5..af17a35c6457 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -2021,6 +2021,85 @@ static int multipath_message(struct dm_target *ti, unsigned int argc, char **arg
 	return r;
 }
 
+/*
+ * Perform a minimal read from the given path to find out whether the
+ * path still works.  If a path error occurs, fail it.
+ */
+static int probe_path(struct pgpath *pgpath)
+{
+	struct block_device *bdev = pgpath->path.dev->bdev;
+	unsigned int read_size = bdev_logical_block_size(bdev);
+	struct page *page;
+	struct bio *bio;
+	blk_status_t status;
+	int r = 0;
+
+	if (WARN_ON_ONCE(read_size > PAGE_SIZE))
+		return -EINVAL;
+
+	page = alloc_page(GFP_KERNEL);
+	if (!page)
+		return -ENOMEM;
+
+	/* Perform a minimal read: Sector 0, length read_size */
+	bio = bio_alloc(bdev, 1, REQ_OP_READ, GFP_KERNEL);
+	if (!bio) {
+		r = -ENOMEM;
+		goto out;
+	}
+
+	bio->bi_iter.bi_sector = 0;
+	__bio_add_page(bio, page, read_size, 0);
+	submit_bio_wait(bio);
+	status = bio->bi_status;
+	bio_put(bio);
+
+	if (status && blk_path_error(status))
+		fail_path(pgpath);
+
+out:
+	__free_page(page);
+	return r;
+}
+
+/*
+ * Probe all active paths in current_pg to find out whether they still work.
+ * Fail all paths that do not work.
+ *
+ * Return -ENOTCONN if no valid path is left (even outside of current_pg). We
+ * cannot probe paths in other pgs without switching current_pg, so if valid
+ * paths are only in different pgs, they may or may not work. Userspace can
+ * submit a request and we'll switch. If the request fails, it may need to
+ * probe again.
+ */
+static int probe_active_paths(struct multipath *m)
+{
+	struct pgpath *pgpath;
+	struct priority_group *pg;
+	int r = 0;
+
+	mutex_lock(&m->work_mutex);
+
+	pg = READ_ONCE(m->current_pg);
+	if (pg) {
+		list_for_each_entry(pgpath, &pg->pgpaths, list) {
+			if (!pgpath->is_active)
+				continue;
+
+			r = probe_path(pgpath);
+			if (r < 0)
+				goto out;
+		}
+	}
+
+	if (!atomic_read(&m->nr_valid_paths))
+		r = -ENOTCONN;
+
+out:
+	mutex_unlock(&m->work_mutex);
+	return r;
+}
+
 static int multipath_prepare_ioctl(struct dm_target *ti,
 				   struct block_device **bdev,
 				   unsigned int cmd, unsigned long arg,
@@ -2031,6 +2110,16 @@ static int multipath_prepare_ioctl(struct dm_target *ti,
 	unsigned long flags;
 	int r;
 
+	if (_IOC_TYPE(cmd) == DM_IOCTL) {
+		*forward = false;
+		switch (cmd) {
+		case DM_MPATH_PROBE_PATHS:
+			return probe_active_paths(m);
+		default:
+			return -ENOTTY;
+		}
+	}
+
 	pgpath = READ_ONCE(m->current_pgpath);
 	if (!pgpath || !mpath_double_check_test_bit(MPATHF_QUEUE_IO, m))
 		pgpath = choose_pgpath(m, 0);
@@ -2182,7 +2271,7 @@ static int multipath_busy(struct dm_target *ti)
  */
 static struct target_type multipath_target = {
 	.name = "multipath",
-	.version = {1, 14, 0},
+	.version = {1, 15, 0},
 	.features = DM_TARGET_SINGLETON | DM_TARGET_IMMUTABLE |
 		    DM_TARGET_PASSES_INTEGRITY,
 	.module = THIS_MODULE,
-- 
2.49.0

