[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250630191341.1263000-1-richard@nod.at>
Date: Mon, 30 Jun 2025 21:13:41 +0200
From: Richard Weinberger <richard@....at>
To: linux-nvme@...ts.infradead.org
Cc: linux-kernel@...r.kernel.org,
kch@...dia.com,
sagi@...mberg.me,
hch@....de,
dlemoal@...nel.org,
upstream+nvme@...ma-star.at,
Richard Weinberger <richard@....at>
Subject: [PATCH v2] nvmet: Make blksize_shift configurable
Currently, the block size is automatically configured, and for
file-backed namespaces it is likely to be 4K.
While this is a reasonable default for modern storage, it can
cause confusion if someone wants to export a pre-created disk image
that uses a 512-byte block size.
As a result, partition parsing will fail.
So, just like we already do for the loop block device, let the user
configure the block size if they know better.
Signed-off-by: Richard Weinberger <richard@....at>
---
Changes since v1 (RFC)[0]:
- Make sure blksize_shift is within a sane range in general (9 to 16)
- In the bdev case and when using direct IO, blksize_shift must not be
smaller than the logical block size of the device
- In the file case and when using direct IO try to use STATX_DIOALIGN,
just like the loop device does
[0] https://lore.kernel.org/linux-nvme/20250418090834.2755289-1-richard@nod.at/
Thanks,
//richard
---
drivers/nvme/target/configfs.c | 37 +++++++++++++++++++++++++++++++
drivers/nvme/target/io-cmd-bdev.c | 13 ++++++++++-
drivers/nvme/target/io-cmd-file.c | 28 ++++++++++++++++++-----
3 files changed, 71 insertions(+), 7 deletions(-)
diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
index e44ef69dffc24..26175c37374ab 100644
--- a/drivers/nvme/target/configfs.c
+++ b/drivers/nvme/target/configfs.c
@@ -797,6 +797,42 @@ static ssize_t nvmet_ns_resv_enable_store(struct config_item *item,
}
CONFIGFS_ATTR(nvmet_ns_, resv_enable);
+static ssize_t nvmet_ns_blksize_shift_show(struct config_item *item, char *page)
+{
+ return sysfs_emit(page, "%u\n", to_nvmet_ns(item)->blksize_shift);
+}
+
+static ssize_t nvmet_ns_blksize_shift_store(struct config_item *item,
+ const char *page, size_t count)
+{
+ struct nvmet_ns *ns = to_nvmet_ns(item);
+ u32 shift;
+ int ret;
+
+ ret = kstrtou32(page, 0, &shift);
+ if (ret)
+ return ret;
+
+ /*
+ * Make sure block size is within reason, something between 512 and
+ * BLK_MAX_BLOCK_SIZE.
+ */
+ if (shift < 9 || shift > 16)
+ return -EINVAL;
+
+ mutex_lock(&ns->subsys->lock);
+ if (ns->enabled) {
+ pr_err("the ns:%d is already enabled.\n", ns->nsid);
+ mutex_unlock(&ns->subsys->lock);
+ return -EINVAL;
+ }
+ ns->blksize_shift = shift;
+ mutex_unlock(&ns->subsys->lock);
+
+ return count;
+}
+CONFIGFS_ATTR(nvmet_ns_, blksize_shift);
+
static struct configfs_attribute *nvmet_ns_attrs[] = {
&nvmet_ns_attr_device_path,
&nvmet_ns_attr_device_nguid,
@@ -806,6 +842,7 @@ static struct configfs_attribute *nvmet_ns_attrs[] = {
&nvmet_ns_attr_buffered_io,
&nvmet_ns_attr_revalidate_size,
&nvmet_ns_attr_resv_enable,
+ &nvmet_ns_attr_blksize_shift,
#ifdef CONFIG_PCI_P2PDMA
&nvmet_ns_attr_p2pmem,
#endif
diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c
index eba42df2f8215..be39837d4d792 100644
--- a/drivers/nvme/target/io-cmd-bdev.c
+++ b/drivers/nvme/target/io-cmd-bdev.c
@@ -77,6 +77,7 @@ static void nvmet_bdev_ns_enable_integrity(struct nvmet_ns *ns)
int nvmet_bdev_ns_enable(struct nvmet_ns *ns)
{
+ int bdev_blksize_shift;
int ret;
/*
@@ -100,7 +101,17 @@ int nvmet_bdev_ns_enable(struct nvmet_ns *ns)
}
ns->bdev = file_bdev(ns->bdev_file);
ns->size = bdev_nr_bytes(ns->bdev);
- ns->blksize_shift = blksize_bits(bdev_logical_block_size(ns->bdev));
+ bdev_blksize_shift = blksize_bits(bdev_logical_block_size(ns->bdev));
+
+ if (ns->blksize_shift) {
+ if (ns->blksize_shift < bdev_blksize_shift) {
+ pr_err("Configured blksize_shift needs to be at least %d for device %s\n",
+ bdev_blksize_shift, ns->device_path);
+ return -EINVAL;
+ }
+ } else {
+ ns->blksize_shift = bdev_blksize_shift;
+ }
ns->pi_type = 0;
ns->metadata_size = 0;
diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c
index 2d068439b129c..a4066b5a1dc97 100644
--- a/drivers/nvme/target/io-cmd-file.c
+++ b/drivers/nvme/target/io-cmd-file.c
@@ -49,12 +49,28 @@ int nvmet_file_ns_enable(struct nvmet_ns *ns)
nvmet_file_ns_revalidate(ns);
- /*
- * i_blkbits can be greater than the universally accepted upper bound,
- * so make sure we export a sane namespace lba_shift.
- */
- ns->blksize_shift = min_t(u8,
- file_inode(ns->file)->i_blkbits, 12);
+ if (ns->blksize_shift) {
+ if (!ns->buffered_io) {
+ struct block_device *sb_bdev = ns->file->f_mapping->host->i_sb->s_bdev;
+ struct kstat st;
+
+ if (!vfs_getattr(&ns->file->f_path, &st, STATX_DIOALIGN, 0) &&
+ (st.result_mask & STATX_DIOALIGN) &&
+ (1 << ns->blksize_shift) < st.dio_offset_align)
+ return -EINVAL;
+
+ if (sb_bdev && (1 << ns->blksize_shift < bdev_logical_block_size(sb_bdev)))
+ return -EINVAL;
+ }
+ } else {
+ /*
+ * i_blkbits can be greater than the universally accepted
+ * upper bound, so make sure we export a sane namespace
+ * lba_shift.
+ */
+ ns->blksize_shift = min_t(u8,
+ file_inode(ns->file)->i_blkbits, 12);
+ }
ns->bvec_pool = mempool_create(NVMET_MIN_MPOOL_OBJ, mempool_alloc_slab,
mempool_free_slab, nvmet_bvec_cache);
--
2.49.0
Powered by blists - more mailing lists