[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <cc03b826-5e0e-8535-627f-71ca3c07a7a6@oracle.com>
Date: Tue, 30 May 2023 12:17:22 -0700
From: Jane Chu <jane.chu@...cle.com>
To: dan.j.williams@...el.com, vishal.l.verma@...el.com,
dave.jiang@...el.com, ira.weiny@...el.com, willy@...radead.org,
viro@...iv.linux.org.uk, brauner@...nel.org,
nvdimm@...ts.linux.dev, linux-kernel@...r.kernel.org,
linux-fsdevel@...r.kernel.org
Subject: Re: [PATCH v4 1/1] dax: enable dax fault handler to report
VM_FAULT_HWPOISON
Ping... Are there any further concerns?
-jane
On 5/8/2023 10:47 PM, Jane Chu wrote:
> When multiple processes mmap() a dax file and, at some point,
> one process issues a 'load' and consumes a hwpoison, that process
> receives a SIGBUS with si_code = BUS_MCEERR_AR and with si_lsb
> set for the poison scope. Soon after, when any other process issues
> a 'load' to the poisoned page (which has been unmapped from the kernel
> side by memory_failure), it receives a SIGBUS with
> si_code = BUS_ADRERR and without a valid si_lsb.
>
> This is confusing to the user and differs from a page fault due
> to poison in RAM; some helpful information is also lost.
>
> Channel the dax backend driver's poison detection to the filesystem
> so that, instead of reporting VM_FAULT_SIGBUS, it can report
> VM_FAULT_HWPOISON.
>
> If user-level block I/O syscalls fail due to poison, the errno will
> be converted to EIO to maintain block API consistency.
>
> Signed-off-by: Jane Chu <jane.chu@...cle.com>
> ---
> drivers/dax/super.c | 5 ++++-
> drivers/nvdimm/pmem.c | 2 +-
> drivers/s390/block/dcssblk.c | 3 ++-
> fs/dax.c | 11 ++++++-----
> fs/fuse/virtio_fs.c | 3 ++-
> include/linux/dax.h | 5 +++++
> include/linux/mm.h | 2 ++
> 7 files changed, 22 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/dax/super.c b/drivers/dax/super.c
> index c4c4728a36e4..0da9232ea175 100644
> --- a/drivers/dax/super.c
> +++ b/drivers/dax/super.c
> @@ -203,6 +203,8 @@ size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
> int dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
> size_t nr_pages)
> {
> + int ret;
> +
> if (!dax_alive(dax_dev))
> return -ENXIO;
> /*
> @@ -213,7 +215,8 @@ int dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
> if (nr_pages != 1)
> return -EIO;
>
> - return dax_dev->ops->zero_page_range(dax_dev, pgoff, nr_pages);
> + ret = dax_dev->ops->zero_page_range(dax_dev, pgoff, nr_pages);
> + return dax_mem2blk_err(ret);
> }
> EXPORT_SYMBOL_GPL(dax_zero_page_range);
>
> diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
> index ceea55f621cc..46e094e56159 100644
> --- a/drivers/nvdimm/pmem.c
> +++ b/drivers/nvdimm/pmem.c
> @@ -260,7 +260,7 @@ __weak long __pmem_direct_access(struct pmem_device *pmem, pgoff_t pgoff,
> long actual_nr;
>
> if (mode != DAX_RECOVERY_WRITE)
> - return -EIO;
> + return -EHWPOISON;
>
> /*
> * Set the recovery stride is set to kernel page size because
> diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c
> index c09f2e053bf8..ee47ac520cd4 100644
> --- a/drivers/s390/block/dcssblk.c
> +++ b/drivers/s390/block/dcssblk.c
> @@ -54,7 +54,8 @@ static int dcssblk_dax_zero_page_range(struct dax_device *dax_dev,
> rc = dax_direct_access(dax_dev, pgoff, nr_pages, DAX_ACCESS,
> &kaddr, NULL);
> if (rc < 0)
> - return rc;
> + return dax_mem2blk_err(rc);
> +
> memset(kaddr, 0, nr_pages << PAGE_SHIFT);
> dax_flush(dax_dev, kaddr, nr_pages << PAGE_SHIFT);
> return 0;
> diff --git a/fs/dax.c b/fs/dax.c
> index 2ababb89918d..a26eb5abfdc0 100644
> --- a/fs/dax.c
> +++ b/fs/dax.c
> @@ -1148,7 +1148,7 @@ static int dax_iomap_copy_around(loff_t pos, uint64_t length, size_t align_size,
> if (!zero_edge) {
> ret = dax_iomap_direct_access(srcmap, pos, size, &saddr, NULL);
> if (ret)
> - return ret;
> + return dax_mem2blk_err(ret);
> }
>
> if (copy_all) {
> @@ -1310,7 +1310,7 @@ static s64 dax_unshare_iter(struct iomap_iter *iter)
>
> out_unlock:
> dax_read_unlock(id);
> - return ret;
> + return dax_mem2blk_err(ret);
> }
>
> int dax_file_unshare(struct inode *inode, loff_t pos, loff_t len,
> @@ -1342,7 +1342,8 @@ static int dax_memzero(struct iomap_iter *iter, loff_t pos, size_t size)
> ret = dax_direct_access(iomap->dax_dev, pgoff, 1, DAX_ACCESS, &kaddr,
> NULL);
> if (ret < 0)
> - return ret;
> + return dax_mem2blk_err(ret);
> +
> memset(kaddr + offset, 0, size);
> if (iomap->flags & IOMAP_F_SHARED)
> ret = dax_iomap_copy_around(pos, size, PAGE_SIZE, srcmap,
> @@ -1498,7 +1499,7 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
>
> map_len = dax_direct_access(dax_dev, pgoff, PHYS_PFN(size),
> DAX_ACCESS, &kaddr, NULL);
> - if (map_len == -EIO && iov_iter_rw(iter) == WRITE) {
> + if (map_len == -EHWPOISON && iov_iter_rw(iter) == WRITE) {
> map_len = dax_direct_access(dax_dev, pgoff,
> PHYS_PFN(size), DAX_RECOVERY_WRITE,
> &kaddr, NULL);
> @@ -1506,7 +1507,7 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
> recovery = true;
> }
> if (map_len < 0) {
> - ret = map_len;
> + ret = dax_mem2blk_err(map_len);
> break;
> }
>
> diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
> index 4d8d4f16c727..5f1be1da92ce 100644
> --- a/fs/fuse/virtio_fs.c
> +++ b/fs/fuse/virtio_fs.c
> @@ -775,7 +775,8 @@ static int virtio_fs_zero_page_range(struct dax_device *dax_dev,
> rc = dax_direct_access(dax_dev, pgoff, nr_pages, DAX_ACCESS, &kaddr,
> NULL);
> if (rc < 0)
> - return rc;
> + return dax_mem2blk_err(rc);
> +
> memset(kaddr, 0, nr_pages << PAGE_SHIFT);
> dax_flush(dax_dev, kaddr, nr_pages << PAGE_SHIFT);
> return 0;
> diff --git a/include/linux/dax.h b/include/linux/dax.h
> index bf6258472e49..a4e97acf60f5 100644
> --- a/include/linux/dax.h
> +++ b/include/linux/dax.h
> @@ -261,6 +261,11 @@ static inline bool dax_mapping(struct address_space *mapping)
> return mapping->host && IS_DAX(mapping->host);
> }
>
> +static inline int dax_mem2blk_err(int err)
> +{
> + return (err == -EHWPOISON) ? -EIO : err;
> +}
> +
> #ifdef CONFIG_DEV_DAX_HMEM_DEVICES
> void hmem_register_resource(int target_nid, struct resource *r);
> #else
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 1f79667824eb..e4c974587659 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -3217,6 +3217,8 @@ static inline vm_fault_t vmf_error(int err)
> {
> if (err == -ENOMEM)
> return VM_FAULT_OOM;
> + else if (err == -EHWPOISON)
> + return VM_FAULT_HWPOISON;
> return VM_FAULT_SIGBUS;
> }
>
Powered by blists - more mailing lists