lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <ba6790e0-23aa-4069-f9e3-982c5aa6ab29@oracle.com>
Date:   Mon, 8 May 2023 18:12:04 -0700
From:   Jane Chu <jane.chu@...cle.com>
To:     Dan Williams <dan.j.williams@...el.com>, vishal.l.verma@...el.com,
        dave.jiang@...el.com, ira.weiny@...el.com, willy@...radead.org,
        viro@...iv.linux.org.uk, brauner@...nel.org,
        nvdimm@...ts.linux.dev, linux-kernel@...r.kernel.org,
        linux-fsdevel@...r.kernel.org
Subject: Re: [PATCH v3] dax: enable dax fault handler to report
 VM_FAULT_HWPOISON

On 5/4/2023 7:32 PM, Dan Williams wrote:
> Jane Chu wrote:
>> When multiple processes mmap() a dax file, then at some point,
>> a process issues a 'load' and consumes a hwpoison, the process
>> receives a SIGBUS with si_code = BUS_MCEERR_AR and with si_lsb
>> set for the poison scope. Soon after, any other process issues
>> a 'load' to the poisoned page (that is unmapped from the kernel
>> side by memory_failure), it receives a SIGBUS with
>> si_code = BUS_ADRERR and without valid si_lsb.
>>
>> This is confusing to the user and differs from a page fault due
>> to poison in RAM; some helpful information is also lost.
>>
>> Channel dax backend driver's poison detection to the filesystem
>> such that instead of reporting VM_FAULT_SIGBUS, it could report
>> VM_FAULT_HWPOISON.
> 
> I do think it is interesting that this will start returning SIGBUS with
> BUS_MCEERR_AR for stores whereas it is only signalled for loads in the
> direct consumption path, but I can't think of a scenario where that
> would confuse existing software.

Yes indeed, nice catch, thank you!

> 
>> Change from v2:
>>    Convert -EHWPOISON to -EIO to prevent EHWPOISON errno from leaking
>> out to block read(2) - suggested by Matthew.
> 
> For next time, these kinds of changelog notes belong...
> 
>> Signed-off-by: Jane Chu <jane.chu@...cle.com>
>> ---
> 
> ...here after the "---".

I'll move the change log to a cover letter.

> 
>>   drivers/nvdimm/pmem.c | 2 +-
>>   fs/dax.c              | 4 ++--
>>   include/linux/mm.h    | 2 ++
>>   3 files changed, 5 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
>> index ceea55f621cc..46e094e56159 100644
>> --- a/drivers/nvdimm/pmem.c
>> +++ b/drivers/nvdimm/pmem.c
>> @@ -260,7 +260,7 @@ __weak long __pmem_direct_access(struct pmem_device *pmem, pgoff_t pgoff,
>>   		long actual_nr;
>>   
>>   		if (mode != DAX_RECOVERY_WRITE)
>> -			return -EIO;
>> +			return -EHWPOISON;
>>   
>>   		/*
>>   		 * Set the recovery stride is set to kernel page size because
>> diff --git a/fs/dax.c b/fs/dax.c
>> index 2ababb89918d..18f9598951f1 100644
>> --- a/fs/dax.c
>> +++ b/fs/dax.c
>> @@ -1498,7 +1498,7 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
>>   
>>   		map_len = dax_direct_access(dax_dev, pgoff, PHYS_PFN(size),
>>   				DAX_ACCESS, &kaddr, NULL);
>> -		if (map_len == -EIO && iov_iter_rw(iter) == WRITE) {
>> +		if (map_len == -EHWPOISON && iov_iter_rw(iter) == WRITE) {
>>   			map_len = dax_direct_access(dax_dev, pgoff,
>>   					PHYS_PFN(size), DAX_RECOVERY_WRITE,
>>   					&kaddr, NULL);
>> @@ -1506,7 +1506,7 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
>>   				recovery = true;
>>   		}
>>   		if (map_len < 0) {
>> -			ret = map_len;
>> +			ret = (map_len == -EHWPOISON) ? -EIO : map_len;
> 
> This fixup leaves out several other places where EHWPOISON could leak as
> the errno for read(2)/write(2). I think I want to see something like
> this:
> 
> diff --git a/fs/dax.c b/fs/dax.c
> index 2ababb89918d..ec17f9977bcb 100644
> --- a/fs/dax.c
> +++ b/fs/dax.c
> @@ -1077,14 +1077,13 @@ int dax_writeback_mapping_range(struct address_space *mapping,
>   }
>   EXPORT_SYMBOL_GPL(dax_writeback_mapping_range);
>   
> -static int dax_iomap_direct_access(const struct iomap *iomap, loff_t pos,
> -               size_t size, void **kaddr, pfn_t *pfnp)
> +static int __dax_iomap_direct_access(const struct iomap *iomap, loff_t pos,
> +                                    size_t size, void **kaddr, pfn_t *pfnp)
>   {
>          pgoff_t pgoff = dax_iomap_pgoff(iomap, pos);
> -       int id, rc = 0;
>          long length;
> +       int rc = 0;
>   
> -       id = dax_read_lock();
>          length = dax_direct_access(iomap->dax_dev, pgoff, PHYS_PFN(size),
>                                     DAX_ACCESS, kaddr, pfnp);
>          if (length < 0) {
> @@ -1113,6 +1112,36 @@ static int dax_iomap_direct_access(const struct iomap *iomap, loff_t pos,
>          return rc;
>   }
>   
> +static int dax_iomap_direct_access(const struct iomap *iomap, loff_t pos,
> +                                  size_t size, void **kaddr, pfn_t *pfnp)
> +{
> +
> +       int id;
> +
> +       id = dax_read_lock();
> +       rc = __dax_iomap_direct_access(iomap, pos, size, kaddr, pfnp);
> +       dax_read_unlock(id);
> +
> +       /* don't leak a memory access error code to I/O syscalls */
> +       if (rc == -EHWPOISON)
> +               return -EIO;
> +       return rc;
> +}
> +
> +static int dax_fault_direct_access(const struct iomap *iomap, loff_t pos,
> +                                  size_t size, void **kaddr, pfn_t *pfnp)
> +{
> +
> +       int id;
> +
> +       id = dax_read_lock();
> +       rc = __dax_iomap_direct_access(iomap, pos, size, kaddr, pfnp);
> +       dax_read_unlock(id);
> +
> +       /* -EHWPOISON return ok */
> +       return rc;
> +}
> +
>   /**
>    * dax_iomap_copy_around - Prepare for an unaligned write to a shared/cow page
>    * by copying the data before and after the range to be written.
> @@ -1682,7 +1711,7 @@ static vm_fault_t dax_fault_iter(struct vm_fault *vmf,
>                  return pmd ? VM_FAULT_FALLBACK : VM_FAULT_SIGBUS;
>          }
>   
> -       err = dax_iomap_direct_access(iomap, pos, size, &kaddr, &pfn);
> +       err = dax_fault_direct_access(iomap, pos, size, &kaddr, &pfn);
>          if (err)
>                  return pmd ? VM_FAULT_FALLBACK : dax_fault_return(err);
>   
> 
> 
> ...and then convert all other callers of dax_direct_access() in that
> file such that they are either calling:
> 
> dax_iomap_direct_access(): if caller wants EHWPOISON translated to -EIO and is ok
> 			   with internal locking
> dax_fault_direct_access(): if caller wants EHWPOISON passed through and is
> 			   ok with internal locking
> __dax_iomap_direct_access(): if the caller wants to do its own EHWPOISON
> 			     translation and locking

Got it.  I examined all callers of dax_direct_access() and found a
couple more places that need the errno conversion.
I'd like to introduce a helper, mem2blk_err(err), for that. It could make
the code more self-explanatory.

Thanks,
-jane

> 
>>   			break;
>>   		}
>>   
>> diff --git a/include/linux/mm.h b/include/linux/mm.h
>> index 1f79667824eb..e4c974587659 100644
>> --- a/include/linux/mm.h
>> +++ b/include/linux/mm.h
>> @@ -3217,6 +3217,8 @@ static inline vm_fault_t vmf_error(int err)
>>   {
>>   	if (err == -ENOMEM)
>>   		return VM_FAULT_OOM;
>> +	else if (err == -EHWPOISON)
>> +		return VM_FAULT_HWPOISON;
>>   	return VM_FAULT_SIGBUS;
>>   }
>>   
>> -- 
>> 2.18.4
>>
> 
> 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ