lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAA9_cmcbArobG-cxNSpaQWULYSyw2YbY2-dQUGxqN6GnzfoOOQ@mail.gmail.com>
Date:	Thu, 18 Feb 2016 13:33:15 -0800
From:	Dan Williams <dan.j.williams@...el.com>
To:	Ingo Molnar <mingo@...nel.org>
Cc:	"Luck, Tony" <tony.luck@...el.com>,
	Linus Torvalds <torvalds@...ux-foundation.org>,
	Linux Kernel Mailing List <linux-kernel@...r.kernel.org>,
	Thomas Gleixner <tglx@...utronix.de>,
	"H. Peter Anvin" <hpa@...or.com>,
	Peter Zijlstra <a.p.zijlstra@...llo.nl>
Subject: Re: [PATCH v11 3/4] x86, mce: Add __mcsafe_copy()

On Thu, Feb 18, 2016 at 12:14 PM, Ingo Molnar <mingo@...nel.org> wrote:
>
> * Luck, Tony <tony.luck@...el.com> wrote:
>
>> On Thu, Feb 18, 2016 at 10:12:42AM -0800, Linus Torvalds wrote:
>> > On Wed, Feb 17, 2016 at 10:20 AM, Tony Luck <tony.luck@...el.com> wrote:
>> > >
>> > > If we faulted during the copy, then 'trapnr' will say which type
>> > > of trap (X86_TRAP_PF or X86_TRAP_MC) and 'remain' says how many
>> > > bytes were not copied.
>> >
>> > So apart from the naming, a couple of questions:
>> >
>> >  - I'd like to see the actual *use* case explained, not just what it does.
>>
>> First user is libnvdimm. Dan Williams already has code to use this so that
>> kernel code accessing persistent memory can return -EIO to a user instead of
>> crashing the system if the cpu runs into an uncorrected error during the copy.
>
> Are these the memcpy_*_pmem() calls in drivers/nvdimm/pmem.c? Is there any actual
> patch to look at?
>

Here's the integration patch I had from the version of mcsafe_copy()
at the beginning of January.  Pardon the whitespace damage... the
original thread is here: [1].  Note that the "badblocks" intergration
portion of that set went upstream in v4.5-rc1.

[1]: https://lists.01.org/pipermail/linux-nvdimm/2016-January/003864.html

---

Subject: x86, pmem: use __mcsafe_copy() for memcpy_from_pmem()

In support of large capacity persistent memory use __mcsafe_copy() for
pmem I/O.  This allows the pmem driver to support an error model similar
to disks when machine check recovery is available.

Cc: Thomas Gleixner <tglx@...utronix.de>
Cc: Ingo Molnar <mingo@...hat.com>
Cc: "H. Peter Anvin" <hpa@...or.com>
Cc: x86@...nel.org
Cc: Borislav Petkov <bp@...e.de>
Cc: Tony Luck <tony.luck@...el.com>
Cc: Andy Lutomirski <luto@...capital.net>
Signed-off-by: Dan Williams <dan.j.williams@...el.com>
---
 arch/x86/include/asm/pmem.h |   16 ++++++++++++++++
 drivers/nvdimm/Kconfig      |    1 +
 drivers/nvdimm/pmem.c       |   10 ++++++----
 include/linux/pmem.h        |   17 +++++++++++++----
 4 files changed, 36 insertions(+), 8 deletions(-)

diff --git a/arch/x86/include/asm/pmem.h b/arch/x86/include/asm/pmem.h
index d8ce3ec816ab..4ef301e78a2b 100644
--- a/arch/x86/include/asm/pmem.h
+++ b/arch/x86/include/asm/pmem.h
@@ -17,6 +17,7 @@
 #include <asm/cacheflush.h>
 #include <asm/cpufeature.h>
 #include <asm/special_insns.h>
+#include <asm/string.h>

 #ifdef CONFIG_ARCH_HAS_PMEM_API
 /**
@@ -47,6 +48,21 @@ static inline void arch_memcpy_to_pmem(void __pmem
*dst, const void *src,
                BUG();
 }

+static inline int arch_memcpy_from_pmem(void *dst, const void __pmem *src,
+               size_t n)
+{
+       if (IS_ENABLED(CONFIG_MCE_KERNEL_RECOVERY)) {
+               struct mcsafe_ret ret;
+
+               ret = __mcsafe_copy(dst, (void __force *) src, n);
+               if (ret.remain)
+                       return -EIO;
+               return 0;
+       }
+       memcpy(dst, (void __force *) src, n);
+       return 0;
+}
+
 /**
  * arch_wmb_pmem - synchronize writes to persistent memory
  *
diff --git a/drivers/nvdimm/Kconfig b/drivers/nvdimm/Kconfig
index 53c11621d5b1..fe5885d01fd8 100644
--- a/drivers/nvdimm/Kconfig
+++ b/drivers/nvdimm/Kconfig
@@ -22,6 +22,7 @@ config BLK_DEV_PMEM
        depends on HAS_IOMEM
        select ND_BTT if BTT
        select ND_PFN if NVDIMM_PFN
+       select MCE_KERNEL_RECOVERY if X86_MCE && X86_64
        help
          Memory ranges for PMEM are described by either an NFIT
          (NVDIMM Firmware Interface Table, see CONFIG_NFIT_ACPI), a
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 8744235b5be2..d8e14e962327 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -62,6 +62,7 @@ static bool is_bad_pmem(struct badblocks *bb,
sector_t sector, unsigned int len)
 static int pmem_do_bvec(struct block_device *bdev, struct page *page,
                unsigned int len, unsigned int off, int rw, sector_t sector)
 {
+       int rc = 0;
        void *mem = kmap_atomic(page);
        struct gendisk *disk = bdev->bd_disk;
        struct pmem_device *pmem = disk->private_data;
@@ -71,7 +72,7 @@ static int pmem_do_bvec(struct block_device *bdev,
struct page *page,
        if (rw == READ) {
                if (unlikely(is_bad_pmem(disk->bb, sector, len)))
                        return -EIO;
-               memcpy_from_pmem(mem + off, pmem_addr, len);
+               rc = memcpy_from_pmem(mem + off, pmem_addr, len);
                flush_dcache_page(page);
        } else {
                flush_dcache_page(page);
@@ -79,7 +80,7 @@ static int pmem_do_bvec(struct block_device *bdev,
struct page *page,
        }

        kunmap_atomic(mem);
-       return 0;
+       return rc;
 }

 static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
@@ -237,6 +238,7 @@ static int pmem_rw_bytes(struct nd_namespace_common *ndns,
                resource_size_t offset, void *buf, size_t size, int rw)
 {
        struct pmem_device *pmem = dev_get_drvdata(ndns->claim);
+       int rc = 0;

        if (unlikely(offset + size > pmem->size)) {
                dev_WARN_ONCE(&ndns->dev, 1, "request out of range\n");
@@ -244,13 +246,13 @@ static int pmem_rw_bytes(struct nd_namespace_common *ndns,
        }

        if (rw == READ)
-               memcpy_from_pmem(buf, pmem->virt_addr + offset, size);
+               rc = memcpy_from_pmem(buf, pmem->virt_addr + offset, size);
        else {
                memcpy_to_pmem(pmem->virt_addr + offset, buf, size);
                wmb_pmem();
        }

-       return 0;
+       return rc;
 }

 static int nd_pfn_init(struct nd_pfn *nd_pfn)
diff --git a/include/linux/pmem.h b/include/linux/pmem.h
index acfea8ce4a07..0e57a5beab21 100644
--- a/include/linux/pmem.h
+++ b/include/linux/pmem.h
@@ -42,6 +42,13 @@ static inline void arch_memcpy_to_pmem(void __pmem
*dst, const void *src,
        BUG();
 }

+static inline int arch_memcpy_from_pmem(void *dst,
+               const void __pmem *src, size_t n)
+{
+       BUG();
+       return 0;
+}
+
 static inline size_t arch_copy_from_iter_pmem(void __pmem *addr, size_t bytes,
                struct iov_iter *i)
 {
@@ -57,12 +64,14 @@ static inline void arch_clear_pmem(void __pmem
*addr, size_t size)

 /*
  * Architectures that define ARCH_HAS_PMEM_API must provide
- * implementations for arch_memcpy_to_pmem(), arch_wmb_pmem(),
- * arch_copy_from_iter_pmem(), arch_clear_pmem() and arch_has_wmb_pmem().
+ * implementations for arch_memcpy_to_pmem(), arch_memcpy_from_pmem(),
+ * arch_wmb_pmem(), arch_copy_from_iter_pmem(), arch_clear_pmem() and
+ * arch_has_wmb_pmem().
  */
-static inline void memcpy_from_pmem(void *dst, void __pmem const
*src, size_t size)
+static inline int memcpy_from_pmem(void *dst, void __pmem const *src,
+               size_t size)
 {
-       memcpy(dst, (void __force const *) src, size);
+       return arch_memcpy_from_pmem(dst, src, size);
 }

 static inline bool arch_has_pmem_api(void)

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ