[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <149222368842.32363.497776993572386297.stgit@dwillia2-desk3.amr.corp.intel.com>
Date: Fri, 14 Apr 2017 19:34:48 -0700
From: Dan Williams <dan.j.williams@...el.com>
To: linux-nvdimm@...ts.01.org
Cc: Jan Kara <jack@...e.cz>, Matthew Wilcox <mawilcox@...rosoft.com>,
linux-kernel@...r.kernel.org, Jeff Moyer <jmoyer@...hat.com>,
Al Viro <viro@...iv.linux.org.uk>,
Ross Zwisler <ross.zwisler@...ux.intel.com>,
Christoph Hellwig <hch@....de>
Subject: [PATCH v2 19/33] dax, pmem: introduce 'copy_from_iter' dax operation
The direct-I/O write path for a pmem device must ensure that data is
flushed to a power-fail safe zone when the operation is complete.
However, other dax capable block devices, like brd, do not have this
requirement. Introduce a 'copy_from_iter' dax operation so that pmem
can inject cache management without imposing this overhead on other dax
capable block_device drivers.
This is also a first step of moving all architecture-specific
pmem-operations to the pmem driver.
Cc: Jan Kara <jack@...e.cz>
Cc: Jeff Moyer <jmoyer@...hat.com>
Cc: Christoph Hellwig <hch@....de>
Cc: Al Viro <viro@...iv.linux.org.uk>
Cc: Matthew Wilcox <mawilcox@...rosoft.com>
Cc: Ross Zwisler <ross.zwisler@...ux.intel.com>
Signed-off-by: Dan Williams <dan.j.williams@...el.com>
---
drivers/nvdimm/pmem.c | 43 +++++++++++++++++++++++++++++++++++++++++++
include/linux/dax.h | 3 +++
2 files changed, 46 insertions(+)
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 3b3dab73d741..e501df4ab4b4 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -220,6 +220,48 @@ __weak long __pmem_direct_access(struct pmem_device *pmem, pgoff_t pgoff,
return PHYS_PFN(pmem->size - pmem->pfn_pad - offset);
}
+static size_t pmem_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff,
+ void *addr, size_t bytes, struct iov_iter *i)
+{
+ size_t len;
+
+ /* TODO: skip the write-back by always using non-temporal stores */
+ len = copy_from_iter_nocache(addr, bytes, i);
+
+ /*
+ * In the iovec case on x86_64 copy_from_iter_nocache() uses
+ * non-temporal stores for the bulk of the transfer, but we need
+ * to manually flush if the transfer is unaligned. A cached
+ * memory copy is used when destination or size is not naturally
+ * aligned. That is:
+ * - Require 8-byte alignment when size is 8 bytes or larger.
+ * - Require 4-byte alignment when size is 4 bytes.
+ *
+ * In the non-iovec case the entire destination needs to be
+ * flushed.
+ */
+ if (iter_is_iovec(i)) {
+ unsigned long flushed, dest = (unsigned long) addr;
+
+ if (bytes < 8) {
+ if (!IS_ALIGNED(dest, 4) || (bytes != 4))
+ wb_cache_pmem(addr, 1);
+ } else {
+ if (!IS_ALIGNED(dest, 8)) {
+ dest = ALIGN(dest, boot_cpu_data.x86_clflush_size);
+ wb_cache_pmem(addr, 1);
+ }
+
+ flushed = dest - (unsigned long) addr;
+ if (bytes > flushed && !IS_ALIGNED(bytes - flushed, 8))
+ wb_cache_pmem(addr + bytes - 1, 1);
+ }
+ } else
+ wb_cache_pmem(addr, bytes);
+
+ return len;
+}
+
static const struct block_device_operations pmem_fops = {
.owner = THIS_MODULE,
.rw_page = pmem_rw_page,
@@ -236,6 +278,7 @@ static long pmem_dax_direct_access(struct dax_device *dax_dev,
static const struct dax_operations pmem_dax_ops = {
.direct_access = pmem_dax_direct_access,
+ .copy_from_iter = pmem_copy_from_iter,
};
static void pmem_release_queue(void *q)
diff --git a/include/linux/dax.h b/include/linux/dax.h
index d3158e74a59e..156f067d4db5 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -16,6 +16,9 @@ struct dax_operations {
*/
long (*direct_access)(struct dax_device *, pgoff_t, long,
void **, pfn_t *);
+ /* copy_from_iter: dax-driver override for default copy_from_iter */
+ size_t (*copy_from_iter)(struct dax_device *, pgoff_t, void *, size_t,
+ struct iov_iter *);
};
int dax_read_lock(void);
Powered by blists - more mailing lists