[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1340375468-22509-7-git-send-email-mgorman@suse.de>
Date: Fri, 22 Jun 2012 15:31:02 +0100
From: Mel Gorman <mgorman@...e.de>
To: Andrew Morton <akpm@...ux-foundation.org>
Cc: Linux-MM <linux-mm@...ck.org>,
Linux-Netdev <netdev@...r.kernel.org>,
Linux-NFS <linux-nfs@...r.kernel.org>,
LKML <linux-kernel@...r.kernel.org>,
David Miller <davem@...emloft.net>,
Trond Myklebust <Trond.Myklebust@...app.com>,
Neil Brown <neilb@...e.de>,
Christoph Hellwig <hch@...radead.org>,
Peter Zijlstra <a.p.zijlstra@...llo.nl>,
Mike Christie <michaelc@...wisc.edu>,
Eric B Munson <emunson@...bm.net>,
Mel Gorman <mgorman@...e.de>
Subject: [PATCH 06/12] mm: Add get_kernel_page[s] for pinning of kernel addresses for I/O
This patch adds two new APIs get_kernel_pages() and get_kernel_page()
that may be used to pin a vector of kernel addresses for IO. The initial
user is expected to be NFS for allowing pages to be written to swap
using aops->direct_IO(). Strictly speaking, swap-over-NFS only needs
to pin one page for IO but it makes sense to express the API in terms
of a vector and add a helper for pinning single pages.
Signed-off-by: Mel Gorman <mgorman@...e.de>
Reviewed-by: Rik van Riel <riel@...hat.com>
---
include/linux/blk_types.h | 2 ++
include/linux/fs.h | 2 ++
include/linux/mm.h | 4 ++++
mm/memory.c | 53 +++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 61 insertions(+)
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 0edb65d..7b7ac9c 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -160,6 +160,7 @@ enum rq_flag_bits {
__REQ_FLUSH_SEQ, /* request for flush sequence */
__REQ_IO_STAT, /* account I/O stat */
__REQ_MIXED_MERGE, /* merge of different types, fail separately */
+ __REQ_KERNEL, /* direct IO to kernel pages */
__REQ_NR_BITS, /* stops here */
};
@@ -201,5 +202,6 @@ enum rq_flag_bits {
#define REQ_IO_STAT (1 << __REQ_IO_STAT)
#define REQ_MIXED_MERGE (1 << __REQ_MIXED_MERGE)
#define REQ_SECURE (1 << __REQ_SECURE)
+#define REQ_KERNEL (1 << __REQ_KERNEL)
#endif /* __LINUX_BLK_TYPES_H */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index f5e5bbb..adf1b7d 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -165,6 +165,8 @@ struct inodes_stat_t {
#define READ 0
#define WRITE RW_MASK
#define READA RWA_MASK
+#define KERNEL_READ (READ|REQ_KERNEL)
+#define KERNEL_WRITE (WRITE|REQ_KERNEL)
#define READ_SYNC (READ | REQ_SYNC)
#define WRITE_SYNC (WRITE | REQ_SYNC | REQ_NOIDLE)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index b3d4cd9..bbb3167 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1019,6 +1019,10 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
struct page **pages, struct vm_area_struct **vmas);
int get_user_pages_fast(unsigned long start, int nr_pages, int write,
struct page **pages);
+struct kvec;
+int get_kernel_pages(const struct kvec *iov, int nr_pages, int write,
+ struct page **pages);
+int get_kernel_page(unsigned long start, int write, struct page **pages);
struct page *get_dump_page(unsigned long addr);
extern int try_to_release_page(struct page * page, gfp_t gfp_mask);
diff --git a/mm/memory.c b/mm/memory.c
index 6322d36..c8153f5 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1843,6 +1843,59 @@ next_page:
EXPORT_SYMBOL(__get_user_pages);
/*
+ * get_kernel_pages() - pin kernel pages in memory
+ * @kiov: An array of struct kvec structures
+ * @nr_segs: number of segments to pin
+ * @write: pinning for read/write, currently ignored
+ * @pages: array that receives pointers to the pages pinned.
+ * Should be at least nr_segs long.
+ *
+ * Returns number of pages pinned. This may be fewer than the number
+ * requested. If nr_pages is 0 or negative, returns 0. If no pages
+ * were pinned, returns -errno. Each page returned must be released
+ * with a put_page() call when it is finished with.
+ */
+int get_kernel_pages(const struct kvec *kiov, int nr_segs, int write,
+ struct page **pages)
+{
+ int seg;
+
+ for (seg = 0; seg < nr_segs; seg++) {
+ if (WARN_ON(kiov[seg].iov_len != PAGE_SIZE))
+ return seg;
+
+ /* virt_to_page sanity checks the PFN */
+ pages[seg] = virt_to_page(kiov[seg].iov_base);
+ page_cache_get(pages[seg]);
+ }
+
+ return seg;
+}
+EXPORT_SYMBOL_GPL(get_kernel_pages);
+
+/*
+ * get_kernel_page() - pin a kernel page in memory
+ * @start: starting kernel address
+ * @write: pinning for read/write, currently ignored
+ * @pages: array that receives pointer to the page pinned.
+ * Must be at least nr_segs long.
+ *
+ * Returns 1 if page is pinned. If the page was not pinned, returns
+ * -errno. The page returned must be released with a put_page() call
+ * when it is finished with.
+ */
+int get_kernel_page(unsigned long start, int write, struct page **pages)
+{
+ const struct kvec kiov = {
+ .iov_base = (void *)start,
+ .iov_len = PAGE_SIZE
+ };
+
+ return get_kernel_pages(&kiov, 1, write, pages);
+}
+EXPORT_SYMBOL_GPL(get_kernel_page);
+
+/*
* fixup_user_fault() - manually resolve a user page fault
* @tsk: the task_struct to use for page fault accounting, or
* NULL if faults are not to be recorded.
--
1.7.9.2
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists