[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <200709121439.32641.fenkes@de.ibm.com>
Date: Wed, 12 Sep 2007 14:39:31 +0200
From: Joachim Fenkes <fenkes@...ibm.com>
To: "LinuxPPC-Dev" <linuxppc-dev@...abs.org>,
LKML <linux-kernel@...r.kernel.org>,
"OF-General" <general@...ts.openfabrics.org>,
Roland Dreier <rolandd@...co.com>,
"OF-EWG" <ewg@...ts.openfabrics.org>
Cc: "Hoang-Nam Nguyen" <hnguyen@...ibm.com>,
Christoph Raisch <raisch@...ibm.com>,
Stefan Roscher <stefan.roscher@...ibm.com>
Subject: [PATCH] IB/ehca: Make sure user pages are from hugetlb before using MR large pages
From: Hoang-Nam Nguyen <hnguyen@...ibm.com>
...because, on virtualized hardware like System p, we can't be sure that the
physical pages behind them are contiguous.
Signed-off-by: Joachim Fenkes <fenkes@...ibm.com>
---
Another patch for 2.6.24 that will apply cleanly on top of my previous
patchset. Please review and apply. Thanks!
drivers/infiniband/hw/ehca/ehca_classes.h | 8 ++--
drivers/infiniband/hw/ehca/ehca_mrmw.c | 82 +++++++++++++++++++++++++----
2 files changed, 75 insertions(+), 15 deletions(-)
diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h
index 206d4eb..c2edd4c 100644
--- a/drivers/infiniband/hw/ehca/ehca_classes.h
+++ b/drivers/infiniband/hw/ehca/ehca_classes.h
@@ -99,10 +99,10 @@ struct ehca_sport {
struct ehca_sma_attr saved_attr;
};
-#define HCA_CAP_MR_PGSIZE_4K 1
-#define HCA_CAP_MR_PGSIZE_64K 2
-#define HCA_CAP_MR_PGSIZE_1M 4
-#define HCA_CAP_MR_PGSIZE_16M 8
+#define HCA_CAP_MR_PGSIZE_4K 0x80000000
+#define HCA_CAP_MR_PGSIZE_64K 0x40000000
+#define HCA_CAP_MR_PGSIZE_1M 0x20000000
+#define HCA_CAP_MR_PGSIZE_16M 0x10000000
struct ehca_shca {
struct ib_device ib_device;
diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c
index 4c8f3b3..1bb9d23 100644
--- a/drivers/infiniband/hw/ehca/ehca_mrmw.c
+++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c
@@ -41,6 +41,8 @@
*/
#include <asm/current.h>
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
#include <rdma/ib_umem.h>
@@ -51,6 +53,7 @@
#define NUM_CHUNKS(length, chunk_size) \
(((length) + (chunk_size - 1)) / (chunk_size))
+
/* max number of rpages (per hcall register_rpages) */
#define MAX_RPAGES 512
@@ -279,6 +282,52 @@ reg_phys_mr_exit0:
} /* end ehca_reg_phys_mr() */
/*----------------------------------------------------------------------*/
+/*
+ * Check whether the user address range [addr, addr + size) is backed
+ * entirely by hugetlb VMAs.
+ *
+ * Returns 1 if every page of the range belongs to a hugetlb VMA, 0 if at
+ * least one page does not, or a negative errno on failure (-ENOMEM if the
+ * scratch page cannot be allocated, otherwise the get_user_pages() error).
+ */
+static int ehca_is_mem_hugetlb(unsigned long addr, unsigned long size)
+{
+	struct vm_area_struct **vma_list;
+	unsigned long cur_base;
+	unsigned long npages;
+	int ret, i;
+
+	/* one scratch page holds the VMA pointers for a batch of user pages */
+	vma_list = (struct vm_area_struct **) __get_free_page(GFP_KERNEL);
+	if (!vma_list) {
+		ehca_gen_err("Can not alloc vma_list");
+		return -ENOMEM;
+	}
+
+	down_write(&current->mm->mmap_sem);
+	/* number of pages touched by the range, including a partial first page */
+	npages = PAGE_ALIGN(size + (addr & ~PAGE_MASK)) >> PAGE_SHIFT;
+	cur_base = addr & PAGE_MASK;
+
+	while (npages) {
+		/*
+		 * Only the VMAs are requested (pages == NULL). The batch size is
+		 * compared as unsigned long so that a large npages is not
+		 * truncated to a zero or negative int before the minimum is taken.
+		 */
+		ret = get_user_pages(current, current->mm, cur_base,
+				     min_t(unsigned long, npages,
+					   PAGE_SIZE / sizeof (*vma_list)),
+				     1, 0, NULL, vma_list);
+
+		if (ret < 0) {
+			ehca_gen_err("get_user_pages() failed "
+				     "ret=%x cur_base=%lx", ret, cur_base);
+			goto is_hugetlb_out;
+		}
+
+		/* a single non-hugetlb VMA disqualifies the whole range */
+		for (i = 0; i < ret; ++i)
+			if (!is_vm_hugetlb_page(vma_list[i])) {
+				ret = 0;
+				goto is_hugetlb_out;
+			}
+
+		cur_base += ret * PAGE_SIZE;
+		npages -= ret;
+	}
+	ret = 1;
+
+is_hugetlb_out:
+	up_write(&current->mm->mmap_sem);
+	free_page((unsigned long) vma_list);
+
+	return ret;
+}
struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt, int mr_access_flags,
@@ -346,18 +395,29 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
num_kpages = NUM_CHUNKS((virt % PAGE_SIZE) + length, PAGE_SIZE);
/* select proper hw_pgsize */
if (ehca_mr_largepage &&
- (shca->hca_cap_mr_pgsize & HCA_CAP_MR_PGSIZE_16M)) {
- if (length <= EHCA_MR_PGSIZE4K
- && PAGE_SIZE == EHCA_MR_PGSIZE4K)
- hwpage_size = EHCA_MR_PGSIZE4K;
- else if (length <= EHCA_MR_PGSIZE64K)
- hwpage_size = EHCA_MR_PGSIZE64K;
- else if (length <= EHCA_MR_PGSIZE1M)
- hwpage_size = EHCA_MR_PGSIZE1M;
- else
- hwpage_size = EHCA_MR_PGSIZE16M;
+ shca->hca_cap_mr_pgsize & HCA_CAP_MR_PGSIZE_16M) {
+ ret = ehca_is_mem_hugetlb(virt, length);
+ switch (ret) {
+ case 0: /* mem is not from hugetlb */
+ hwpage_size = PAGE_SIZE;
+ break;
+ case 1:
+ if (length <= EHCA_MR_PGSIZE4K
+ && PAGE_SIZE == EHCA_MR_PGSIZE4K)
+ hwpage_size = EHCA_MR_PGSIZE4K;
+ else if (length <= EHCA_MR_PGSIZE64K)
+ hwpage_size = EHCA_MR_PGSIZE64K;
+ else if (length <= EHCA_MR_PGSIZE1M)
+ hwpage_size = EHCA_MR_PGSIZE1M;
+ else
+ hwpage_size = EHCA_MR_PGSIZE16M;
+ break;
+ default: /* out of mem */
+ ib_mr = ERR_PTR(-ENOMEM);
+ goto reg_user_mr_exit1;
+ }
} else
- hwpage_size = EHCA_MR_PGSIZE4K;
+ hwpage_size = EHCA_MR_PGSIZE4K; /* ehca1 can only 4k */
ehca_dbg(pd->device, "hwpage_size=%lx", hwpage_size);
reg_user_mr_fallback:
--
1.5.2
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists