[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <1527324107-56593-3-git-send-email-xavier.huwei@huawei.com>
Date: Sat, 26 May 2018 16:41:45 +0800
From: "Wei Hu (Xavier)" <xavier.huwei@...wei.com>
To: <dledford@...hat.com>, <jgg@...pe.ca>
CC: <linux-rdma@...r.kernel.org>, <xavier.huwei@...wei.com>,
<lijun_nudt@....com>, <oulijun@...wei.com>,
<charles.chenxin@...wei.com>, <linux-kernel@...r.kernel.org>,
<leonro@...lanox.com>
Subject: [PATCH V3 rdma-next 2/4] RDMA/hns: Fix the illegal memory operation when cross page
This patch fixed the potential illegal operation when using the
extend sge buffer cross page in post send operation. The bug
will cause the calltrace as below.
[ 3302.922107] Unable to handle kernel paging request at virtual address ffff00003b3a0004
[ 3302.930009] Mem abort info:
[ 3302.932790] Exception class = DABT (current EL), IL = 32 bits
[ 3302.938695] SET = 0, FnV = 0
[ 3302.941735] EA = 0, S1PTW = 0
[ 3302.944863] Data abort info:
[ 3302.947729] ISV = 0, ISS = 0x00000047
[ 3302.951551] CM = 0, WnR = 1
[ 3302.954506] swapper pgtable: 4k pages, 48-bit VAs, pgd = ffff000009ea5000
[ 3302.961279] [ffff00003b3a0004] *pgd=00000023dfffe003, *pud=00000023dfffd003, *pmd=00000022dc84c003, *pte=0000000000000000
[ 3302.972224] Internal error: Oops: 96000047 [#1] SMP
[ 3302.999509] CPU: 9 PID: 19628 Comm: roce_test_main Tainted: G OE 4.14.10 #1
[ 3303.007498] task: ffff80234df78000 task.stack: ffff00000f640000
[ 3303.013412] PC is at hns_roce_v2_post_send+0x690/0xe20 [hns_roce_pci]
[ 3303.019843] LR is at hns_roce_v2_post_send+0x658/0xe20 [hns_roce_pci]
[ 3303.026269] pc : [<ffff0000020694f8>] lr : [<ffff0000020694c0>] pstate: 804001c9
[ 3303.033649] sp : ffff00000f643870
[ 3303.036951] x29: ffff00000f643870 x28: ffff80232bfa9c00
[ 3303.042250] x27: ffff80234d909380 x26: ffff00003b37f0c0
[ 3303.047549] x25: 0000000000000000 x24: 0000000000000003
[ 3303.052848] x23: 0000000000000000 x22: 0000000000000000
[ 3303.058148] x21: 0000000000000101 x20: 0000000000000001
[ 3303.063447] x19: ffff80236163f800 x18: 0000000000000000
[ 3303.068746] x17: 0000ffff86b76fc8 x16: ffff000008301600
[ 3303.074045] x15: 000020a51c000000 x14: 3128726464615f65
[ 3303.079344] x13: 746f6d6572202c29 x12: 303035312879656b
[ 3303.084643] x11: 723a6f666e692072 x10: 573a6f666e693a5d
[ 3303.089943] x9 : 0000000000000004 x8 : ffff8023ce38b000
[ 3303.095242] x7 : ffff8023ce38b320 x6 : 0000000000000418
[ 3303.100541] x5 : ffff80232bfa9cc8 x4 : 0000000000000030
[ 3303.105839] x3 : 0000000000000100 x2 : 0000000000000200
[ 3303.111138] x1 : 0000000000000320 x0 : ffff00003b3a0000
[ 3303.116438] Process roce_test_main (pid: 19628, stack limit = 0xffff00000f640000)
[ 3303.123906] Call trace:
[ 3303.126339] Exception stack(0xffff00000f643730 to 0xffff00000f643870)
[ 3303.215790] [<ffff0000020694f8>] hns_roce_v2_post_send+0x690/0xe20 [hns_roce_pci]
[ 3303.223293] [<ffff0000021c3750>] rt_ktest_post_send+0x5d0/0x8b8 [rdma_test]
[ 3303.230261] [<ffff0000021b3234>] exec_send_cmd+0x664/0x1350 [rdma_test]
[ 3303.236881] [<ffff0000021b8b30>] rt_ktest_dispatch_cmd_3+0x1510/0x3790 [rdma_test]
[ 3303.244455] [<ffff0000021bae54>] rt_ktest_dispatch_cmd_2+0xa4/0x118 [rdma_test]
[ 3303.251770] [<ffff0000021bafec>] rt_ktest_dispatch_cmd+0x124/0xaa8 [rdma_test]
[ 3303.258997] [<ffff0000021bbc3c>] rt_ktest_dev_write+0x2cc/0x568 [rdma_test]
[ 3303.265947] [<ffff0000082ad688>] __vfs_write+0x60/0x18c
[ 3303.271158] [<ffff0000082ad998>] vfs_write+0xa8/0x198
[ 3303.276196] [<ffff0000082adc7c>] SyS_write+0x6c/0xd4
[ 3303.281147] Exception stack(0xffff00000f643ec0 to 0xffff00000f644000)
[ 3303.287573] 3ec0: 0000000000000003 0000fffffc85faa8 0000000000004e60 0000000000000000
[ 3303.295388] 3ee0: 0000000021fb2000 000000000000ffff eff0e3efe4e58080 0000fffffcc724fe
[ 3303.303204] 3f00: 0000000000000040 1999999999999999 0101010101010101 0000000000000038
[ 3303.311019] 3f20: 0000000000000005 ffffffffffffffff 0d73757461747320 ffffffffffffffff
[ 3303.318835] 3f40: 0000000000000000 0000000000459b00 0000fffffc85e360 000000000043d788
[ 3303.326650] 3f60: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
[ 3303.334465] 3f80: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
[ 3303.342281] 3fa0: 0000000000000000 0000fffffc85e570 0000000000438804 0000fffffc85e570
[ 3303.350096] 3fc0: 0000ffff8553f618 0000000080000000 0000000000000003 0000000000000040
[ 3303.357911] 3fe0: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
[ 3303.365729] [<ffff000008083808>] __sys_trace_return+0x0/0x4
[ 3303.371288] Code: b94008e9 34000129 b9400ce2 110006b5 (b9000402)
[ 3303.377377] ---[ end trace fd5ab98b3325cf9a ]---
Reported-by: Jie Chen <chenjie103@...wei.com>
Reported-by: Xiping Zhang (Francis) <zhangxiping3@...wei.com>
Fixes: b1c158350968("RDMA/hns: Get rid of virt_to_page and vmap calls after dma_alloc_coherent")
Signed-off-by: Wei Hu (Xavier) <xavier.huwei@...wei.com>
---
v2->v3: Add calltrace info and modify judegment conditon according
to Jason's comment.
v1->v2: Modify the Fixes statement according to Leon's comment.
---
drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 72 +++++++++++++++++++++---------
drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 1 +
2 files changed, 53 insertions(+), 20 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index 368f682..46f289d 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -34,6 +34,7 @@
#include <linux/etherdevice.h>
#include <linux/interrupt.h>
#include <linux/kernel.h>
+#include <linux/types.h>
#include <net/addrconf.h>
#include <rdma/ib_umem.h>
@@ -52,6 +53,53 @@ static void set_data_seg_v2(struct hns_roce_v2_wqe_data_seg *dseg,
dseg->len = cpu_to_le32(sg->length);
}
+static void set_extend_sge(struct hns_roce_qp *qp, struct ib_send_wr *wr,
+ unsigned int *sge_ind)
+{
+ struct hns_roce_v2_wqe_data_seg *dseg;
+ struct ib_sge *sg;
+ int num_in_wqe = 0;
+ int extend_sge_num;
+ int fi_sge_num;
+ int se_sge_num;
+ int shift;
+ int i;
+
+ if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC)
+ num_in_wqe = HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE;
+ extend_sge_num = wr->num_sge - num_in_wqe;
+ sg = wr->sg_list + num_in_wqe;
+ shift = qp->hr_buf.page_shift;
+
+ /*
+ * Check whether wr->num_sge sges are in the same page. If not, we
+ * should calculate how many sges in the first page and the second
+ * page.
+ */
+ dseg = get_send_extend_sge(qp, (*sge_ind) & (qp->sge.sge_cnt - 1));
+ fi_sge_num = (round_up((uintptr_t)dseg, 1 << shift) -
+ (uintptr_t)dseg) /
+ sizeof(struct hns_roce_v2_wqe_data_seg);
+ if (extend_sge_num > fi_sge_num) {
+ se_sge_num = extend_sge_num - fi_sge_num;
+ for (i = 0; i < fi_sge_num; i++) {
+ set_data_seg_v2(dseg++, sg + i);
+ (*sge_ind)++;
+ }
+ dseg = get_send_extend_sge(qp,
+ (*sge_ind) & (qp->sge.sge_cnt - 1));
+ for (i = 0; i < se_sge_num; i++) {
+ set_data_seg_v2(dseg++, sg + fi_sge_num + i);
+ (*sge_ind)++;
+ }
+ } else {
+ for (i = 0; i < extend_sge_num; i++) {
+ set_data_seg_v2(dseg++, sg + i);
+ (*sge_ind)++;
+ }
+ }
+}
+
static int set_rwqe_data_seg(struct ib_qp *ibqp, struct ib_send_wr *wr,
struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
void *wqe, unsigned int *sge_ind,
@@ -85,7 +133,7 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, struct ib_send_wr *wr,
roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_INLINE_S,
1);
} else {
- if (wr->num_sge <= 2) {
+ if (wr->num_sge <= HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE) {
for (i = 0; i < wr->num_sge; i++) {
if (likely(wr->sg_list[i].length)) {
set_data_seg_v2(dseg, wr->sg_list + i);
@@ -98,24 +146,14 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, struct ib_send_wr *wr,
V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S,
(*sge_ind) & (qp->sge.sge_cnt - 1));
- for (i = 0; i < 2; i++) {
+ for (i = 0; i < HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE; i++) {
if (likely(wr->sg_list[i].length)) {
set_data_seg_v2(dseg, wr->sg_list + i);
dseg++;
}
}
- dseg = get_send_extend_sge(qp,
- (*sge_ind) & (qp->sge.sge_cnt - 1));
-
- for (i = 0; i < wr->num_sge - 2; i++) {
- if (likely(wr->sg_list[i + 2].length)) {
- set_data_seg_v2(dseg,
- wr->sg_list + 2 + i);
- dseg++;
- (*sge_ind)++;
- }
- }
+ set_extend_sge(qp, wr, sge_ind);
}
roce_set_field(rc_sq_wqe->byte_16,
@@ -318,13 +356,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
memcpy(&ud_sq_wqe->dgid[0], &ah->av.dgid[0],
GID_LEN_V2);
- dseg = get_send_extend_sge(qp,
- sge_ind & (qp->sge.sge_cnt - 1));
- for (i = 0; i < wr->num_sge; i++) {
- set_data_seg_v2(dseg + i, wr->sg_list + i);
- sge_ind++;
- }
-
+ set_extend_sge(qp, wr, &sge_ind);
ind++;
} else if (ibqp->qp_type == IB_QPT_RC) {
rc_sq_wqe = wqe;
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
index 2caeb4c..d47675f 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
@@ -77,6 +77,7 @@
#define HNS_ROCE_V2_MAX_INNER_MTPT_NUM 2
#define HNS_ROCE_INVALID_LKEY 0x100
#define HNS_ROCE_CMQ_TX_TIMEOUT 30000
+#define HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE 2
#define HNS_ROCE_CONTEXT_HOP_NUM 1
#define HNS_ROCE_MTT_HOP_NUM 1
--
1.9.1
Powered by blists - more mailing lists