[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <505e9af7-a0cd-bf75-4a72-5d883ee06bf1@oracle.com>
Date: Mon, 1 Jul 2019 09:39:54 -0700
From: Gerd Rausch <gerd.rausch@...cle.com>
To: Santosh Shilimkar <santosh.shilimkar@...cle.com>,
netdev@...r.kernel.org
Cc: David Miller <davem@...emloft.net>
Subject: [PATCH net-next 3/7] net/rds: Wait for the FRMR_IS_FREE (or
FRMR_IS_STALE) transition after posting IB_WR_LOCAL_INV
In order to:
1) avoid a silly bouncing between "clean_list" and "drop_list"
triggered by function "rds_ib_reg_frmr" as it releases frmr
regions whose state is not "FRMR_IS_FREE" right away.
2) prevent an invalid access error in a race from a pending
"IB_WR_LOCAL_INV" operation with a teardown ("dma_unmap_sg", "put_page")
and de-registration ("ib_dereg_mr") of the corresponding
memory region.
Signed-off-by: Gerd Rausch <gerd.rausch@...cle.com>
---
net/rds/ib_frmr.c | 89 ++++++++++++++++++++++++++++++-----------------
net/rds/ib_mr.h | 2 ++
2 files changed, 59 insertions(+), 32 deletions(-)
diff --git a/net/rds/ib_frmr.c b/net/rds/ib_frmr.c
index 9f8aa310c27a..3c953034dca3 100644
--- a/net/rds/ib_frmr.c
+++ b/net/rds/ib_frmr.c
@@ -76,6 +76,7 @@ static struct rds_ib_mr *rds_ib_alloc_frmr(struct rds_ib_device *rds_ibdev,
frmr->fr_state = FRMR_IS_FREE;
init_waitqueue_head(&frmr->fr_inv_done);
+ init_waitqueue_head(&frmr->fr_reg_done);
return ibmr;
out_no_cigar:
@@ -124,6 +125,7 @@ static int rds_ib_post_reg_frmr(struct rds_ib_mr *ibmr)
*/
ib_update_fast_reg_key(frmr->mr, ibmr->remap_count++);
frmr->fr_state = FRMR_IS_INUSE;
+ frmr->fr_reg = true;
memset(&reg_wr, 0, sizeof(reg_wr));
reg_wr.wr.wr_id = (unsigned long)(void *)ibmr;
@@ -144,7 +146,29 @@ static int rds_ib_post_reg_frmr(struct rds_ib_mr *ibmr)
if (printk_ratelimit())
pr_warn("RDS/IB: %s returned error(%d)\n",
__func__, ret);
+ goto out;
+ }
+
+ if (!frmr->fr_reg)
+ goto out;
+
+ /* Wait for the registration to complete in order to prevent an invalid
+ * access error resulting from a race between the memory region already
+ * being accessed while registration is still pending.
+ */
+ wait_event_timeout(frmr->fr_reg_done, !frmr->fr_reg,
+ msecs_to_jiffies(100));
+
+ /* Registration did not complete within 100msec, something's wrong */
+ if (frmr->fr_reg) {
+ pr_warn("RDS/IB: %s registration still incomplete after 100msec\n",
+ __func__);
+ frmr->fr_state = FRMR_IS_STALE;
+ ret = -EBUSY;
}
+
+out:
+
return ret;
}
@@ -262,6 +286,26 @@ static int rds_ib_post_inv(struct rds_ib_mr *ibmr)
pr_err("RDS/IB: %s returned error(%d)\n", __func__, ret);
goto out;
}
+
+ if (frmr->fr_state != FRMR_IS_INUSE)
+ goto out;
+
+ /* Wait for the FRMR_IS_FREE (or FRMR_IS_STALE) transition in order to
+ * 1) avoid a silly bouncing between "clean_list" and "drop_list"
+ * triggered by function "rds_ib_reg_frmr" as it releases frmr
+ * regions whose state is not "FRMR_IS_FREE" right away.
+ * 2) prevents an invalid access error in a race
+ * from a pending "IB_WR_LOCAL_INV" operation
+ * with a teardown ("dma_unmap_sg", "put_page")
+ * and de-registration ("ib_dereg_mr") of the corresponding
+ * memory region.
+ */
+ wait_event_timeout(frmr->fr_inv_done, frmr->fr_state != FRMR_IS_INUSE,
+ msecs_to_jiffies(50));
+
+ if (frmr->fr_state == FRMR_IS_INUSE)
+ ret = -EBUSY;
+
out:
return ret;
}
@@ -289,6 +333,11 @@ void rds_ib_mr_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc)
wake_up(&frmr->fr_inv_done);
}
+ if (frmr->fr_reg) {
+ frmr->fr_reg = false;
+ wake_up(&frmr->fr_reg_done);
+ }
+
atomic_inc(&ic->i_fastreg_wrs);
}
@@ -297,14 +346,18 @@ void rds_ib_unreg_frmr(struct list_head *list, unsigned int *nfreed,
{
struct rds_ib_mr *ibmr, *next;
struct rds_ib_frmr *frmr;
- int ret = 0;
+ int ret = 0, ret2;
unsigned int freed = *nfreed;
/* String all ib_mr's onto one list and hand them to ib_unmap_fmr */
list_for_each_entry(ibmr, list, unmap_list) {
- if (ibmr->sg_dma_len)
- ret |= rds_ib_post_inv(ibmr);
+ if (ibmr->sg_dma_len) {
+ ret2 = rds_ib_post_inv(ibmr);
+ if (ret2 && !ret)
+ ret = ret2;
+ }
}
+
if (ret)
pr_warn("RDS/IB: %s failed (err=%d)\n", __func__, ret);
@@ -347,36 +400,8 @@ struct rds_ib_mr *rds_ib_reg_frmr(struct rds_ib_device *rds_ibdev,
}
do {
- if (ibmr) {
- /* Memory regions make it onto the "clean_list" via
- * "rds_ib_flush_mr_pool", after the memory region has
- * been posted for invalidation via "rds_ib_post_inv".
- *
- * At that point in time, "fr_state" may still be
- * in state "FRMR_IS_INUSE", since the only place where
- * "fr_state" transitions to "FRMR_IS_FREE" is in
- * is in "rds_ib_mr_cqe_handler", which is
- * triggered by a tasklet.
- *
- * So in case we notice that
- * "fr_state != FRMR_IS_FREE" (see below), * we wait for
- * "fr_inv_done" to trigger with a maximum of 10msec.
- * Then we check again, and only put the memory region
- * onto the drop_list (via "rds_ib_free_frmr")
- * in case the situation remains unchanged.
- *
- * This avoids the problem of memory-regions bouncing
- * between "clean_list" and "drop_list" before they
- * even have a chance to be properly invalidated.
- */
- frmr = &ibmr->u.frmr;
- wait_event_timeout(frmr->fr_inv_done,
- frmr->fr_state == FRMR_IS_FREE,
- msecs_to_jiffies(10));
- if (frmr->fr_state == FRMR_IS_FREE)
- break;
+ if (ibmr)
rds_ib_free_frmr(ibmr, true);
- }
ibmr = rds_ib_alloc_frmr(rds_ibdev, nents);
if (IS_ERR(ibmr))
return ibmr;
diff --git a/net/rds/ib_mr.h b/net/rds/ib_mr.h
index ab26c20ed66f..9045a8c0edff 100644
--- a/net/rds/ib_mr.h
+++ b/net/rds/ib_mr.h
@@ -58,6 +58,8 @@ struct rds_ib_frmr {
enum rds_ib_fr_state fr_state;
bool fr_inv;
wait_queue_head_t fr_inv_done;
+ bool fr_reg;
+ wait_queue_head_t fr_reg_done;
struct ib_send_wr fr_wr;
unsigned int dma_npages;
unsigned int sg_byte_len;
--
2.18.0
Powered by blists - more mailing lists