From ff8d664c9606135d52fd2dc4778dd75a56a3b38e Mon Sep 17 00:00:00 2001
From: Dai Ngo
Date: Thu, 4 Apr 2024 11:11:21 -0600
Subject: [PATCH] xprtrdma: add spinlock in rpcrdma_req to protect the
 rl_free_mrs and rl_registered lists

In some rare conditions, the top half and the bottom half of the
xprtrdma module access the same rl_free_mrs and rl_registered lists
of a request. One such case is when rpcrdma_marshal_req calls
frwr_reset while rpcrdma_reply_handler is executing on the same
rpcrdma_req. When this happens, the rl_free_mrs and rl_registered
lists become corrupted.

This patch adds a spinlock to rpcrdma_req to protect the rl_free_mrs
and rl_registered lists. Since the top and bottom halves of the
xprtrdma module rarely run at the same time on the same request
(this happens only in error conditions), contention on this lock is
expected to be very low.

Signed-off-by: Dai Ngo
---
 net/sunrpc/xprtrdma/frwr_ops.c  | 27 +++++++++++++++++++++++----
 net/sunrpc/xprtrdma/rpc_rdma.c  |  9 ++++++---
 net/sunrpc/xprtrdma/verbs.c     |  6 ++++++
 net/sunrpc/xprtrdma/xprt_rdma.h |  4 +++-
 4 files changed, 38 insertions(+), 8 deletions(-)

diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index ffbf99894970..e4e0f5532e8c 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -87,9 +87,11 @@ static void frwr_mr_put(struct rpcrdma_mr *mr)
 	frwr_mr_unmap(mr->mr_xprt, mr);
 
 	/* The MR is returned to the req's MR free list instead
-	 * of to the xprt's MR free list. No spinlock is needed.
+	 * of to the xprt's MR free list.
 	 */
+	spin_lock(&mr->mr_req->rl_mr_list_lock);
 	rpcrdma_mr_push(mr, &mr->mr_req->rl_free_mrs);
+	spin_unlock(&mr->mr_req->rl_mr_list_lock);
 }
 
 /* frwr_reset - Place MRs back on the free list
@@ -106,8 +108,13 @@ void frwr_reset(struct rpcrdma_req *req)
 {
 	struct rpcrdma_mr *mr;
 
-	while ((mr = rpcrdma_mr_pop(&req->rl_registered)))
+	spin_lock(&req->rl_mr_list_lock);
+	while ((mr = rpcrdma_mr_pop(&req->rl_registered))) {
+		spin_unlock(&req->rl_mr_list_lock);
 		frwr_mr_put(mr);
+		spin_lock(&req->rl_mr_list_lock);
+	}
+	spin_unlock(&req->rl_mr_list_lock);
 }
 
 /**
@@ -390,6 +397,7 @@ int frwr_send(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
 
 	num_wrs = 1;
 	post_wr = send_wr;
+	spin_lock(&req->rl_mr_list_lock);
 	list_for_each_entry(mr, &req->rl_registered, mr_list) {
 		trace_xprtrdma_mr_fastreg(mr);
 
@@ -402,6 +410,7 @@ int frwr_send(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
 		post_wr = &mr->mr_regwr.wr;
 		++num_wrs;
 	}
+	spin_unlock(&req->rl_mr_list_lock);
 
 	if ((kref_read(&req->rl_kref) > 1) || num_wrs > ep->re_send_count) {
 		send_wr->send_flags |= IB_SEND_SIGNALED;
@@ -425,17 +434,23 @@ int frwr_send(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
  * @mrs: list of MRs to check
  *
  */
-void frwr_reminv(struct rpcrdma_rep *rep, struct list_head *mrs)
+void frwr_reminv(struct rpcrdma_rep *rep, struct rpcrdma_req *req)
 {
 	struct rpcrdma_mr *mr;
+	struct list_head *mrs = &req->rl_registered;
 
-	list_for_each_entry(mr, mrs, mr_list)
+	spin_lock(&req->rl_mr_list_lock);
+	list_for_each_entry(mr, mrs, mr_list) {
 		if (mr->mr_handle == rep->rr_inv_rkey) {
 			list_del_init(&mr->mr_list);
 			trace_xprtrdma_mr_reminv(mr);
+			spin_unlock(&req->rl_mr_list_lock);
 			frwr_mr_put(mr);
+			spin_lock(&req->rl_mr_list_lock);
 			break;	/* only one invalidated MR per RPC */
 		}
+	}
+	spin_unlock(&req->rl_mr_list_lock);
 }
 
 static void frwr_mr_done(struct ib_wc *wc, struct rpcrdma_mr *mr)
@@ -507,6 +522,7 @@ void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
 	 * a single ib_post_send() call.
 	 */
 	prev = &first;
+	spin_lock(&req->rl_mr_list_lock);
 	mr = rpcrdma_mr_pop(&req->rl_registered);
 	do {
 		trace_xprtrdma_mr_localinv(mr);
@@ -526,6 +542,7 @@ void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
 		*prev = last;
 		prev = &last->next;
 	} while ((mr = rpcrdma_mr_pop(&req->rl_registered)));
+	spin_unlock(&req->rl_mr_list_lock);
 
 	mr = container_of(last, struct rpcrdma_mr, mr_invwr);
 
@@ -610,6 +627,7 @@ void frwr_unmap_async(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
 	 * a single ib_post_send() call.
 	 */
 	prev = &first;
+	spin_lock(&req->rl_mr_list_lock);
 	mr = rpcrdma_mr_pop(&req->rl_registered);
 	do {
 		trace_xprtrdma_mr_localinv(mr);
@@ -629,6 +647,7 @@ void frwr_unmap_async(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
 		*prev = last;
 		prev = &last->next;
 	} while ((mr = rpcrdma_mr_pop(&req->rl_registered)));
+	spin_unlock(&req->rl_mr_list_lock);
 
 	/* Strong send queue ordering guarantees that when the
 	 * last WR in the chain completes, all WRs in the chain
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 190a4de239c8..29b10f6eb8b0 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -298,15 +298,18 @@ static struct rpcrdma_mr_seg *rpcrdma_mr_prepare(struct rpcrdma_xprt *r_xprt,
 						 int nsegs, bool writing,
 						 struct rpcrdma_mr **mr)
 {
+	spin_lock(&req->rl_mr_list_lock);
 	*mr = rpcrdma_mr_pop(&req->rl_free_mrs);
 	if (!*mr) {
 		*mr = rpcrdma_mr_get(r_xprt);
-		if (!*mr)
+		if (!*mr) {
+			spin_unlock(&req->rl_mr_list_lock);
 			goto out_getmr_err;
+		}
 		(*mr)->mr_req = req;
 	}
-
 	rpcrdma_mr_push(*mr, &req->rl_registered);
+	spin_unlock(&req->rl_mr_list_lock);
 	return frwr_map(r_xprt, seg, nsegs, writing, req->rl_slot.rq_xid, *mr);
 
 out_getmr_err:
@@ -1485,7 +1488,7 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
 	trace_xprtrdma_reply(rqst->rq_task, rep, credits);
 
 	if (rep->rr_wc_flags & IB_WC_WITH_INVALIDATE)
-		frwr_reminv(rep, &req->rl_registered);
+		frwr_reminv(rep, req);
 	if (!list_empty(&req->rl_registered))
 		frwr_unmap_async(r_xprt, req);
 		/* LocalInv completion will complete the RPC */
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 4f8d7efa469f..a8663f104729 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -825,6 +825,8 @@ struct rpcrdma_req *rpcrdma_req_create(struct rpcrdma_xprt *r_xprt,
 
 	INIT_LIST_HEAD(&req->rl_free_mrs);
 	INIT_LIST_HEAD(&req->rl_registered);
+
+	spin_lock_init(&req->rl_mr_list_lock);
 	spin_lock(&buffer->rb_lock);
 	list_add(&req->rl_all, &buffer->rb_allreqs);
 	spin_unlock(&buffer->rb_lock);
@@ -1084,15 +1086,19 @@ void rpcrdma_req_destroy(struct rpcrdma_req *req)
 
 	list_del(&req->rl_all);
 
+	spin_lock(&req->rl_mr_list_lock);
 	while ((mr = rpcrdma_mr_pop(&req->rl_free_mrs))) {
 		struct rpcrdma_buffer *buf = &mr->mr_xprt->rx_buf;
 
+		spin_unlock(&req->rl_mr_list_lock);
 		spin_lock(&buf->rb_lock);
 		list_del(&mr->mr_all);
 		spin_unlock(&buf->rb_lock);
 
 		frwr_mr_release(mr);
+		spin_lock(&req->rl_mr_list_lock);
 	}
+	spin_unlock(&req->rl_mr_list_lock);
 
 	rpcrdma_regbuf_free(req->rl_recvbuf);
 	rpcrdma_regbuf_free(req->rl_sendbuf);
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index da409450dfc0..12db0250c9ce 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -327,6 +327,8 @@ struct rpcrdma_req {
 	struct list_head	rl_all;
 	struct kref		rl_kref;
 
+	/* rl_mr_list_lock protects the rl_free_mrs and rl_registered lists */
+	spinlock_t		rl_mr_list_lock;
 	struct list_head	rl_free_mrs;
 	struct list_head	rl_registered;
 	struct rpcrdma_mr_seg	rl_segments[RPCRDMA_MAX_SEGS];
@@ -539,7 +541,7 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
 				int nsegs, bool writing, __be32 xid,
 				struct rpcrdma_mr *mr);
 int frwr_send(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req);
-void frwr_reminv(struct rpcrdma_rep *rep, struct list_head *mrs);
+void frwr_reminv(struct rpcrdma_rep *rep, struct rpcrdma_req *req);
 void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req);
 void frwr_unmap_async(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req);
 int frwr_wp_create(struct rpcrdma_xprt *r_xprt);
-- 
2.43.0
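
Note on the locking pattern the diff uses: frwr_mr_put() now takes
rl_mr_list_lock itself in order to push the MR back onto rl_free_mrs, so
paths that walk rl_registered under the same lock must drop it around the
frwr_mr_put() call and re-take it afterwards, as frwr_reset() and
frwr_reminv() do above. A minimal sketch of that pattern follows; it is
illustrative only, and the helper name reset_registered_mrs is hypothetical,
simplified from the patched frwr_reset():

	static void reset_registered_mrs(struct rpcrdma_req *req)
	{
		struct rpcrdma_mr *mr;

		spin_lock(&req->rl_mr_list_lock);
		while ((mr = rpcrdma_mr_pop(&req->rl_registered))) {
			/* frwr_mr_put() re-acquires rl_mr_list_lock to push
			 * the MR onto rl_free_mrs, so the lock cannot be held
			 * across the call (spinlocks are not recursive).
			 */
			spin_unlock(&req->rl_mr_list_lock);
			frwr_mr_put(mr);
			spin_lock(&req->rl_mr_list_lock);
		}
		spin_unlock(&req->rl_mr_list_lock);
	}

Popping the MR before dropping the lock keeps the list itself consistent;
only the already-removed MR is handled outside the critical section.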