[<prev] [next>] [day] [month] [year] [list]
Message-Id: <1753779618-23629-1-git-send-email-kotaranov@linux.microsoft.com>
Date: Tue, 29 Jul 2025 02:00:18 -0700
From: Konstantin Taranov <kotaranov@...ux.microsoft.com>
To: kotaranov@...rosoft.com,
longli@...rosoft.com,
jgg@...pe.ca,
leon@...nel.org
Cc: linux-rdma@...r.kernel.org,
linux-kernel@...r.kernel.org
Subject: [PATCH rdma-next 1/1] RDMA/mana_ib: drain send wrs of gsi qp
From: Konstantin Taranov <kotaranov@...rosoft.com>
Drain send WRs of the GSI QP on device removal.
In rare servicing scenarios, the hardware may delete the
state of the GSI QP, preventing it from generating CQEs
for pending send WRs. Since WRs submitted to the GSI QP
hold CM resources, the device cannot be removed until
those WRs are completed. This patch marks all pending
send WRs as failed, allowing the GSI QP to release the CM
resources and enabling safe device removal.
Signed-off-by: Konstantin Taranov <kotaranov@...rosoft.com>
---
drivers/infiniband/hw/mana/cq.c | 26 ++++++++++++++++++++++++++
drivers/infiniband/hw/mana/device.c | 3 +++
drivers/infiniband/hw/mana/mana_ib.h | 3 +++
3 files changed, 32 insertions(+)
diff --git a/drivers/infiniband/hw/mana/cq.c b/drivers/infiniband/hw/mana/cq.c
index 28e154bbb50f..1becc8779123 100644
--- a/drivers/infiniband/hw/mana/cq.c
+++ b/drivers/infiniband/hw/mana/cq.c
@@ -291,6 +291,32 @@ static int mana_process_completions(struct mana_ib_cq *cq, int nwc, struct ib_wc
return wc_index;
}
+void mana_drain_gsi_sqs(struct mana_ib_dev *mdev)
+{
+ struct mana_ib_qp *qp = mana_get_qp_ref(mdev, MANA_GSI_QPN, false);
+ struct ud_sq_shadow_wqe *shadow_wqe;
+ struct mana_ib_cq *cq;
+ unsigned long flags;
+
+ if (!qp)
+ return;
+
+ cq = container_of(qp->ibqp.send_cq, struct mana_ib_cq, ibcq);
+
+ spin_lock_irqsave(&cq->cq_lock, flags);
+ while ((shadow_wqe = shadow_queue_get_next_to_complete(&qp->shadow_sq))
+ != NULL) {
+ shadow_wqe->header.error_code = IB_WC_GENERAL_ERR;
+ shadow_queue_advance_next_to_complete(&qp->shadow_sq);
+ }
+ spin_unlock_irqrestore(&cq->cq_lock, flags);
+
+ if (cq->ibcq.comp_handler)
+ cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
+
+ mana_put_qp_ref(qp);
+}
+
int mana_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
{
struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq);
diff --git a/drivers/infiniband/hw/mana/device.c b/drivers/infiniband/hw/mana/device.c
index fa60872f169f..bdeddb642b87 100644
--- a/drivers/infiniband/hw/mana/device.c
+++ b/drivers/infiniband/hw/mana/device.c
@@ -230,6 +230,9 @@ static void mana_ib_remove(struct auxiliary_device *adev)
{
struct mana_ib_dev *dev = dev_get_drvdata(&adev->dev);
+ if (mana_ib_is_rnic(dev))
+ mana_drain_gsi_sqs(dev);
+
ib_unregister_device(&dev->ib_dev);
dma_pool_destroy(dev->av_pool);
if (mana_ib_is_rnic(dev)) {
diff --git a/drivers/infiniband/hw/mana/mana_ib.h b/drivers/infiniband/hw/mana/mana_ib.h
index 5d31034ac7fb..af09a3e6ccb7 100644
--- a/drivers/infiniband/hw/mana/mana_ib.h
+++ b/drivers/infiniband/hw/mana/mana_ib.h
@@ -43,6 +43,8 @@
*/
#define MANA_AV_BUFFER_SIZE 64
+#define MANA_GSI_QPN (1)
+
struct mana_ib_adapter_caps {
u32 max_sq_id;
u32 max_rq_id;
@@ -718,6 +720,7 @@ int mana_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
int mana_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
const struct ib_send_wr **bad_wr);
+void mana_drain_gsi_sqs(struct mana_ib_dev *mdev);
int mana_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
int mana_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
--
2.43.0
Powered by blists - more mailing lists