Date:   Wed,  6 Feb 2019 22:54:43 +0530
From:   Raju Rangoju <rajur@...lsio.com>
To:     jgg@...lanox.com, davem@...emloft.net, linux-rdma@...r.kernel.org
Cc:     netdev@...r.kernel.org, swise@...ngridcomputing.com,
        rajur@...lsio.com
Subject: [PATCH 2/3] iw_cxgb4: complete the cached SRQ buffers

If TP fetches an SRQ buffer but ends up not using it before the
connection is aborted, it passes the index of that SRQ buffer to
the host in the ABORT_REQ_RSS or ABORT_RPL CPL message.
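
For reference, the cached index travels in the srqidx_status field of
the abort CPL and is recovered with the existing accessor; the snippet
below mirrors the abort_rpl() hunk in this patch:

	/* srqidx_status packs the abort status together with the cached
	 * SRQ buffer index; extract the index from the big-endian field.
	 */
	srqidx = ABORT_RSS_SRQIDX_G(be32_to_cpu(rpl->srqidx_status));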

But if the srqidx field is zero in the received ABORT_RPL or
ABORT_REQ_RSS CPL, then we need to read the tcb.rq_start field to
see whether the connection really did have an RQE cached. This works
around a case where the HW does not include the srqidx in the
ABORT_RPL/ABORT_REQ_RSS CPL.
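
Concretely, the abort handlers now only complete cached buffers for
SRQ-backed QPs, falling back to the index read back from the TCB when
the CPL carried a zero srqidx; a minimal sketch of the abort_rpl()
change below:

	if (ep->com.qp && ep->com.qp->srq) {
		srqidx = ABORT_RSS_SRQIDX_G(be32_to_cpu(rpl->srqidx_status));
		/* zero srqidx: use the index recovered from tcb.rq_start */
		complete_cached_srq_buffers(ep, srqidx ? srqidx : ep->srqe_idx);
	}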

The final value of rq_start is the one present in the TCB with the
TF_RX_PDU_OUT bit cleared. So we need to read the TCB and examine
TF_RX_PDU_OUT (bit 49 of t_flags) to determine whether an RX PDU
feedback event is pending.
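
In read_tcb_rpl() this check reduces to masking the 64-bit t_flags
word returned in the GET_TCB reply; a sketch of the logic added below:

	t_flags_64 = t4_tcb_get_field64(tcb, TCB_T_FLAGS_W);
	rx_pdu_out = (t_flags_64 & TF_RX_PDU_OUT_V(1)) >> TF_RX_PDU_OUT_S;
	if (rx_pdu_out) {
		/* rq_start is not final yet, re-read the TCB */
		read_tcb(ep);
		return 0;
	}
	ep->srqe_idx = t4_tcb_get_field32(tcb, TCB_RQ_START_W, TCB_RQ_START_M,
					  TCB_RQ_START_S);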

Signed-off-by: Raju Rangoju <rajur@...lsio.com>
---
 drivers/infiniband/hw/cxgb4/cm.c       | 161 +++++++++++++++++++++++++++++++--
 drivers/infiniband/hw/cxgb4/iw_cxgb4.h |   3 +
 drivers/infiniband/hw/cxgb4/t4.h       |   1 +
 3 files changed, 157 insertions(+), 8 deletions(-)

diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 8221813219e5..7aca5d73c19e 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -655,7 +655,33 @@ static int send_halfclose(struct c4iw_ep *ep)
 	return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
 }
 
-static int send_abort(struct c4iw_ep *ep)
+static void read_tcb(struct c4iw_ep *ep)
+{
+	struct sk_buff *skb;
+	struct cpl_get_tcb *req;
+	int wrlen = roundup(sizeof(*req), 16);
+
+	skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
+	if (WARN_ON(!skb))
+		return;
+
+	set_wr_txq(skb, CPL_PRIORITY_CONTROL, ep->ctrlq_idx);
+	req = (struct cpl_get_tcb *) skb_put(skb, wrlen);
+	memset(req, 0, wrlen);
+	INIT_TP_WR(req, ep->hwtid);
+	OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_GET_TCB, ep->hwtid));
+	req->reply_ctrl = htons(REPLY_CHAN_V(0) | QUEUENO_V(ep->rss_qid));
+
+	/*
+	 * keep a ref on the ep so the tcb is not unlocked before this
+	 * cpl completes. The ref is released in read_tcb_rpl().
+	 */
+	c4iw_get_ep(&ep->com);
+	if (WARN_ON(c4iw_ofld_send(&ep->com.dev->rdev, skb)))
+		c4iw_put_ep(&ep->com);
+}
+
+static int send_abort_req(struct c4iw_ep *ep)
 {
 	u32 wrlen = roundup(sizeof(struct cpl_abort_req), 16);
 	struct sk_buff *req_skb = skb_dequeue(&ep->com.ep_skb_list);
@@ -670,6 +696,17 @@ static int send_abort(struct c4iw_ep *ep)
 	return c4iw_l2t_send(&ep->com.dev->rdev, req_skb, ep->l2t);
 }
 
+static int send_abort(struct c4iw_ep *ep)
+{
+	if (!ep->com.qp || !ep->com.qp->srq) {
+		send_abort_req(ep);
+		return 0;
+	}
+	set_bit(ABORT_REQ_IN_PROGRESS, &ep->com.flags);
+	read_tcb(ep);
+	return 0;
+}
+
 static int send_connect(struct c4iw_ep *ep)
 {
 	struct cpl_act_open_req *req = NULL;
@@ -1851,14 +1888,11 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb)
 	return 0;
 }
 
-static void complete_cached_srq_buffers(struct c4iw_ep *ep,
-					__be32 srqidx_status)
+static void complete_cached_srq_buffers(struct c4iw_ep *ep, u32 srqidx)
 {
 	enum chip_type adapter_type;
-	u32 srqidx;
 
 	adapter_type = ep->com.dev->rdev.lldi.adapter_type;
-	srqidx = ABORT_RSS_SRQIDX_G(be32_to_cpu(srqidx_status));
 
 	/*
 	 * If this TCB had a srq buffer cached, then we must complete
@@ -1876,6 +1910,7 @@ static void complete_cached_srq_buffers(struct c4iw_ep *ep,
 
 static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
 {
+	u32 srqidx;
 	struct c4iw_ep *ep;
 	struct cpl_abort_rpl_rss6 *rpl = cplhdr(skb);
 	int release = 0;
@@ -1887,7 +1922,10 @@ static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
 		return 0;
 	}
 
-	complete_cached_srq_buffers(ep, rpl->srqidx_status);
+	if (ep->com.qp && ep->com.qp->srq) {
+		srqidx = ABORT_RSS_SRQIDX_G(be32_to_cpu(rpl->srqidx_status));
+		complete_cached_srq_buffers(ep, srqidx ? srqidx : ep->srqe_idx);
+	}
 
 	pr_debug("ep %p tid %u\n", ep, ep->hwtid);
 	mutex_lock(&ep->com.mutex);
@@ -2740,6 +2778,21 @@ static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb)
 	return 0;
 }
 
+static void finish_peer_abort(struct c4iw_dev *dev, struct c4iw_ep *ep)
+{
+	complete_cached_srq_buffers(ep, ep->srqe_idx);
+	if (ep->com.cm_id && ep->com.qp) {
+		struct c4iw_qp_attributes attrs;
+
+		attrs.next_state = C4IW_QP_STATE_ERROR;
+		c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
+			       C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
+	}
+	peer_abort_upcall(ep);
+	release_ep_resources(ep);
+	c4iw_put_ep(&ep->com);
+}
+
 static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
 {
 	struct cpl_abort_req_rss6 *req = cplhdr(skb);
@@ -2750,6 +2803,7 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
 	int release = 0;
 	unsigned int tid = GET_TID(req);
 	u8 status;
+	u32 srqidx;
 
 	u32 len = roundup(sizeof(struct cpl_abort_rpl), 16);
 
@@ -2769,8 +2823,6 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
 		goto deref_ep;
 	}
 
-	complete_cached_srq_buffers(ep, req->srqidx_status);
-
 	pr_debug("ep %p tid %u state %u\n", ep, ep->hwtid,
 		 ep->com.state);
 	set_bit(PEER_ABORT, &ep->com.history);
@@ -2819,6 +2871,23 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
 		stop_ep_timer(ep);
 		/*FALLTHROUGH*/
 	case FPDU_MODE:
+		if (ep->com.qp && ep->com.qp->srq) {
+			srqidx = ABORT_RSS_SRQIDX_G(
+					be32_to_cpu(req->srqidx_status));
+			if (srqidx) {
+				complete_cached_srq_buffers(ep,
+							    srqidx);
+			} else {
+				/* Hold ep ref until finish_peer_abort() */
+				c4iw_get_ep(&ep->com);
+				__state_set(&ep->com, ABORTING);
+				set_bit(PEER_ABORT_IN_PROGRESS, &ep->com.flags);
+				read_tcb(ep);
+				break;
+
+			}
+		}
+
 		if (ep->com.cm_id && ep->com.qp) {
 			attrs.next_state = C4IW_QP_STATE_ERROR;
 			ret = c4iw_modify_qp(ep->com.qp->rhp,
@@ -3717,6 +3786,80 @@ static void passive_ofld_conn_reply(struct c4iw_dev *dev, struct sk_buff *skb,
 	return;
 }
 
+static inline u64 t4_tcb_get_field64(__be64 *tcb, u16 word)
+{
+	u64 tlo = be64_to_cpu(tcb[((31 - word) / 2)]);
+	u64 thi = be64_to_cpu(tcb[((31 - word) / 2) - 1]);
+	u64 t;
+	u32 shift = 32;
+
+	t = (thi << shift) | (tlo >> shift);
+
+	return t;
+}
+
+static inline u32 t4_tcb_get_field32(__be64 *tcb, u16 word, u32 mask, u32 shift)
+{
+	u32 v;
+	u64 t = be64_to_cpu(tcb[(31 - word) / 2]);
+
+	if (word & 0x1)
+		shift += 32;
+	v = (t >> shift) & mask;
+	return v;
+}
+
+static int read_tcb_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
+{
+	struct cpl_get_tcb_rpl *rpl = cplhdr(skb);
+	__be64 *tcb = (__be64 *)(rpl + 1);
+	unsigned int tid = GET_TID(rpl);
+	struct c4iw_ep *ep;
+	u64 t_flags_64;
+	u32 rx_pdu_out;
+
+	ep = get_ep_from_tid(dev, tid);
+	if (!ep)
+		return 0;
+	/* Examine TF_RX_PDU_OUT (bit 49 of t_flags) to determine
+	 * whether an RX PDU feedback event is pending.
+	 *
+	 * If that bit is set, it means we'll need to re-read the TCB's
+	 * rq_start value. The final value is the one present in a TCB
+	 * with the TF_RX_PDU_OUT bit cleared.
+	 */
+
+	t_flags_64 = t4_tcb_get_field64(tcb, TCB_T_FLAGS_W);
+	rx_pdu_out = (t_flags_64 & TF_RX_PDU_OUT_V(1)) >> TF_RX_PDU_OUT_S;
+
+	c4iw_put_ep(&ep->com); /* from get_ep_from_tid() */
+	c4iw_put_ep(&ep->com); /* from read_tcb() */
+
+	/* If TF_RX_PDU_OUT bit is set, re-read the TCB */
+	if (rx_pdu_out) {
+		if (++ep->rx_pdu_out_cnt >= 2) {
+			WARN_ONCE(1, "tcb re-read() reached the guard limit, finishing the cleanup\n");
+			goto cleanup;
+		}
+		read_tcb(ep);
+		return 0;
+	}
+
+	ep->srqe_idx = t4_tcb_get_field32(tcb, TCB_RQ_START_W, TCB_RQ_START_M,
+			TCB_RQ_START_S);
+cleanup:
+	pr_debug("ep %p tid %u %016x\n", ep, ep->hwtid, ep->srqe_idx);
+
+	if (test_bit(PEER_ABORT_IN_PROGRESS, &ep->com.flags))
+		finish_peer_abort(dev, ep);
+	else if (test_bit(ABORT_REQ_IN_PROGRESS, &ep->com.flags))
+		send_abort_req(ep);
+	else
+		WARN_ONCE(1, "unexpected state!");
+
+	return 0;
+}
+
 static int deferred_fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb)
 {
 	struct cpl_fw6_msg *rpl = cplhdr(skb);
@@ -4037,6 +4180,7 @@ static c4iw_handler_func work_handlers[NUM_CPL_CMDS + NUM_FAKE_CPLS] = {
 	[CPL_CLOSE_CON_RPL] = close_con_rpl,
 	[CPL_RDMA_TERMINATE] = terminate,
 	[CPL_FW4_ACK] = fw4_ack,
+	[CPL_GET_TCB_RPL] = read_tcb_rpl,
 	[CPL_FW6_MSG] = deferred_fw6_msg,
 	[CPL_RX_PKT] = rx_pkt,
 	[FAKE_CPL_PUT_EP_SAFE] = _put_ep_safe,
@@ -4268,6 +4412,7 @@ c4iw_handler_func c4iw_handlers[NUM_CPL_CMDS] = {
 	[CPL_RDMA_TERMINATE] = sched,
 	[CPL_FW4_ACK] = sched,
 	[CPL_SET_TCB_RPL] = set_tcb_rpl,
+	[CPL_GET_TCB_RPL] = sched,
 	[CPL_FW6_MSG] = fw6_msg,
 	[CPL_RX_PKT] = sched
 };
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index f0fceadd0d12..3a0923f7c60e 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -982,6 +982,9 @@ struct c4iw_ep {
 	int rcv_win;
 	u32 snd_wscale;
 	struct c4iw_ep_stats stats;
+	u32 srqe_idx;
+	u32 rx_pdu_out_cnt;
+	struct sk_buff *peer_abort_skb;
 };
 
 static inline struct c4iw_ep *to_ep(struct iw_cm_id *cm_id)
diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h
index fff6d48d262f..b170817b2741 100644
--- a/drivers/infiniband/hw/cxgb4/t4.h
+++ b/drivers/infiniband/hw/cxgb4/t4.h
@@ -35,6 +35,7 @@
 #include "t4_regs.h"
 #include "t4_values.h"
 #include "t4_msg.h"
+#include "t4_tcb.h"
 #include "t4fw_ri_api.h"
 
 #define T4_MAX_NUM_PD 65536
-- 
2.13.0
