[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20181128161607.19175-3-Ariel.Elior@cavium.com>
Date:   Wed, 28 Nov 2018 18:16:03 +0200
From:   Ariel Elior <Ariel.Elior@...ium.com>
To:     <davem@...emloft.net>
CC:     <netdev@...r.kernel.org>, Ariel Elior <Ariel.Elior@...ium.com>,
        "Michal Kalderon" <Michal.Kalderon@...ium.com>,
        Tomer Tayar <Tomer.Tayar@...ium.com>
Subject: [PATCH net-next v3 2/6] qed: Use the doorbell overflow recovery mechanism in case of doorbell overflow
In case of an attention from the doorbell queue block, analyze the HW
indications. In case of a doorbell overflow, execute a doorbell recovery.
Since there can be spurious indications (race conditions between multiple PFs),
schedule a periodic task for checking whether a doorbell overflow may have been
missed. After a set time with no indications, terminate the periodic task.
Signed-off-by: Ariel Elior <Ariel.Elior@...ium.com>
Signed-off-by: Michal Kalderon <Michal.Kalderon@...ium.com>
Signed-off-by: Tomer Tayar <Tomer.Tayar@...ium.com>
---
 drivers/net/ethernet/qlogic/qed/qed.h          |  14 ++-
 drivers/net/ethernet/qlogic/qed/qed_dev.c      |  14 ++-
 drivers/net/ethernet/qlogic/qed/qed_int.c      | 152 ++++++++++++++++++++++---
 drivers/net/ethernet/qlogic/qed/qed_int.h      |  10 ++
 drivers/net/ethernet/qlogic/qed/qed_main.c     |  64 ++++++++++-
 drivers/net/ethernet/qlogic/qed/qed_reg_addr.h |  50 ++++++++
 6 files changed, 280 insertions(+), 24 deletions(-)
diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h
index 882279e..a053062 100644
--- a/drivers/net/ethernet/qlogic/qed/qed.h
+++ b/drivers/net/ethernet/qlogic/qed/qed.h
@@ -536,6 +536,7 @@ struct qed_simd_fp_handler {
 
 enum qed_slowpath_wq_flag {
 	QED_SLOWPATH_MFW_TLV_REQ,
+	QED_SLOWPATH_PERIODIC_DB_REC,
 };
 
 struct qed_hwfn {
@@ -669,11 +670,12 @@ struct qed_hwfn {
 	struct delayed_work iov_task;
 	unsigned long iov_task_flags;
 #endif
-
-	struct z_stream_s		*stream;
+	struct z_stream_s *stream;
+	bool slowpath_wq_active;
 	struct workqueue_struct *slowpath_wq;
 	struct delayed_work slowpath_task;
 	unsigned long slowpath_task_flags;
+	u32 periodic_db_rec_count;
 };
 
 struct pci_params {
@@ -914,6 +916,12 @@ void qed_set_fw_mac_addr(__le16 *fw_msb,
 
 #define QED_LEADING_HWFN(dev)   (&dev->hwfns[0])
 
+/* doorbell recovery mechanism */
+void qed_db_recovery_dp(struct qed_hwfn *p_hwfn);
+void qed_db_recovery_execute(struct qed_hwfn *p_hwfn,
+			     enum qed_db_rec_exec db_exec);
+bool qed_edpm_enabled(struct qed_hwfn *p_hwfn);
+
 /* Other Linux specific common definitions */
 #define DP_NAME(cdev) ((cdev)->name)
 
@@ -948,4 +956,6 @@ int qed_mfw_fill_tlv_data(struct qed_hwfn *hwfn,
 			  union qed_mfw_tlv_data *tlv_data);
 
 void qed_hw_info_set_offload_tc(struct qed_hw_info *p_info, u8 tc);
+
+void qed_periodic_db_rec_start(struct qed_hwfn *p_hwfn);
 #endif /* _QED_H */
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c
index a63f87f..0172d90 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c
@@ -1759,6 +1759,14 @@ enum QED_ROCE_EDPM_MODE {
 	QED_ROCE_EDPM_MODE_DISABLE = 2,
 };
 
+bool qed_edpm_enabled(struct qed_hwfn *p_hwfn)
+{
+	if (p_hwfn->dcbx_no_edpm || p_hwfn->db_bar_no_edpm)
+		return false;
+
+	return true;
+}
+
 static int
 qed_hw_init_pf_doorbell_bar(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
@@ -1828,13 +1836,13 @@ enum QED_ROCE_EDPM_MODE {
 	p_hwfn->wid_count = (u16) n_cpus;
 
 	DP_INFO(p_hwfn,
-		"doorbell bar: normal_region_size=%d, pwm_region_size=%d, dpi_size=%d, dpi_count=%d, roce_edpm=%s\n",
+		"doorbell bar: normal_region_size=%d, pwm_region_size=%d, dpi_size=%d, dpi_count=%d, roce_edpm=%s, page_size=%lu\n",
 		norm_regsize,
 		pwm_regsize,
 		p_hwfn->dpi_size,
 		p_hwfn->dpi_count,
-		((p_hwfn->dcbx_no_edpm) || (p_hwfn->db_bar_no_edpm)) ?
-		"disabled" : "enabled");
+		(!qed_edpm_enabled(p_hwfn)) ?
+		"disabled" : "enabled", PAGE_SIZE);
 
 	if (rc) {
 		DP_ERR(p_hwfn,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.c b/drivers/net/ethernet/qlogic/qed/qed_int.c
index af3a28e..0fe44a6 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_int.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_int.c
@@ -361,29 +361,147 @@ static int qed_pglub_rbc_attn_cb(struct qed_hwfn *p_hwfn)
 	return 0;
 }
 
-#define QED_DORQ_ATTENTION_REASON_MASK	(0xfffff)
-#define QED_DORQ_ATTENTION_OPAQUE_MASK (0xffff)
-#define QED_DORQ_ATTENTION_SIZE_MASK	(0x7f)
-#define QED_DORQ_ATTENTION_SIZE_SHIFT	(16)
+#define QED_DORQ_ATTENTION_REASON_MASK  (0xfffff)
+#define QED_DORQ_ATTENTION_OPAQUE_MASK  (0xffff)
+#define QED_DORQ_ATTENTION_OPAQUE_SHIFT (0x0)
+#define QED_DORQ_ATTENTION_SIZE_MASK            (0x7f)
+#define QED_DORQ_ATTENTION_SIZE_SHIFT           (16)
+
+#define QED_DB_REC_COUNT                        1000
+#define QED_DB_REC_INTERVAL                     100
+
+static int qed_db_rec_flush_queue(struct qed_hwfn *p_hwfn,
+				  struct qed_ptt *p_ptt)
+{
+	u32 count = QED_DB_REC_COUNT;
+	u32 usage = 1;
+
+	/* wait for usage to zero or count to run out. This is necessary since
+	 * EDPM doorbell transactions can take multiple 64b cycles, and as such
+	 * can "split" over the pci. Possibly, the doorbell drop can happen with
+	 * half an EDPM in the queue and other half dropped. Another EDPM
+	 * doorbell to the same address (from doorbell recovery mechanism or
+	 * from the doorbelling entity) could have first half dropped and second
+	 * half interpreted as continuation of the first. To prevent such
+	 * malformed doorbells from reaching the device, flush the queue before
+	 * releasing the overflow sticky indication.
+	 */
+	while (count-- && usage) {
+		usage = qed_rd(p_hwfn, p_ptt, DORQ_REG_PF_USAGE_CNT);
+		udelay(QED_DB_REC_INTERVAL);
+	}
+
+	/* should have been depleted by now */
+	if (usage) {
+		DP_NOTICE(p_hwfn->cdev,
+			  "DB recovery: doorbell usage failed to zero after %d usec. usage was %x\n",
+			  QED_DB_REC_INTERVAL * QED_DB_REC_COUNT, usage);
+		return -EBUSY;
+	}
+
+	return 0;
+}
+
+int qed_db_rec_handler(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
+{
+	u32 overflow;
+	int rc;
+
+	overflow = qed_rd(p_hwfn, p_ptt, DORQ_REG_PF_OVFL_STICKY);
+	DP_NOTICE(p_hwfn, "PF Overflow sticky 0x%x\n", overflow);
+	if (!overflow) {
+		qed_db_recovery_execute(p_hwfn, DB_REC_ONCE);
+		return 0;
+	}
+
+	if (qed_edpm_enabled(p_hwfn)) {
+		rc = qed_db_rec_flush_queue(p_hwfn, p_ptt);
+		if (rc)
+			return rc;
+	}
+
+	/* Flush any pending (e)dpm as they may never arrive */
+	qed_wr(p_hwfn, p_ptt, DORQ_REG_DPM_FORCE_ABORT, 0x1);
+
+	/* Release overflow sticky indication (stop silently dropping everything) */
+	qed_wr(p_hwfn, p_ptt, DORQ_REG_PF_OVFL_STICKY, 0x0);
+
+	/* Repeat all last doorbells (doorbell drop recovery) */
+	qed_db_recovery_execute(p_hwfn, DB_REC_REAL_DEAL);
+
+	return 0;
+}
+
 static int qed_dorq_attn_cb(struct qed_hwfn *p_hwfn)
 {
-	u32 reason;
+	u32 int_sts, first_drop_reason, details, address, all_drops_reason;
+	struct qed_ptt *p_ptt = p_hwfn->p_dpc_ptt;
+	int rc;
 
-	reason = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt, DORQ_REG_DB_DROP_REASON) &
-			QED_DORQ_ATTENTION_REASON_MASK;
-	if (reason) {
-		u32 details = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt,
-				     DORQ_REG_DB_DROP_DETAILS);
+	int_sts = qed_rd(p_hwfn, p_ptt, DORQ_REG_INT_STS);
+	DP_NOTICE(p_hwfn->cdev, "DORQ attention. int_sts was %x\n", int_sts);
 
-		DP_INFO(p_hwfn->cdev,
-			"DORQ db_drop: address 0x%08x Opaque FID 0x%04x Size [bytes] 0x%08x Reason: 0x%08x\n",
-			qed_rd(p_hwfn, p_hwfn->p_dpc_ptt,
-			       DORQ_REG_DB_DROP_DETAILS_ADDRESS),
-			(u16)(details & QED_DORQ_ATTENTION_OPAQUE_MASK),
-			GET_FIELD(details, QED_DORQ_ATTENTION_SIZE) * 4,
-			reason);
+	/* int_sts may be zero since all PFs were interrupted for doorbell
+	 * overflow but another one already handled it. Can abort here. If
+	 * this PF also requires overflow recovery we will be interrupted again.
+	 * The masked almost full indication may also be set. Ignoring.
+	 */
+	if (!(int_sts & ~DORQ_REG_INT_STS_DORQ_FIFO_AFULL))
+		return 0;
+
+	/* check if db_drop or overflow happened */
+	if (int_sts & (DORQ_REG_INT_STS_DB_DROP |
+		       DORQ_REG_INT_STS_DORQ_FIFO_OVFL_ERR)) {
+		/* Obtain data about db drop/overflow */
+		first_drop_reason = qed_rd(p_hwfn, p_ptt,
+					   DORQ_REG_DB_DROP_REASON) &
+		    QED_DORQ_ATTENTION_REASON_MASK;
+		details = qed_rd(p_hwfn, p_ptt, DORQ_REG_DB_DROP_DETAILS);
+		address = qed_rd(p_hwfn, p_ptt,
+				 DORQ_REG_DB_DROP_DETAILS_ADDRESS);
+		all_drops_reason = qed_rd(p_hwfn, p_ptt,
+					  DORQ_REG_DB_DROP_DETAILS_REASON);
+
+		/* Log info */
+		DP_NOTICE(p_hwfn->cdev,
+			  "Doorbell drop occurred\n"
+			  "Address\t\t0x%08x\t(second BAR address)\n"
+			  "FID\t\t0x%04x\t\t(Opaque FID)\n"
+			  "Size\t\t0x%04x\t\t(in bytes)\n"
+			  "1st drop reason\t0x%08x\t(details on first drop since last handling)\n"
+			  "Sticky reasons\t0x%08x\t(all drop reasons since last handling)\n",
+			  address,
+			  GET_FIELD(details, QED_DORQ_ATTENTION_OPAQUE),
+			  GET_FIELD(details, QED_DORQ_ATTENTION_SIZE) * 4,
+			  first_drop_reason, all_drops_reason);
+
+		rc = qed_db_rec_handler(p_hwfn, p_ptt);
+		qed_periodic_db_rec_start(p_hwfn);
+		if (rc)
+			return rc;
+
+		/* Clear the doorbell drop details and prepare for next drop */
+		qed_wr(p_hwfn, p_ptt, DORQ_REG_DB_DROP_DETAILS_REL, 0);
+
+		/* Mark interrupt as handled (note: even if drop was due to a different
+		 * reason than overflow we mark as handled)
+		 */
+		qed_wr(p_hwfn,
+		       p_ptt,
+		       DORQ_REG_INT_STS_WR,
+		       DORQ_REG_INT_STS_DB_DROP |
+		       DORQ_REG_INT_STS_DORQ_FIFO_OVFL_ERR);
+
+		/* If there are no indications other than drop indications, success */
+		if ((int_sts & ~(DORQ_REG_INT_STS_DB_DROP |
+				 DORQ_REG_INT_STS_DORQ_FIFO_OVFL_ERR |
+				 DORQ_REG_INT_STS_DORQ_FIFO_AFULL)) == 0)
+			return 0;
 	}
 
+	/* Some other indication was present - non recoverable */
+	DP_INFO(p_hwfn, "DORQ fatal attention\n");
+
 	return -EINVAL;
 }
 
diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.h b/drivers/net/ethernet/qlogic/qed/qed_int.h
index 54b4ee0..d81a62e 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_int.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_int.h
@@ -190,6 +190,16 @@ void qed_int_get_num_sbs(struct qed_hwfn	*p_hwfn,
  */
 void qed_int_disable_post_isr_release(struct qed_dev *cdev);
 
+/**
+ * @brief - Doorbell Recovery handler.
+ *          Run DB_REC_REAL_DEAL doorbell recovery in case of PF overflow
+ *          (and flush DORQ if needed), otherwise run DB_REC_ONCE.
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ */
+int qed_db_rec_handler(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
+
 #define QED_CAU_DEF_RX_TIMER_RES 0
 #define QED_CAU_DEF_TX_TIMER_RES 0
 
diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
index 75d217a..f2c50ef 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -965,9 +965,47 @@ static void qed_update_pf_params(struct qed_dev *cdev,
 	}
 }
 
+#define QED_PERIODIC_DB_REC_COUNT		100
+#define QED_PERIODIC_DB_REC_INTERVAL_MS		100
+#define QED_PERIODIC_DB_REC_INTERVAL \
+	msecs_to_jiffies(QED_PERIODIC_DB_REC_INTERVAL_MS)
+#define QED_PERIODIC_DB_REC_WAIT_COUNT		10
+#define QED_PERIODIC_DB_REC_WAIT_INTERVAL \
+	(QED_PERIODIC_DB_REC_INTERVAL_MS / QED_PERIODIC_DB_REC_WAIT_COUNT)
+
+static int qed_slowpath_delayed_work(struct qed_hwfn *hwfn,
+				     enum qed_slowpath_wq_flag wq_flag,
+				     unsigned long delay)
+{
+	if (!hwfn->slowpath_wq_active)
+		return -EINVAL;
+
+	/* Memory barrier for setting atomic bit */
+	smp_mb__before_atomic();
+	set_bit(wq_flag, &hwfn->slowpath_task_flags);
+	smp_mb__after_atomic();
+	queue_delayed_work(hwfn->slowpath_wq, &hwfn->slowpath_task, delay);
+
+	return 0;
+}
+
+void qed_periodic_db_rec_start(struct qed_hwfn *p_hwfn)
+{
+	/* Reset periodic Doorbell Recovery counter */
+	p_hwfn->periodic_db_rec_count = QED_PERIODIC_DB_REC_COUNT;
+
+	/* Don't schedule periodic Doorbell Recovery if already scheduled */
+	if (test_bit(QED_SLOWPATH_PERIODIC_DB_REC,
+		     &p_hwfn->slowpath_task_flags))
+		return;
+
+	qed_slowpath_delayed_work(p_hwfn, QED_SLOWPATH_PERIODIC_DB_REC,
+				  QED_PERIODIC_DB_REC_INTERVAL);
+}
+
 static void qed_slowpath_wq_stop(struct qed_dev *cdev)
 {
-	int i;
+	int i, sleep_count = QED_PERIODIC_DB_REC_WAIT_COUNT;
 
 	if (IS_VF(cdev))
 		return;
@@ -976,6 +1014,15 @@ static void qed_slowpath_wq_stop(struct qed_dev *cdev)
 		if (!cdev->hwfns[i].slowpath_wq)
 			continue;
 
+		/* Stop queuing new delayed works */
+		cdev->hwfns[i].slowpath_wq_active = false;
+
+		/* Wait until the last periodic doorbell recovery is executed */
+		while (test_bit(QED_SLOWPATH_PERIODIC_DB_REC,
+				&cdev->hwfns[i].slowpath_task_flags) &&
+		       sleep_count--)
+			msleep(QED_PERIODIC_DB_REC_WAIT_INTERVAL);
+
 		flush_workqueue(cdev->hwfns[i].slowpath_wq);
 		destroy_workqueue(cdev->hwfns[i].slowpath_wq);
 	}
@@ -988,7 +1035,10 @@ static void qed_slowpath_task(struct work_struct *work)
 	struct qed_ptt *ptt = qed_ptt_acquire(hwfn);
 
 	if (!ptt) {
-		queue_delayed_work(hwfn->slowpath_wq, &hwfn->slowpath_task, 0);
+		if (hwfn->slowpath_wq_active)
+			queue_delayed_work(hwfn->slowpath_wq,
+					   &hwfn->slowpath_task, 0);
+
 		return;
 	}
 
@@ -996,6 +1046,15 @@ static void qed_slowpath_task(struct work_struct *work)
 			       &hwfn->slowpath_task_flags))
 		qed_mfw_process_tlv_req(hwfn, ptt);
 
+	if (test_and_clear_bit(QED_SLOWPATH_PERIODIC_DB_REC,
+			       &hwfn->slowpath_task_flags)) {
+		qed_db_rec_handler(hwfn, ptt);
+		if (hwfn->periodic_db_rec_count--)
+			qed_slowpath_delayed_work(hwfn,
+						  QED_SLOWPATH_PERIODIC_DB_REC,
+						  QED_PERIODIC_DB_REC_INTERVAL);
+	}
+
 	qed_ptt_release(hwfn, ptt);
 }
 
@@ -1022,6 +1081,7 @@ static int qed_slowpath_wq_start(struct qed_dev *cdev)
 		}
 
 		INIT_DELAYED_WORK(&hwfn->slowpath_task, qed_slowpath_task);
+		hwfn->slowpath_wq_active = true;
 	}
 
 	return 0;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h
index 2440970..8939ed6 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h
@@ -1243,6 +1243,56 @@
 	0x1701534UL
 #define TSEM_REG_DBG_FORCE_FRAME \
 	0x1701538UL
+#define DORQ_REG_PF_USAGE_CNT \
+	0x1009c0UL
+#define DORQ_REG_PF_OVFL_STICKY	\
+	0x1009d0UL
+#define DORQ_REG_DPM_FORCE_ABORT \
+	0x1009d8UL
+#define DORQ_REG_INT_STS \
+	0x100180UL
+#define DORQ_REG_INT_STS_ADDRESS_ERROR \
+	(0x1UL << 0)
+#define DORQ_REG_INT_STS_WR \
+	0x100188UL
+#define DORQ_REG_DB_DROP_DETAILS_REL \
+	0x100a28UL
+#define DORQ_REG_INT_STS_ADDRESS_ERROR_SHIFT \
+	0
+#define DORQ_REG_INT_STS_DB_DROP \
+		(0x1UL << 1)
+#define DORQ_REG_INT_STS_DB_DROP_SHIFT \
+	1
+#define DORQ_REG_INT_STS_DORQ_FIFO_OVFL_ERR \
+		(0x1UL << 2)
+#define DORQ_REG_INT_STS_DORQ_FIFO_OVFL_ERR_SHIFT \
+	2
+#define DORQ_REG_INT_STS_DORQ_FIFO_AFULL\
+		(0x1UL << 3)
+#define DORQ_REG_INT_STS_DORQ_FIFO_AFULL_SHIFT \
+	3
+#define DORQ_REG_INT_STS_CFC_BYP_VALIDATION_ERR \
+		(0x1UL << 4)
+#define DORQ_REG_INT_STS_CFC_BYP_VALIDATION_ERR_SHIFT \
+	4
+#define DORQ_REG_INT_STS_CFC_LD_RESP_ERR \
+		(0x1UL << 5)
+#define DORQ_REG_INT_STS_CFC_LD_RESP_ERR_SHIFT \
+	5
+#define DORQ_REG_INT_STS_XCM_DONE_CNT_ERR \
+		(0x1UL << 6)
+#define DORQ_REG_INT_STS_XCM_DONE_CNT_ERR_SHIFT	\
+	6
+#define DORQ_REG_INT_STS_CFC_LD_REQ_FIFO_OVFL_ERR \
+		(0x1UL << 7)
+#define DORQ_REG_INT_STS_CFC_LD_REQ_FIFO_OVFL_ERR_SHIFT	\
+	7
+#define DORQ_REG_INT_STS_CFC_LD_REQ_FIFO_UNDER_ERR \
+		(0x1UL << 8)
+#define DORQ_REG_INT_STS_CFC_LD_REQ_FIFO_UNDER_ERR_SHIFT \
+	8
+#define DORQ_REG_DB_DROP_DETAILS_REASON	\
+	0x100a20UL
 #define MSEM_REG_DBG_SELECT \
 	0x1801528UL
 #define MSEM_REG_DBG_DWORD_ENABLE \
-- 
1.8.3.1
Powered by blists - more mailing lists
 
