[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20200514095727.1361-3-irusskikh@marvell.com>
Date: Thu, 14 May 2020 12:57:18 +0300
From: Igor Russkikh <irusskikh@...vell.com>
To: <netdev@...r.kernel.org>
CC: "David S . Miller" <davem@...emloft.net>,
Ariel Elior <aelior@...vell.com>,
Michal Kalderon <mkalderon@...vell.com>,
Denis Bolotin <dbolotin@...vell.com>,
Jakub Kicinski <kuba@...nel.org>,
Igor Russkikh <irusskikh@...vell.com>,
Ariel Elior <ariel.elior@...vell.com>,
"Michal Kalderon" <michal.kalderon@...vell.com>
Subject: [PATCH v2 net-next 02/11] net: qede: add hw err scheduled handler
qede (ethernet level driver) registers a callback handler.
This handler maintains eth dev state flags/bits to track error processing.
It implements in place processing part for nonsleeping context (WARN_ON
trigger), and a deferred (delayed work) part which triggers recovery
process for recoverable errors.
In later patches this atomic handler will come with more meat.
We introduce err_flags on ethdevice structure, its being used to record
error handling properties.
Signed-off-by: Ariel Elior <ariel.elior@...vell.com>
Signed-off-by: Michal Kalderon <michal.kalderon@...vell.com>
Signed-off-by: Igor Russkikh <irusskikh@...vell.com>
---
drivers/net/ethernet/qlogic/qede/qede.h | 13 ++-
drivers/net/ethernet/qlogic/qede/qede_main.c | 95 +++++++++++++++++++-
2 files changed, 106 insertions(+), 2 deletions(-)
diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h
index f6f0b51620ab..695d645d9ba9 100644
--- a/drivers/net/ethernet/qlogic/qede/qede.h
+++ b/drivers/net/ethernet/qlogic/qede/qede.h
@@ -278,6 +278,14 @@ struct qede_dev {
struct qede_rdma_dev rdma_info;
struct bpf_prog *xdp_prog;
+
+ unsigned long err_flags;
+#define QEDE_ERR_IS_HANDLED 31
+#define QEDE_ERR_ATTN_CLR_EN 0
+#define QEDE_ERR_GET_DBG_INFO 1
+#define QEDE_ERR_IS_RECOVERABLE 2
+#define QEDE_ERR_WARN 3
+
struct qede_dump_info dump_info;
};
@@ -485,12 +493,15 @@ struct qede_fastpath {
#define QEDE_SP_RECOVERY 0
#define QEDE_SP_RX_MODE 1
+#define QEDE_SP_RSVD1 2
+#define QEDE_SP_RSVD2 3
+#define QEDE_SP_HW_ERR 4
+#define QEDE_SP_ARFS_CONFIG 5
#define QEDE_SP_AER 7
#ifdef CONFIG_RFS_ACCEL
int qede_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb,
u16 rxq_index, u32 flow_id);
-#define QEDE_SP_ARFS_CONFIG 4
#define QEDE_SP_TASK_POLL_DELAY (5 * HZ)
#endif
diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index 300405369c37..e67d5da23792 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -139,10 +139,12 @@ static void qede_shutdown(struct pci_dev *pdev);
static void qede_link_update(void *dev, struct qed_link_output *link);
static void qede_schedule_recovery_handler(void *dev);
static void qede_recovery_handler(struct qede_dev *edev);
+static void qede_schedule_hw_err_handler(void *dev,
+ enum qed_hw_err_type err_type);
static void qede_get_eth_tlv_data(void *edev, void *data);
static void qede_get_generic_tlv_data(void *edev,
struct qed_generic_tlvs *data);
-
+static void qede_generic_hw_err_handler(struct qede_dev *edev);
#ifdef CONFIG_QED_SRIOV
static int qede_set_vf_vlan(struct net_device *ndev, int vf, u16 vlan, u8 qos,
__be16 vlan_proto)
@@ -230,6 +232,7 @@ static struct qed_eth_cb_ops qede_ll_ops = {
#endif
.link_update = qede_link_update,
.schedule_recovery_handler = qede_schedule_recovery_handler,
+ .schedule_hw_err_handler = qede_schedule_hw_err_handler,
.get_generic_tlv_data = qede_get_generic_tlv_data,
.get_protocol_tlv_data = qede_get_eth_tlv_data,
},
@@ -1009,6 +1012,8 @@ static void qede_sp_task(struct work_struct *work)
qede_process_arfs_filters(edev, false);
}
#endif
+ if (test_and_clear_bit(QEDE_SP_HW_ERR, &edev->sp_flags))
+ qede_generic_hw_err_handler(edev);
__qede_unlock(edev);
if (test_and_clear_bit(QEDE_SP_AER, &edev->sp_flags)) {
@@ -2509,6 +2514,94 @@ static void qede_recovery_handler(struct qede_dev *edev)
qede_recovery_failed(edev);
}
+static void qede_atomic_hw_err_handler(struct qede_dev *edev)
+{
+ DP_NOTICE(edev,
+ "Generic non-sleepable HW error handling started - err_flags 0x%lx\n",
+ edev->err_flags);
+
+ /* Get a call trace of the flow that led to the error */
+ WARN_ON(test_bit(QEDE_ERR_WARN, &edev->err_flags));
+
+ DP_NOTICE(edev, "Generic non-sleepable HW error handling is done\n");
+}
+
+static void qede_generic_hw_err_handler(struct qede_dev *edev)
+{
+ struct qed_dev *cdev = edev->cdev;
+
+ DP_NOTICE(edev,
+ "Generic sleepable HW error handling started - err_flags 0x%lx\n",
+ edev->err_flags);
+
+ /* Trigger a recovery process.
+ * This is placed in the sleep requiring section just to make
+ * sure it is the last one, and that all the other operations
+ * were completed.
+ */
+ if (test_bit(QEDE_ERR_IS_RECOVERABLE, &edev->err_flags))
+ edev->ops->common->recovery_process(cdev);
+
+ clear_bit(QEDE_ERR_IS_HANDLED, &edev->err_flags);
+
+ DP_NOTICE(edev, "Generic sleepable HW error handling is done\n");
+}
+
+static void qede_set_hw_err_flags(struct qede_dev *edev,
+ enum qed_hw_err_type err_type)
+{
+ unsigned long err_flags = 0;
+
+ switch (err_type) {
+ case QED_HW_ERR_DMAE_FAIL:
+ set_bit(QEDE_ERR_WARN, &err_flags);
+ fallthrough;
+ case QED_HW_ERR_MFW_RESP_FAIL:
+ case QED_HW_ERR_HW_ATTN:
+ case QED_HW_ERR_RAMROD_FAIL:
+ case QED_HW_ERR_FW_ASSERT:
+ set_bit(QEDE_ERR_ATTN_CLR_EN, &err_flags);
+ set_bit(QEDE_ERR_GET_DBG_INFO, &err_flags);
+ break;
+
+ default:
+ DP_NOTICE(edev, "Unexpected HW error [%d]\n", err_type);
+ break;
+ }
+
+ edev->err_flags |= err_flags;
+}
+
+static void qede_schedule_hw_err_handler(void *dev,
+ enum qed_hw_err_type err_type)
+{
+ struct qede_dev *edev = dev;
+
+ /* Fan failure cannot be masked by handling of another HW error or by a
+ * concurrent recovery process.
+ */
+ if ((test_and_set_bit(QEDE_ERR_IS_HANDLED, &edev->err_flags) ||
+ edev->state == QEDE_STATE_RECOVERY) &&
+ err_type != QED_HW_ERR_FAN_FAIL) {
+ DP_INFO(edev,
+ "Avoid scheduling an error handling while another HW error is being handled\n");
+ return;
+ }
+
+ if (err_type >= QED_HW_ERR_LAST) {
+ DP_NOTICE(edev, "Unknown HW error [%d]\n", err_type);
+ clear_bit(QEDE_ERR_IS_HANDLED, &edev->err_flags);
+ return;
+ }
+
+ qede_set_hw_err_flags(edev, err_type);
+ qede_atomic_hw_err_handler(edev);
+ set_bit(QEDE_SP_HW_ERR, &edev->sp_flags);
+ schedule_delayed_work(&edev->sp_task, 0);
+
+ DP_INFO(edev, "Scheduled a error handler [err_type %d]\n", err_type);
+}
+
static bool qede_is_txq_full(struct qede_dev *edev, struct qede_tx_queue *txq)
{
struct netdev_queue *netdev_txq;
--
2.17.1
Powered by blists - more mailing lists