[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20200421145300.16278-4-skalluru@marvell.com>
Date: Tue, 21 Apr 2020 07:53:00 -0700
From: Sudarsana Reddy Kalluru <skalluru@...vell.com>
To: <davem@...emloft.net>
CC: <netdev@...r.kernel.org>, <aelior@...vell.com>,
<irusskikh@...vell.com>, <mkalderon@...vell.com>
Subject: [PATCH net-next 3/3] qede: Add support for handling the pcie errors.
The error recovery is handled by management firmware (MFW) with the help of
qed/qede drivers. Upon detecting the errors, driver informs MFW about this
event which in turn starts a recovery process. MFW sends ERROR_RECOVERY
notification to the driver which performs the required cleanup/recovery
from the driver side.
Signed-off-by: Sudarsana Reddy Kalluru <skalluru@...vell.com>
Signed-off-by: Ariel Elior <aelior@...vell.com>
Signed-off-by: Igor Russkikh <irusskikh@...vell.com>
---
drivers/net/ethernet/qlogic/qede/qede.h | 1 +
drivers/net/ethernet/qlogic/qede/qede_main.c | 68 +++++++++++++++++++++++++++-
2 files changed, 68 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h
index bf8b8ad..bacc58f 100644
--- a/drivers/net/ethernet/qlogic/qede/qede.h
+++ b/drivers/net/ethernet/qlogic/qede/qede.h
@@ -486,6 +486,7 @@ struct qede_fastpath {
#define QEDE_SP_RECOVERY 0
#define QEDE_SP_RX_MODE 1
+#define QEDE_SP_AER 7
#ifdef CONFIG_RFS_ACCEL
int qede_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb,
diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index 9c4d9cd..4adfe2f 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -60,6 +60,7 @@
#include <net/ip6_checksum.h>
#include <linux/bitops.h>
#include <linux/vmalloc.h>
+#include <linux/aer.h>
#include "qede.h"
#include "qede_ptp.h"
@@ -124,6 +125,8 @@ enum qede_pci_private {
MODULE_DEVICE_TABLE(pci, qede_pci_tbl);
static int qede_probe(struct pci_dev *pdev, const struct pci_device_id *id);
+static pci_ers_result_t
+qede_io_error_detected(struct pci_dev *pdev, pci_channel_state_t state);
#define TX_TIMEOUT (5 * HZ)
@@ -206,6 +209,10 @@ static int qede_sriov_configure(struct pci_dev *pdev, int num_vfs_param)
}
#endif
+static const struct pci_error_handlers qede_err_handler = {
+ .error_detected = qede_io_error_detected,
+};
+
static struct pci_driver qede_pci_driver = {
.name = "qede",
.id_table = qede_pci_tbl,
@@ -215,6 +222,7 @@ static int qede_sriov_configure(struct pci_dev *pdev, int num_vfs_param)
#ifdef CONFIG_QED_SRIOV
.sriov_configure = qede_sriov_configure,
#endif
+ .err_handler = &qede_err_handler,
};
static struct qed_eth_cb_ops qede_ll_ops = {
@@ -977,7 +985,8 @@ static void qede_sp_task(struct work_struct *work)
/* SRIOV must be disabled outside the lock to avoid a deadlock.
* The recovery of the active VFs is currently not supported.
*/
- qede_sriov_configure(edev->pdev, 0);
+ if (edev->num_vfs)
+ qede_sriov_configure(edev->pdev, 0);
#endif
qede_lock(edev);
qede_recovery_handler(edev);
@@ -997,6 +1006,17 @@ static void qede_sp_task(struct work_struct *work)
}
#endif
__qede_unlock(edev);
+
+ if (test_and_clear_bit(QEDE_SP_AER, &edev->sp_flags)) {
+#ifdef CONFIG_QED_SRIOV
+ /* SRIOV must be disabled outside the lock to avoid a deadlock.
+ * The recovery of the active VFs is currently not supported.
+ */
+ if (edev->num_vfs)
+ qede_sriov_configure(edev->pdev, 0);
+#endif
+ edev->ops->common->recovery_process(edev->cdev);
+ }
}
static void qede_update_pf_params(struct qed_dev *cdev)
@@ -2582,3 +2602,49 @@ static void qede_get_eth_tlv_data(void *dev, void *data)
etlv->num_txqs_full_set = true;
etlv->num_rxqs_full_set = true;
}
+
+/**
+ * qede_io_error_detected - called when PCI error is detected
+ * @pdev: Pointer to PCI device
+ * @state: The current pci connection state
+ *
+ * This function is called after a PCI bus error affecting
+ * this device has been detected.
+ */
+static pci_ers_result_t
+qede_io_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
+{
+ struct net_device *dev = pci_get_drvdata(pdev);
+ struct qede_dev *edev = netdev_priv(dev);
+
+ if (!edev)
+ return PCI_ERS_RESULT_NONE;
+
+ DP_NOTICE(edev, "IO error detected [%d]\n", state);
+
+ __qede_lock(edev);
+ if (edev->state == QEDE_STATE_RECOVERY) {
+ DP_NOTICE(edev, "Device already in the recovery state\n");
+ __qede_unlock(edev);
+ return PCI_ERS_RESULT_NONE;
+ }
+
+ /* PF handles the recovery of its VFs */
+ if (IS_VF(edev)) {
+ DP_VERBOSE(edev, QED_MSG_IOV,
+ "VF recovery is handled by its PF\n");
+ __qede_unlock(edev);
+ return PCI_ERS_RESULT_RECOVERED;
+ }
+
+ /* Close OS Tx */
+ netif_tx_disable(edev->ndev);
+ netif_carrier_off(edev->ndev);
+
+ set_bit(QEDE_SP_AER, &edev->sp_flags);
+ schedule_delayed_work(&edev->sp_task, 0);
+
+ __qede_unlock(edev);
+
+ return PCI_ERS_RESULT_CAN_RECOVER;
+}
--
1.8.3.1
Powered by blists - more mailing lists