[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <c61d07469ecf5d3053442e24d4d050405f466b76.1679502371.git.petrm@nvidia.com>
Date: Wed, 22 Mar 2023 17:49:35 +0100
From: Petr Machata <petrm@...dia.com>
To: "David S. Miller" <davem@...emloft.net>,
Eric Dumazet <edumazet@...gle.com>,
Jakub Kicinski <kuba@...nel.org>,
Paolo Abeni <pabeni@...hat.com>, <netdev@...r.kernel.org>
CC: Ido Schimmel <idosch@...dia.com>, Amit Cohen <amcohen@...dia.com>,
"Petr Machata" <petrm@...dia.com>, <mlxsw@...dia.com>
Subject: [PATCH net-next 6/6] mlxsw: pci: Add support for new reset flow
From: Amit Cohen <amcohen@...dia.com>
The driver resets the device during probe and during a devlink reload.
The current reset method reloads the current firmware version or a pending
one, if one was previously flashed using devlink. However, the reset does
not take down the PCI link, preventing the PCI firmware from being
upgraded, unless the system is rebooted.
To solve this problem, a new reset command (6) was implemented in the
firmware. Unlike the current command (1), after issuing the new command
the device will not start the reset immediately, but only after the PCI
link was disabled. The driver is expected to wait for 500ms before
re-enabling the link to give the firmware enough time to start the reset.
Implement the new reset method and use it only after verifying it is
supported by the current firmware version by querying the Management
Capabilities Mask (MCAM) register. Consider the PCI firmware to be
operational either after waiting for a predefined time of 2000ms or after
reading an active link status when "Data Link Layer Link Active Reporting"
is supported. For good measure, make sure the device ID can be read from
the configuration space of the device.
Once the PCI firmware is operational, go back to the regular reset flow
and wait for the entire device to become ready. That is, repeatedly read
the "system_status" register from the BAR until a value of "FW_READY"
(0x5E) appears.
Signed-off-by: Amit Cohen <amcohen@...dia.com>
Reviewed-by: Ido Schimmel <idosch@...dia.com>
Signed-off-by: Petr Machata <petrm@...dia.com>
---
drivers/net/ethernet/mellanox/mlxsw/pci.c | 151 ++++++++++++++++++-
drivers/net/ethernet/mellanox/mlxsw/pci_hw.h | 5 +
2 files changed, 155 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c
index 73ae2fdd94c4..9b11c5280424 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c
@@ -1459,6 +1459,137 @@ static int mlxsw_pci_sys_ready_wait(struct mlxsw_pci *mlxsw_pci,
return -EBUSY;
}
+static int mlxsw_pci_link_active_wait(struct pci_dev *pdev)
+{
+ unsigned long end; /* Polling deadline, in jiffies. */
+ u16 lnksta; /* Most recent Link Status value read. */
+ int err;
+ /* Poll the PCIe Link Status register of 'pdev' until the "Data Link
+ * Layer Link Active" bit (PCI_EXP_LNKSTA_DLLLA) is set.
+ *
+ * Returns 0 once the bit is observed, the errno translation of a failed
+ * capability read, or -ETIMEDOUT if the bit is still clear once
+ * MLXSW_PCI_TOGGLE_TIMEOUT_MSECS have elapsed.
+ */
+ end = jiffies + msecs_to_jiffies(MLXSW_PCI_TOGGLE_TIMEOUT_MSECS);
+ do { /* Every iteration sleeps first and only then reads the register. */
+ msleep(MLXSW_PCI_TOGGLE_WAIT_MSECS);
+ err = pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &lnksta);
+ if (err)
+ return pcibios_err_to_errno(err);
+
+ if (lnksta & PCI_EXP_LNKSTA_DLLLA)
+ return 0;
+ } while (time_before(jiffies, end)); /* Body runs at least once, so
+ * 'lnksta' is always set when
+ * it is printed below.
+ */
+
+ pci_err(pdev, "PCI link not ready (0x%04x) after %d ms\n", lnksta,
+ MLXSW_PCI_TOGGLE_TIMEOUT_MSECS);
+
+ return -ETIMEDOUT;
+}
+
+static int mlxsw_pci_link_active_check(struct pci_dev *pdev)
+{
+ u32 lnkcap;
+ int err;
+ /* Wait for the link of 'pdev' to become usable after it was re-enabled.
+ *
+ * If the Link Capabilities register advertises PCI_EXP_LNKCAP_DLLLARC,
+ * the result of mlxsw_pci_link_active_wait() is returned. Otherwise,
+ * and also when reading the capability fails, the function sleeps for
+ * MLXSW_PCI_TOGGLE_TIMEOUT_MSECS and returns the errno translation of
+ * 'err'.
+ */
+ err = pcie_capability_read_dword(pdev, PCI_EXP_LNKCAP, &lnkcap);
+ if (err)
+ goto out; /* The fixed sleep at 'out' still runs on a failed read. */
+
+ if (lnkcap & PCI_EXP_LNKCAP_DLLLARC)
+ return mlxsw_pci_link_active_wait(pdev);
+
+ /* In case the device does not support "Data Link Layer Link Active
+ * Reporting", simply wait for a predefined time for the device to
+ * become active.
+ */
+ pci_dbg(pdev, "No PCI link reporting capability (0x%08x)\n", lnkcap);
+
+out:
+ /* Sleep before handling the rest of the flow and accessing the PCI
+ * device. 'err' is 0 here unless the capability read above failed.
+ */
+ msleep(MLXSW_PCI_TOGGLE_TIMEOUT_MSECS);
+ return pcibios_err_to_errno(err);
+}
+
+static int mlxsw_pci_link_toggle(struct pci_dev *pdev)
+{
+ int err;
+ /* Disable and then re-enable the PCIe link controlled by 'pdev' via the
+ * PCI_EXP_LNKCTL_LD bit of its Link Control register, then wait for the
+ * link to become active again.
+ *
+ * Returns 0 on success, otherwise the error of the first step that
+ * failed.
+ */
+ /* Disable the link. */
+ err = pcie_capability_set_word(pdev, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_LD);
+ if (err)
+ return pcibios_err_to_errno(err);
+
+ /* Sleep to give firmware enough time to start the reset. */
+ msleep(MLXSW_PCI_TOGGLE_WAIT_BEFORE_EN_MSECS);
+
+ /* Enable the link.
+ * NOTE(review): if clearing the bit fails, the function returns with
+ * the Link Disable bit presumably still set - confirm that callers can
+ * cope with the link being left down.
+ */
+ err = pcie_capability_clear_word(pdev, PCI_EXP_LNKCTL,
+ PCI_EXP_LNKCTL_LD);
+ if (err)
+ return pcibios_err_to_errno(err);
+
+ /* Wait for link active. */
+ return mlxsw_pci_link_active_check(pdev);
+}
+
+static int mlxsw_pci_device_id_read(struct pci_dev *pdev, u16 exp_dev_id)
+{
+ unsigned long end; /* Polling deadline, in jiffies. */
+ u16 dev_id; /* Device ID as read back from configuration space. */
+ int err;
+ /* Poll the PCI_DEVICE_ID configuration word of 'pdev' until it equals
+ * 'exp_dev_id'.
+ *
+ * Returns 0 on a match, the errno translation of a failed configuration
+ * read, or -ETIMEDOUT if no match was seen once
+ * MLXSW_PCI_TOGGLE_TIMEOUT_MSECS have elapsed.
+ */
+ end = jiffies + msecs_to_jiffies(MLXSW_PCI_TOGGLE_TIMEOUT_MSECS);
+ do { /* Every iteration sleeps first and only then reads the ID. */
+ msleep(MLXSW_PCI_TOGGLE_WAIT_MSECS);
+
+ /* Expect to get the correct PCI device ID as first indication
+ * that the ASIC is available.
+ */
+ err = pci_read_config_word(pdev, PCI_DEVICE_ID, &dev_id);
+ if (err)
+ return pcibios_err_to_errno(err);
+
+ if (dev_id == exp_dev_id)
+ return 0;
+ } while (time_before(jiffies, end));
+
+ pci_err(pdev, "PCI device ID is not as expected after %d ms\n",
+ MLXSW_PCI_TOGGLE_TIMEOUT_MSECS);
+
+ return -ETIMEDOUT;
+}
+
+/* Reset the device using the "reset at PCI disable" firmware command.
+ *
+ * The MRSR register is written with
+ * MLXSW_REG_MRSR_COMMAND_RESET_AT_PCI_DISABLE, the PCI configuration space
+ * of the device is saved, the link is toggled through the upstream bridge
+ * and the device ID is polled until it reads back as expected. Once the
+ * configuration space was saved, it is restored on both the success and
+ * the failure path.
+ *
+ * Returns 0 on success, -EOPNOTSUPP if the device has no upstream bridge
+ * through which the link could be toggled, or the error of the first step
+ * that failed.
+ */
+static int mlxsw_pci_reset_at_pci_disable(struct mlxsw_pci *mlxsw_pci)
+{
+ struct pci_bus *bridge_bus = mlxsw_pci->pdev->bus;
+ struct pci_dev *bridge_pdev = bridge_bus->self;
+ struct pci_dev *pdev = mlxsw_pci->pdev;
+ char mrsr_pl[MLXSW_REG_MRSR_LEN];
+ u16 dev_id = pdev->device;
+ int err;
+
+ /* The link is toggled on the upstream bridge. A device that sits
+ * directly on a root bus has no such bridge (bus->self is NULL), so
+ * bail out before the firmware is armed instead of dereferencing a
+ * NULL pointer later on.
+ */
+ if (!bridge_pdev) {
+ pci_err(pdev, "No upstream bridge, cannot toggle PCI link\n");
+ return -EOPNOTSUPP;
+ }
+
+ mlxsw_reg_mrsr_pack(mrsr_pl,
+ MLXSW_REG_MRSR_COMMAND_RESET_AT_PCI_DISABLE);
+ err = mlxsw_reg_write(mlxsw_pci->core, MLXSW_REG(mrsr), mrsr_pl);
+ if (err)
+ return err;
+
+ /* Save the PCI configuration space so that we will be able to restore
+ * it after the firmware was reset.
+ */
+ pci_save_state(pdev);
+ pci_cfg_access_lock(pdev);
+
+ err = mlxsw_pci_link_toggle(bridge_pdev);
+ if (err) {
+ pci_err(bridge_pdev, "Failed to toggle PCI link\n");
+ goto restore;
+ }
+
+ /* Remember the outcome; the cleanup below runs either way. */
+ err = mlxsw_pci_device_id_read(pdev, dev_id);
+
+restore:
+ pci_cfg_access_unlock(pdev);
+ pci_restore_state(pdev);
+ return err;
+}
+
static int mlxsw_pci_reset_sw(struct mlxsw_pci *mlxsw_pci)
{
char mrsr_pl[MLXSW_REG_MRSR_LEN];
@@ -1471,6 +1602,8 @@ static int
mlxsw_pci_reset(struct mlxsw_pci *mlxsw_pci, const struct pci_device_id *id)
{
struct pci_dev *pdev = mlxsw_pci->pdev;
+ char mcam_pl[MLXSW_REG_MCAM_LEN];
+ bool pci_reset_supported;
u32 sys_status;
int err;
@@ -1481,7 +1614,23 @@ mlxsw_pci_reset(struct mlxsw_pci *mlxsw_pci, const struct pci_device_id *id)
return err;
}
- err = mlxsw_pci_reset_sw(mlxsw_pci);
+ mlxsw_reg_mcam_pack(mcam_pl,
+ MLXSW_REG_MCAM_FEATURE_GROUP_ENHANCED_FEATURES);
+ err = mlxsw_reg_query(mlxsw_pci->core, MLXSW_REG(mcam), mcam_pl);
+ if (err)
+ return err;
+
+ mlxsw_reg_mcam_unpack(mcam_pl, MLXSW_REG_MCAM_PCI_RESET,
+ &pci_reset_supported);
+
+ if (pci_reset_supported) {
+ pci_dbg(pdev, "Starting PCI reset flow\n");
+ err = mlxsw_pci_reset_at_pci_disable(mlxsw_pci);
+ } else {
+ pci_dbg(pdev, "Starting software reset flow\n");
+ err = mlxsw_pci_reset_sw(mlxsw_pci);
+ }
+
if (err)
return err;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h
index 48dbfea0a2a1..ded0828d7f1f 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h
@@ -27,6 +27,11 @@
#define MLXSW_PCI_SW_RESET_TIMEOUT_MSECS 900000
#define MLXSW_PCI_SW_RESET_WAIT_MSECS 200
+
+#define MLXSW_PCI_TOGGLE_WAIT_BEFORE_EN_MSECS 500 /* Delay between disabling and re-enabling the link. */
+#define MLXSW_PCI_TOGGLE_WAIT_MSECS 20 /* Sleep between two successive polls. */
+#define MLXSW_PCI_TOGGLE_TIMEOUT_MSECS 2000 /* Polling deadline; also the fixed wait when link-active reporting is unavailable. */
+
#define MLXSW_PCI_FW_READY 0xA1844
#define MLXSW_PCI_FW_READY_MASK 0xFFFF
#define MLXSW_PCI_FW_READY_MAGIC 0x5E
--
2.39.0
Powered by blists - more mailing lists