lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20240705181519.4067507-6-fenghua.yu@intel.com>
Date: Fri,  5 Jul 2024 11:15:18 -0700
From: Fenghua Yu <fenghua.yu@...el.com>
To: "Vinod Koul" <vkoul@...nel.org>,
	"Dave Jiang" <dave.jiang@...el.com>
Cc: dmaengine@...r.kernel.org,
	"linux-kernel" <linux-kernel@...r.kernel.org>,
	Fenghua Yu <fenghua.yu@...el.com>
Subject: [PATCH 5/5] dmaengine: idxd: Enable Function Level Reset (FLR) for halt

When DSA/IAA device hits a fatal error, the device enters a halt state.
The driver can reset the device depending on Reset Type required by
hardware to recover the device.

Supported Reset Types are:
0: Reset Device command
1: Function Level Reset (FLR)
2: Warm reset
3: Cold reset

Currently, the driver only supports Reset Type 0.

This patch adds support for FLR recovery Type 1. Before issuing a PCIe
FLR command, IDXD device and WQ states are saved. After the FLR command
execution, the device is recovered to its previous states, allowing
the user can continue using the device.

Signed-off-by: Fenghua Yu <fenghua.yu@...el.com>
---
 drivers/dma/idxd/init.c | 123 ++++++++++++++++++++++++++++++++++++++++
 drivers/dma/idxd/irq.c  |  28 ++++++++-
 2 files changed, 148 insertions(+), 3 deletions(-)

diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c
index bb03d8cc5d32..87f2c6a878c6 100644
--- a/drivers/dma/idxd/init.c
+++ b/drivers/dma/idxd/init.c
@@ -973,6 +973,118 @@ static void idxd_device_config_restore(struct idxd_device *idxd,
 	kfree(idxd_saved->saved_wqs);
 }
 
+static void idxd_reset_prepare(struct pci_dev *pdev)
+{
+	struct idxd_device *idxd = pci_get_drvdata(pdev);
+	struct device *dev = &idxd->pdev->dev;
+	const char *idxd_name;
+	int rc;
+
+	dev = &idxd->pdev->dev;
+	idxd_name = dev_name(idxd_confdev(idxd));
+
+	struct idxd_saved_states *idxd_saved __free(kfree) =
+			kzalloc_node(sizeof(*idxd_saved), GFP_KERNEL,
+				     dev_to_node(&pdev->dev));
+	if (!idxd_saved) {
+		dev_err(dev, "HALT: no memory\n");
+
+		return;
+	}
+
+	/* Save IDXD configurations. */
+	rc = idxd_device_config_save(idxd, idxd_saved);
+	if (rc < 0) {
+		dev_err(dev, "HALT: cannot save %s configs\n", idxd_name);
+
+		return;
+	}
+
+	idxd->idxd_saved = no_free_ptr(idxd_saved);
+
+	/* Save PCI device state. */
+	pci_save_state(idxd->pdev);
+}
+
+static void idxd_reset_done(struct pci_dev *pdev)
+{
+	struct idxd_device *idxd = pci_get_drvdata(pdev);
+	const char *idxd_name;
+	struct device *dev;
+	int rc, i;
+
+	if (!idxd->idxd_saved)
+		return;
+
+	dev = &idxd->pdev->dev;
+	idxd_name = dev_name(idxd_confdev(idxd));
+
+	/* Restore PCI device state. */
+	pci_restore_state(idxd->pdev);
+
+	/* Unbind idxd device from driver. */
+	idxd_unbind(&idxd_drv.drv, idxd_name);
+
+	/*
+	 * Probe PCI device without allocating or changing
+	 * idxd software data which keeps the same as before FLR.
+	 */
+	idxd_pci_probe_alloc(idxd, NULL, NULL);
+
+	/* Restore IDXD configurations. */
+	idxd_device_config_restore(idxd, idxd->idxd_saved);
+
+	/* Re-configure IDXD device if allowed. */
+	if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) {
+		rc = idxd_device_config(idxd);
+		if (rc < 0) {
+			dev_err(dev, "HALT: %s config fails\n", idxd_name);
+			goto out;
+		}
+	}
+
+	/* Bind IDXD device to driver. */
+	rc = idxd_bind(&idxd_drv.drv, idxd_name);
+	if (rc < 0) {
+		dev_err(dev, "HALT: binding %s to driver fails\n", idxd_name);
+		goto out;
+	}
+
+	/* Bind enabled wq in the IDXD device to driver. */
+	for (i = 0; i < idxd->max_wqs; i++) {
+		if (test_bit(i, idxd->wq_enable_map)) {
+			struct idxd_wq *wq = idxd->wqs[i];
+			char wq_name[32];
+
+			wq->state = IDXD_WQ_DISABLED;
+			sprintf(wq_name, "wq%d.%d", idxd->id, wq->id);
+			/*
+			 * Bind to user driver depending on wq type.
+			 *
+			 * Currently only support user type WQ. Will support
+			 * kernel type WQ in the future.
+			 */
+			if (wq->type == IDXD_WQT_USER)
+				rc = idxd_bind(&idxd_user_drv.drv, wq_name);
+			else
+				rc = -EINVAL;
+			if (rc < 0) {
+				clear_bit(i, idxd->wq_enable_map);
+				dev_err(dev,
+					"HALT: unable to re-enable wq %s\n",
+					dev_name(wq_confdev(wq)));
+			}
+		}
+	}
+out:
+	kfree(idxd->idxd_saved);
+}
+
+static const struct pci_error_handlers idxd_error_handler = {
+	.reset_prepare	= idxd_reset_prepare,
+	.reset_done	= idxd_reset_done,
+};
+
 /*
  * Probe idxd PCI device.
  * If idxd is not given, need to allocate idxd and set up its data.
@@ -1046,6 +1158,16 @@ int idxd_pci_probe_alloc(struct idxd_device *idxd, struct pci_dev *pdev,
 			dev_warn(dev, "IDXD debugfs failed to setup\n");
 	}
 
+	if (!alloc_idxd) {
+		/* Release interrupts in the IDXD device. */
+		idxd_cleanup_interrupts(idxd);
+
+		/* Re-enable interrupts in the IDXD device. */
+		rc = idxd_setup_interrupts(idxd);
+		if (rc)
+			dev_warn(dev, "IDXD interrupts failed to setup\n");
+	}
+
 	dev_info(&pdev->dev, "Intel(R) Accelerator Device (v%x)\n",
 		 idxd->hw.version);
 
@@ -1136,6 +1258,7 @@ static struct pci_driver idxd_pci_driver = {
 	.probe		= idxd_pci_probe,
 	.remove		= idxd_remove,
 	.shutdown	= idxd_shutdown,
+	.err_handler	= &idxd_error_handler,
 };
 
 static int __init idxd_init_module(void)
diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c
index a46e58b756a5..1107db3ce0a3 100644
--- a/drivers/dma/idxd/irq.c
+++ b/drivers/dma/idxd/irq.c
@@ -383,6 +383,20 @@ static void process_evl_entries(struct idxd_device *idxd)
 	mutex_unlock(&evl->lock);
 }
 
+static void idxd_device_flr(struct work_struct *work)
+{
+	struct idxd_device *idxd = container_of(work, struct idxd_device, work);
+	int rc;
+
+	/*
+	 * IDXD device requires a Function Level Reset (FLR).
+	 * pci_reset_function() will reset the device with FLR.
+	 */
+	rc = pci_reset_function(idxd->pdev);
+	if (rc)
+		dev_err(&idxd->pdev->dev, "FLR failed\n");
+}
+
 static irqreturn_t idxd_halt(struct idxd_device *idxd)
 {
 	union gensts_reg gensts;
@@ -398,15 +412,23 @@ static irqreturn_t idxd_halt(struct idxd_device *idxd)
 			 */
 			INIT_WORK(&idxd->work, idxd_device_reinit);
 			queue_work(idxd->wq, &idxd->work);
+		} else if (gensts.reset_type == IDXD_DEVICE_RESET_FLR) {
+			idxd->state = IDXD_DEV_HALTED;
+			idxd_mask_error_interrupts(idxd);
+			dev_dbg(&idxd->pdev->dev,
+				"idxd halted, doing FLR. After FLR, configs are restored\n");
+			INIT_WORK(&idxd->work, idxd_device_flr);
+			queue_work(idxd->wq, &idxd->work);
+
 		} else {
 			idxd->state = IDXD_DEV_HALTED;
 			idxd_wqs_quiesce(idxd);
 			idxd_wqs_unmap_portal(idxd);
 			idxd_device_clear_state(idxd);
 			dev_err(&idxd->pdev->dev,
-				"idxd halted, need %s.\n",
-				gensts.reset_type == IDXD_DEVICE_RESET_FLR ?
-				"FLR" : "system reset");
+				"idxd halted, need system reset");
+
+			return -ENXIO;
 		}
 	}
 
-- 
2.37.1


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ