lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250904040828.319452-3-ankita@nvidia.com>
Date: Thu, 4 Sep 2025 04:08:16 +0000
From: <ankita@...dia.com>
To: <ankita@...dia.com>, <jgg@...dia.com>, <alex.williamson@...hat.com>,
	<yishaih@...dia.com>, <skolothumtho@...dia.com>, <kevin.tian@...el.com>,
	<yi.l.liu@...el.com>, <zhiw@...dia.com>
CC: <aniketa@...dia.com>, <cjia@...dia.com>, <kwankhede@...dia.com>,
	<targupta@...dia.com>, <vsethi@...dia.com>, <acurrid@...dia.com>,
	<apopple@...dia.com>, <jhubbard@...dia.com>, <danw@...dia.com>,
	<anuaggarwal@...dia.com>, <mochs@...dia.com>, <kjaju@...dia.com>,
	<dnigam@...dia.com>, <kvm@...r.kernel.org>, <linux-kernel@...r.kernel.org>
Subject: [RFC 02/14] vfio/nvgrace-gpu: Create auxiliary device for EGM

From: Ankit Agrawal <ankita@...dia.com>

The Extended GPU Memory (EGM) feature enables GPU access to
system memory across sockets and physical systems on
Grace Hopper and Grace Blackwell systems. When the feature is
enabled through SBIOS, part of the system memory is made available
to the GPU for access through the EGM path.

The EGM functionality is separate and largely independent from the
core GPU device functionality. However, the EGM region information
of base SPA and size is associated with the GPU on the ACPI tables.
An architecture with EGM represented as an auxiliary device suits this
context well.

The parent GPU device creates an EGM auxiliary device to be managed
independently by an auxiliary EGM driver. The EGM region information
is kept as part of the shared struct nvgrace_egm_dev along with the
auxiliary device handle.

Each socket has a separate EGM region and hence a multi-socket system
has multiple EGM regions. Each EGM region has a separate nvgrace_egm_dev
and the nvgrace-gpu driver keeps the EGM regions as part of a list.

Note that EGM is an optional feature enabled through SBIOS. The EGM
properties are only populated in ACPI tables if the feature is enabled;
they are absent otherwise. The absence of the properties is thus not
considered fatal. The presence of an improper set of values, however, is
considered fatal.

Note also that multiple GPUs may be present per socket, each carrying
the same EGM region information. Make sure the duplicate data does not
get added.

Suggested-by: Jason Gunthorpe <jgg@...dia.com>
Signed-off-by: Ankit Agrawal <ankita@...dia.com>
---
 MAINTAINERS                            |  5 +-
 drivers/vfio/pci/nvgrace-gpu/Makefile  |  2 +-
 drivers/vfio/pci/nvgrace-gpu/egm_dev.c | 61 ++++++++++++++++++++++
 drivers/vfio/pci/nvgrace-gpu/egm_dev.h | 17 +++++++
 drivers/vfio/pci/nvgrace-gpu/main.c    | 70 +++++++++++++++++++++++++-
 include/linux/nvgrace-egm.h            | 23 +++++++++
 6 files changed, 175 insertions(+), 3 deletions(-)
 create mode 100644 drivers/vfio/pci/nvgrace-gpu/egm_dev.c
 create mode 100644 drivers/vfio/pci/nvgrace-gpu/egm_dev.h
 create mode 100644 include/linux/nvgrace-egm.h

diff --git a/MAINTAINERS b/MAINTAINERS
index 6dcfbd11efef..dd7df834b70b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -26471,7 +26471,10 @@ VFIO NVIDIA GRACE GPU DRIVER
 M:	Ankit Agrawal <ankita@...dia.com>
 L:	kvm@...r.kernel.org
 S:	Supported
-F:	drivers/vfio/pci/nvgrace-gpu/
+F:	drivers/vfio/pci/nvgrace-gpu/egm_dev.c
+F:	drivers/vfio/pci/nvgrace-gpu/egm_dev.h
+F:	drivers/vfio/pci/nvgrace-gpu/main.c
+F:	include/linux/nvgrace-egm.h
 
 VFIO PCI DEVICE SPECIFIC DRIVERS
 R:	Jason Gunthorpe <jgg@...dia.com>
diff --git a/drivers/vfio/pci/nvgrace-gpu/Makefile b/drivers/vfio/pci/nvgrace-gpu/Makefile
index 3ca8c187897a..e72cc6739ef8 100644
--- a/drivers/vfio/pci/nvgrace-gpu/Makefile
+++ b/drivers/vfio/pci/nvgrace-gpu/Makefile
@@ -1,3 +1,3 @@
 # SPDX-License-Identifier: GPL-2.0-only
 obj-$(CONFIG_NVGRACE_GPU_VFIO_PCI) += nvgrace-gpu-vfio-pci.o
-nvgrace-gpu-vfio-pci-y := main.o
+nvgrace-gpu-vfio-pci-y := main.o egm_dev.o
diff --git a/drivers/vfio/pci/nvgrace-gpu/egm_dev.c b/drivers/vfio/pci/nvgrace-gpu/egm_dev.c
new file mode 100644
index 000000000000..f4e27dadf1ef
--- /dev/null
+++ b/drivers/vfio/pci/nvgrace-gpu/egm_dev.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved
+ */
+
+#include <linux/vfio_pci_core.h>
+#include "egm_dev.h"
+
+/*
+ * Read "nvidia,egm-pxm" into @pegmpxm. A non-zero return means the fetch
+ * failed, which is expected when EGM is disabled and firmware omits it.
+ */
+int nvgrace_gpu_has_egm_property(struct pci_dev *pdev, u64 *pegmpxm)
+{
+	struct device *dev = &pdev->dev;
+
+	return device_property_read_u64(dev, "nvidia,egm-pxm", pegmpxm);
+}
+
+static void nvgrace_gpu_release_aux_device(struct device *device)
+{
+	struct nvgrace_egm_dev *egm_dev =
+		container_of(device, struct nvgrace_egm_dev, aux_dev.dev);
+	/* Pairs with the kvzalloc() in nvgrace_gpu_create_aux_device(). */
+	kvfree(egm_dev);
+}
+
+/* Allocate and register the EGM auxiliary device; returns NULL on failure. */
+struct nvgrace_egm_dev *
+nvgrace_gpu_create_aux_device(struct pci_dev *pdev, const char *name,
+			      u64 egmpxm)
+{
+	struct nvgrace_egm_dev *egm_dev;
+	struct auxiliary_device *adev;
+
+	egm_dev = kvzalloc(sizeof(*egm_dev), GFP_KERNEL);
+	if (!egm_dev)
+		return NULL;
+
+	egm_dev->egmpxm = egmpxm;
+
+	adev = &egm_dev->aux_dev;
+	adev->id = egmpxm;
+	adev->name = name;
+	adev->dev.release = nvgrace_gpu_release_aux_device;
+	adev->dev.parent = &pdev->dev;
+
+	/* Init failed: the device was never set up, so free it directly. */
+	if (auxiliary_device_init(adev)) {
+		kvfree(egm_dev);
+		return NULL;
+	}
+
+	/* Add failed: uninit drops the reference; ->release() frees egm_dev. */
+	if (auxiliary_device_add(adev)) {
+		auxiliary_device_uninit(adev);
+		return NULL;
+	}
+
+	return egm_dev;
+}
diff --git a/drivers/vfio/pci/nvgrace-gpu/egm_dev.h b/drivers/vfio/pci/nvgrace-gpu/egm_dev.h
new file mode 100644
index 000000000000..c00f5288f4e7
--- /dev/null
+++ b/drivers/vfio/pci/nvgrace-gpu/egm_dev.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved
+ */
+
+#ifndef EGM_DEV_H
+#define EGM_DEV_H
+
+#include <linux/nvgrace-egm.h>
+
+int nvgrace_gpu_has_egm_property(struct pci_dev *pdev, u64 *pegmpxm);
+
+/*
+ * Create and register the EGM auxiliary device for @pdev. @egmpxm is the
+ * value read from the "nvidia,egm-pxm" property. Returns NULL on failure.
+ */
+struct nvgrace_egm_dev *
+nvgrace_gpu_create_aux_device(struct pci_dev *pdev, const char *name,
+			      u64 egmpxm);
+
+#endif /* EGM_DEV_H */
diff --git a/drivers/vfio/pci/nvgrace-gpu/main.c b/drivers/vfio/pci/nvgrace-gpu/main.c
index 72e7ac1fa309..2cf851492990 100644
--- a/drivers/vfio/pci/nvgrace-gpu/main.c
+++ b/drivers/vfio/pci/nvgrace-gpu/main.c
@@ -7,6 +7,8 @@
 #include <linux/vfio_pci_core.h>
 #include <linux/delay.h>
 #include <linux/jiffies.h>
+#include <linux/nvgrace-egm.h>
+#include "egm_dev.h"
 
 /*
  * The device memory usable to the workloads running in the VM is cached
@@ -60,6 +62,63 @@ struct nvgrace_gpu_pci_core_device {
 	bool has_mig_hw_bug;
 };
 
+static struct list_head egm_dev_list;	/* nvgrace_egm_dev_entry nodes; set up in module init */
+
+/*
+ * Create the EGM auxiliary device for the EGM region advertised by @pdev and
+ * track it on egm_dev_list.
+ *
+ * Returns 0 on success, or when there is nothing to do: either the EGM
+ * property is absent (feature disabled in SBIOS) or a device for the same
+ * egmpxm is already on the list. Returns -ENOMEM or -EINVAL on failure.
+ */
+static int nvgrace_gpu_create_egm_aux_device(struct pci_dev *pdev)
+{
+	struct nvgrace_egm_dev_entry *egm_entry;
+	u64 egmpxm;
+
+	/*
+	 * EGM is an optional feature enabled in SBIOS. If disabled, there
+	 * will be no EGM properties populated in the ACPI tables and this
+	 * fetch would fail. Treat this failure as non-fatal and return
+	 * early.
+	 */
+	if (nvgrace_gpu_has_egm_property(pdev, &egmpxm))
+		return 0;
+
+	/*
+	 * Several GPUs may report the same EGM region. The auxiliary device
+	 * name and id are the same for all of them, so a second registration
+	 * for the same region would collide with the first; keep the one
+	 * already on the list instead.
+	 *
+	 * NOTE(review): the entry is still destroyed when any one of the
+	 * sharing GPUs goes away; confirm whether a per-region user count
+	 * is needed.
+	 */
+	list_for_each_entry(egm_entry, &egm_dev_list, list) {
+		if (egm_entry->egm_dev->egmpxm == egmpxm)
+			return 0;
+	}
+
+	egm_entry = kvzalloc(sizeof(*egm_entry), GFP_KERNEL);
+	if (!egm_entry)
+		return -ENOMEM;
+
+	egm_entry->egm_dev =
+		nvgrace_gpu_create_aux_device(pdev, NVGRACE_EGM_DEV_NAME,
+					      egmpxm);
+	if (!egm_entry->egm_dev) {
+		kvfree(egm_entry);
+		return -EINVAL;
+	}
+
+	list_add_tail(&egm_entry->list, &egm_dev_list);
+
+	return 0;
+}
+
+/*
+ * Tear down the EGM auxiliary device whose egmpxm matches the one reported
+ * by @pdev, and drop its tracking entry. No-op when the property is absent.
+ */
+static void nvgrace_gpu_destroy_egm_aux_device(struct pci_dev *pdev)
+{
+	struct nvgrace_egm_dev_entry *cur, *next;
+	u64 egmpxm;
+
+	if (nvgrace_gpu_has_egm_property(pdev, &egmpxm) != 0)
+		return;
+
+	/* _safe variant: matching entries are unlinked while iterating. */
+	list_for_each_entry_safe(cur, next, &egm_dev_list, list) {
+		if (cur->egm_dev->egmpxm != egmpxm)
+			continue;
+
+		auxiliary_device_destroy(&cur->egm_dev->aux_dev);
+		list_del(&cur->list);
+		kvfree(cur);
+	}
+}
+
 static void nvgrace_gpu_init_fake_bar_emu_regs(struct vfio_device *core_vdev)
 {
 	struct nvgrace_gpu_pci_core_device *nvdev =
@@ -965,14 +1024,20 @@ static int nvgrace_gpu_probe(struct pci_dev *pdev,
 						    memphys, memlength);
 		if (ret)
 			goto out_put_vdev;
+
+		ret = nvgrace_gpu_create_egm_aux_device(pdev);
+		if (ret)
+			goto out_put_vdev;
 	}
 
 	ret = vfio_pci_core_register_device(&nvdev->core_device);
 	if (ret)
-		goto out_put_vdev;
+		goto out_reg;
 
 	return ret;
 
+out_reg:
+	nvgrace_gpu_destroy_egm_aux_device(pdev);
 out_put_vdev:
 	vfio_put_device(&nvdev->core_device.vdev);
 	return ret;
@@ -982,6 +1047,7 @@ static void nvgrace_gpu_remove(struct pci_dev *pdev)
 {
 	struct vfio_pci_core_device *core_device = dev_get_drvdata(&pdev->dev);
 
+	nvgrace_gpu_destroy_egm_aux_device(pdev);
 	vfio_pci_core_unregister_device(core_device);
 	vfio_put_device(&core_device->vdev);
 }
@@ -1011,6 +1077,8 @@ static struct pci_driver nvgrace_gpu_vfio_pci_driver = {
 
 static int __init nvgrace_gpu_vfio_pci_init(void)
 {
+	INIT_LIST_HEAD(&egm_dev_list);	/* before registration: the probe path appends to this list */
+
 	return pci_register_driver(&nvgrace_gpu_vfio_pci_driver);
 }
 module_init(nvgrace_gpu_vfio_pci_init);
diff --git a/include/linux/nvgrace-egm.h b/include/linux/nvgrace-egm.h
new file mode 100644
index 000000000000..9575d4ad4338
--- /dev/null
+++ b/include/linux/nvgrace-egm.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved
+ */
+
+#ifndef NVGRACE_EGM_H
+#define NVGRACE_EGM_H
+
+#include <linux/auxiliary_bus.h>
+
+#define NVGRACE_EGM_DEV_NAME "egm"
+
+struct nvgrace_egm_dev {
+	struct auxiliary_device aux_dev;	/* embedded; container_of() recovers this struct */
+	u64 egmpxm;	/* value of the "nvidia,egm-pxm" property */
+};
+
+struct nvgrace_egm_dev_entry {
+	struct list_head list;	/* link in the GPU driver's list of EGM devices */
+	struct nvgrace_egm_dev *egm_dev;	/* EGM device tracked by this entry */
+};
+
+#endif /* NVGRACE_EGM_H */
-- 
2.34.1


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ