lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <e2f17f3915f072d283c1f34774b3d069e670ae36.1724748733.git.perry.yuan@amd.com>
Date: Tue, 27 Aug 2024 17:36:54 +0800
From: Perry Yuan <perry.yuan@....com>
To: <hdegoede@...hat.com>, <ilpo.jarvinen@...ux.intel.com>,
	<Mario.Limonciello@....com>, <Borislav.Petkov@....com>,
	<kprateek.nayak@....com>
CC: <Alexander.Deucher@....com>, <Xinmei.Huang@....com>,
	<bharathprabhu.perdoor@....com>, <poonam.aggrwal@....com>, <Li.Meng@....com>,
	<platform-driver-x86@...r.kernel.org>, <linux-kernel@...r.kernel.org>,
	<Xiaojian.Du@....com>
Subject: [PATCH 06/11] platform/x86/: hfi: parse CPU core ranking data from shared memory

From: Perry Yuan <Perry.Yuan@....com>

When `amd_hfi` driver is loaded, it will use PCCT subspace type 4 table
to retrieve the shared memory address which contains the CPU core ranking
table. This table includes a header that specifies the number of ranking data
entries to be parsed and rank each CPU core with the Performance and Energy
Efficiency capability as implemented by the CPU power management firmware.

Once the table has been parsed, each CPU is assigned a ranking score within
its class. Subsequently, when the scheduler selects cores, it chooses
from the ranking list based on the assigned scores in each class, thereby
ensuring the optimal selection of CPU cores according to their predefined
classifications and priorities.

Signed-off-by: Perry Yuan <Perry.Yuan@....com>
---
 drivers/platform/x86/amd/hfi/hfi.c | 267 +++++++++++++++++++++++++++++
 1 file changed, 267 insertions(+)

diff --git a/drivers/platform/x86/amd/hfi/hfi.c b/drivers/platform/x86/amd/hfi/hfi.c
index 914390682fef..50f6ca9c148a 100644
--- a/drivers/platform/x86/amd/hfi/hfi.c
+++ b/drivers/platform/x86/amd/hfi/hfi.c
@@ -19,23 +19,108 @@
 #include <linux/io.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/mailbox_client.h>
 #include <linux/mutex.h>
+#include <linux/percpu-defs.h>
 #include <linux/platform_device.h>
 #include <linux/printk.h>
 #include <linux/smp.h>
 #include <linux/string.h>
+#include <linux/topology.h>
+#include <linux/workqueue.h>
+
+#include <asm/cpu_device_id.h>
+
+#include <acpi/pcc.h>
+#include <acpi/cppc_acpi.h>
 
 #define AMD_HFI_DRIVER		"amd_hfi"
 #define AMD_HETERO_CPUID_27	0x80000027
+#define AMD_HETERO_RANKING_TABLE_VER	2
 static struct platform_device *device;
 
+/**
+ * struct amd_core_rank_table - HFI capabilities for the logical
+ * processors in the memory mappep table.
+ *
+ * @signature:	The PCC signature. The signature of a subspace is computed by
+ *		a bitwise of the value 0x50434300 with the subspace ID.
+ * @flags:	Notify on completion
+ * @length:	Length of payload being transmitted including command field
+ * @command:	Command being sent over the subspace
+ * @version_number:		Version number of the table
+ * @n_logical_processors:	Number of logical processors
+ * @n_capabilities:		Number of ranking dimensions (performance, efficiency, etc)
+ * @table_update_context:	Command being sent over the subspace
+ * @n_bitmaps:			Number of 32-bit bitmaps to enumerate all the APIC IDs
+ *				This is based on the maximum apic ID enumerated in the system
+ * @reserved:			The 24bit spare
+ * @bitmap_of_apic_id0:		Bit Map of enabled logical processors APIC ID for 31:0
+ * @bitmap_of_apic_id1:		Bit Map of enabled logical processors APIC ID for 64:32
+ * @n_classes:			Number of workload class
+ * @dynamic_rank_feature:	Table update mode
+ * @diagnostics:		Reserved space for diagnostics
+ * @timestamp:			Timestamp for last table update
+ * @table_size:			Table length of shared memory
+ * @shmem_info:			The table data read from shared memory
+ * @bitmap_data:		Bitmap data read from table
+ * @max_index:			The max data array in the table
+ *
+ * A memory mapped table is used to express the capabilities of each logical
+ * processor for each thread classification. with dynamic table update feature
+ * supported, the table will be notified to update for all the cores while system
+ * running, each table update can reorder the cores for much better performance and
+ * power efficiency.
+ *
+ */
+struct amd_hfi_metadata {
+	u32	signature;
+	u32	flags:1;
+	u32	length;
+	u32	command;
+	u8	version_number;
+	u8	n_logical_processors;
+	u8	n_capabilities;
+	u8	table_update_context;
+	u8	n_bitmaps;
+	u32	reserved:24;
+	u32	bitmap_of_apic_id0;
+	u32	bitmap_of_apic_id1;
+	u8	n_classes;
+	bool	dynamic_rank_feature;
+	int	diagnostics;
+	u64	timestamp;
+	u64	table_size;
+	u32	shmem_info[64];
+	u32	bitmap_data;
+	u32	max_index;
+};
+
 struct amd_hfi_data {
 	const char	*name;
 	struct device	*dev;
 	struct mutex	lock;
 	acpi_handle	dhandle;
+
+	/* PCCT table related*/
+	int		plat_irq;
+	struct pcc_mbox_chan	*pcc_chan;
+	void __iomem		*pcc_comm_addr;
+	struct completion	done;
+	struct mbox_client	cl;
+	raw_spinlock_t		table_lock;
+	struct acpi_subtable_header	*pcct_entry;
+	struct amd_hfi_metadata		*hfi_meta;
 };
 
+/**
+ * struct amd_hfi_classes - HFI class capabilities per CPU
+ * @perf:		Performance capability
+ * @eff:		Power efficiency capability
+ *
+ * Capabilities of a logical processor in the rank table. These capabilities are
+ * unitless and specific to each HFI class.
+ */
 struct amd_hfi_classes {
 	u32	perf;
 	u32	eff;
@@ -61,6 +146,118 @@ struct amd_hfi_cpuinfo {
 
 static DEFINE_PER_CPU(struct amd_hfi_cpuinfo, amd_hfi_cpuinfo) = {.class_index = -1};
 
+static int find_cpu_index_by_apicid(unsigned int target_apicid)
+{
+	int cpu_index;
+
+	for_each_possible_cpu(cpu_index) {
+		struct cpuinfo_x86 *info = &cpu_data(cpu_index);
+
+		if (info->topo.apicid == target_apicid) {
+			pr_debug(" match apic id %d for cpu index: %d",
+						info->topo.apicid, cpu_index);
+			return cpu_index;
+		}
+	}
+
+	return -ENODEV;
+}
+
+static int amd_hfi_fill_metadata(struct amd_hfi_data *amd_hfi_data)
+{
+	struct pcc_mbox_chan *pcc_chan = amd_hfi_data->pcc_chan;
+	struct acpi_subtable_header *pcct_entry = amd_hfi_data->pcct_entry;
+	struct acpi_pcct_ext_pcc_slave *pcct_ext =
+		(struct acpi_pcct_ext_pcc_slave *)pcct_entry;
+	struct amd_hfi_metadata *meta = amd_hfi_data->hfi_meta;
+	u32 header_index = 0, data_index = 0;
+	struct amd_hfi_cpuinfo *info;
+	u32 offset, offset_begin;
+	void __iomem *pcc_comm_addr;
+	int idx, ret, length;
+	u32 *shmem_info;
+
+	length = pcct_ext->length;
+	if (length <= 0) {
+		pr_err("length is less than min table length required\n");
+		return -EINVAL;
+	}
+
+	shmem_info = devm_kmalloc_array(amd_hfi_data->dev, length, sizeof(u32), GFP_KERNEL);
+	if (!shmem_info) {
+		pr_err("failed to allocate memory %x\n", length);
+		return -ENOMEM;
+	}
+
+	pcc_chan->shmem_base_addr = pcct_ext->base_address;
+	pcc_chan->shmem_size = pcct_ext->length;
+
+	amd_hfi_data->plat_irq = pcct_ext->platform_interrupt;
+	if (amd_hfi_data->plat_irq < 0) {
+		pr_err("invalid irq allocated in pcct table\n");
+		return -EINVAL;
+	}
+
+	pcc_comm_addr = acpi_os_ioremap(pcc_chan->shmem_base_addr, pcc_chan->shmem_size);
+	if (!pcc_comm_addr) {
+		pr_err("failed to ioremap PCC common region mem\n");
+		return -ENOMEM;
+	}
+
+	raw_spin_lock(&amd_hfi_data->table_lock);
+
+	memcpy_fromio(shmem_info, (u32 __iomem *)pcc_comm_addr, length);
+
+	/* extended PCC subspace shared memory region */
+	meta->signature = shmem_info[header_index];
+	meta->flags = shmem_info[++header_index];
+	meta->length = shmem_info[++header_index];
+	meta->command = shmem_info[++header_index];
+	idx = header_index + 1;
+
+	/* shared memory region for cores ranking data */
+	meta->version_number = shmem_info[idx] & 0xFF;
+	meta->n_logical_processors = (shmem_info[idx] >> 8) & 0xFF;
+	meta->n_capabilities = (shmem_info[idx] >> 16) & 0xFF;
+	meta->table_update_context = (shmem_info[idx] >> 24) & 0xFF;
+	meta->n_bitmaps = shmem_info[++idx] & 0xFF;
+	meta->n_classes = (shmem_info[idx] >> 8) & 0xFF;
+	meta->bitmap_data = shmem_info[++idx];
+	meta->max_index = meta->n_bitmaps * 32;
+
+	if (meta->version_number == AMD_HETERO_RANKING_TABLE_VER)
+		offset_begin = idx + 1;
+
+	for (u32 bit_idx = 0; bit_idx < meta->max_index; bit_idx++) {
+		if (meta->bitmap_data & (1u << bit_idx)) {
+			int cpu_index = find_cpu_index_by_apicid(bit_idx);
+			if (cpu_index < 0) {
+				ret = -ENODEV;
+				goto err_map;
+			}
+
+			info = per_cpu_ptr(&amd_hfi_cpuinfo, cpu_index);
+
+			offset = data_index * 6 + offset_begin;
+			for (int i = 0; i < meta->n_classes; i++) {
+				info->amd_hfi_classes[i].eff = shmem_info[offset + 2 * i];
+				info->amd_hfi_classes[i].perf = shmem_info[offset + 2 * i + 1];
+			}
+		} else {
+			continue;
+		}
+		data_index++;
+	}
+	raw_spin_unlock(&amd_hfi_data->table_lock);
+	iounmap(pcc_comm_addr);
+
+	return 0;
+
+err_map:
+	raw_spin_unlock(&amd_hfi_data->table_lock);
+	return ret;
+}
+
 static int amd_hfi_alloc_class_data(struct platform_device *pdev)
 {
 	struct amd_hfi_cpuinfo *hfi_cpuinfo;
@@ -96,6 +293,56 @@ static void amd_hfi_remove(struct platform_device *pdev)
 
 	mutex_destroy(&dev->lock);
 }
+static int amd_hfi_metadata_parser(struct platform_device *pdev,
+		struct amd_hfi_data *amd_hfi_data)
+{
+	struct mbox_chan *pcc_mbox_channels;
+	struct pcc_mbox_chan *pcc_chan;
+	struct acpi_subtable_header *pcct_entry;
+	struct acpi_table_header *pcct_tbl;
+	struct device *dev = &pdev->dev;
+	acpi_status status;
+	int ret = 0, count = 1;
+
+	status = acpi_get_table(ACPI_SIG_PCCT, 0, &pcct_tbl);
+	if (ACPI_FAILURE(status) || !pcct_tbl) {
+		pr_err("acpi_get_table failed!\n");
+		return -ENODEV;
+	}
+
+	pcc_mbox_channels = devm_kcalloc(dev, count,
+			sizeof(*pcc_mbox_channels), GFP_KERNEL);
+	if (!pcc_mbox_channels) {
+		ret = -ENOMEM;
+		goto exit_err;
+	}
+
+	pcc_chan = devm_kcalloc(dev, count, sizeof(*pcc_chan), GFP_KERNEL);
+	if (!pcc_chan) {
+		ret = -ENOMEM;
+		goto exit_err;
+	}
+
+	/* get pointer to the first PCC subspace entry */
+	pcct_entry = (struct acpi_subtable_header *) (
+			(unsigned long) pcct_tbl + sizeof(struct acpi_table_pcct));
+
+	pcc_chan->mchan = &pcc_mbox_channels[0];
+
+	amd_hfi_data->pcc_chan = pcc_chan;
+	amd_hfi_data->pcct_entry = pcct_entry;
+
+	/* parse the shared memory info from the pcct table */
+	ret = amd_hfi_fill_metadata(amd_hfi_data);
+	if (ret) {
+		pr_err("failed to parse core ranking table\n");
+		ret = -ENODATA;
+	}
+
+exit_err:
+	acpi_put_table(pcct_tbl);
+	return ret;
+}
 
 static const struct acpi_device_id amd_hfi_platform_match[] = {
 	{ "AMDI0104", 0},
@@ -120,6 +367,10 @@ static int amd_hfi_probe(struct platform_device *pdev)
 	if (!amd_hfi_data)
 		return -ENOMEM;
 
+	amd_hfi_data->hfi_meta = devm_kzalloc(&pdev->dev,
+					sizeof(*amd_hfi_data->hfi_meta), GFP_KERNEL);
+	if (!amd_hfi_data->hfi_meta)
+		return -ENOMEM;
 	amd_hfi_data->dev = &pdev->dev;
 	dhandle = ACPI_HANDLE(&pdev->dev);
 	if (!dhandle) {
@@ -132,7 +383,10 @@ static int amd_hfi_probe(struct platform_device *pdev)
 		return -ENODEV;
 
 	amd_hfi_data->dhandle = dhandle;
+
+	raw_spin_lock_init(&amd_hfi_data->table_lock);
 	mutex_init(&amd_hfi_data->lock);
+
 	platform_set_drvdata(pdev, amd_hfi_data);
 
 	/* alloc data array for hardware feedback class data */
@@ -140,7 +394,20 @@ static int amd_hfi_probe(struct platform_device *pdev)
 	if (ret)
 		return -ENOMEM;
 
+	ret = amd_hfi_metadata_parser(pdev, amd_hfi_data);
+	if (ret) {
+		dev_err(&pdev->dev, "failed to parse PCCT table data with %d.\n", ret);
+		goto err_exit;
+	}
+
+	amd_hfi_data->hfi_meta->dynamic_rank_feature =
+					cpuid_ebx(AMD_HETERO_CPUID_27) & 0xF;
+	dev_dbg(&pdev->dev, "%s driver registered.\n", pdev->name);
+
 	return 0;
+
+err_exit:
+	return ret;
 }
 
 static struct platform_driver amd_hfi_driver = {
-- 
2.34.1


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ