lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20220518063032.2377351-7-tarumizu.kohei@fujitsu.com>
Date:   Wed, 18 May 2022 15:30:30 +0900
From:   Kohei Tarumizu <tarumizu.kohei@...itsu.com>
To:     catalin.marinas@....com, will@...nel.org, tglx@...utronix.de,
        mingo@...hat.com, bp@...en8.de, dave.hansen@...ux.intel.com,
        x86@...nel.org, hpa@...or.com, gregkh@...uxfoundation.org,
        rafael@...nel.org, mchehab+huawei@...nel.org, eugenis@...gle.com,
        tony.luck@...el.com, pcc@...gle.com, peterz@...radead.org,
        marcos@...a.pet, conor.dooley@...rochip.com,
        nicolas.ferre@...rochip.com, marcan@...can.st,
        linus.walleij@...aro.org, arnd@...db.de,
        linux-kernel@...r.kernel.org, linux-arm-kernel@...ts.infradead.org
Cc:     tarumizu.kohei@...itsu.com
Subject: [PATCH v4 6/8] x86: Add hardware prefetch control support for x86

Adds module init/exit code to create sysfs attributes for x86 with
"hardware_prefetcher_enable", "ip_prefetcher_enable" and
"adjacent_cache_line_prefetcher_enable".

This driver works only if a CPU model is mapped to type of register
specification(e.g. TYPE_L12_BASE) in pfctl_match[].

The details of the registers(MSR_MISC_FEATURE_CONTROL) to be read and
written in this patch are described below:

"https://www.intel.com/content/www/us/en/developer/articles/technical/intel-sdm.html"
    Volume 4

Signed-off-by: Kohei Tarumizu <tarumizu.kohei@...itsu.com>
---
 arch/x86/kernel/cpu/x86-pfctl.c | 258 ++++++++++++++++++++++++++++++++
 1 file changed, 258 insertions(+)
 create mode 100644 arch/x86/kernel/cpu/x86-pfctl.c

diff --git a/arch/x86/kernel/cpu/x86-pfctl.c b/arch/x86/kernel/cpu/x86-pfctl.c
new file mode 100644
index 000000000000..0b3b22128dff
--- /dev/null
+++ b/arch/x86/kernel/cpu/x86-pfctl.c
@@ -0,0 +1,258 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2022 FUJITSU LIMITED
+ *
+ * x86 Hardware Prefetch Control support
+ */
+
+#include <linux/cacheinfo.h>
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/pfctl.h>
+#include <linux/slab.h>
+#include <linux/smp.h>
+#include <linux/sysfs.h>
+#include <asm/cpu_device_id.h>
+#include <asm/intel-family.h>
+#include <asm/msr.h>
+
+/*
+ * MSR_MISC_FEATURE_CONTROL has three type of register specifications.
+ *
+ * The register specification of TYPE_L12_BASE is as follow:
+ * [0]    L2 Hardware Prefetcher Disable (R/W)
+ * [1]    Reserved
+ * [2]    DCU Hardware Prefetcher Disable (R/W)
+ * [63:3] Reserved
+ *
+ * The register specification of TYPE_L12_PLUS is as follow:
+ * [0]    L2 Hardware Prefetcher Disable (R/W)
+ * [1]    L2 Adjacent Cache Line Prefetcher Disable (R/W)
+ * [2]    DCU Hardware Prefetcher Disable (R/W)
+ * [3]    DCU IP Prefetcher Disable (R/W)
+ * [63:4] Reserved
+ *
+ * The register specification of TYPE_L12_XPHI is as follow:
+ * [0]    L2 Hardware Prefetcher Disable (R/W)
+ * [1]    DCU Hardware Prefetcher Disable (R/W)
+ * [63:2] Reserved
+ *
+ * See "Intel 64 and IA-32 Architectures Software Developer's Manual"
+ * (https://www.intel.com/content/www/us/en/developer/articles/technical/intel-sdm.html)
+ * for register specification details.
+ */
+enum {
+	TYPE_L12_BASE,
+	TYPE_L12_PLUS,
+	TYPE_L12_XPHI,
+};
+
+struct x86_pfctl_attr {
+	struct device_attribute		attr;
+	u64				mask;
+};
+
+/*
+ * Returns the cpu number of the cpu_device(/sys/devices/system/cpu/cpuX)
+ * in the ancestor directory of prefetch_control.
+ *
+ * When initializing this driver, it is verified that the cache directory exists
+ * under cpuX device. Therefore, the third level up from prefetch_control is
+ * cpuX device as shown below.
+ *
+ * /sys/devices/system/cpu/cpuX/cache/indexX/prefetch_control
+ */
+static inline unsigned int pfctl_dev_get_cpu(struct device *pfctl_dev)
+{
+	return pfctl_dev->parent->parent->parent->id;
+}
+
+static ssize_t
+pfctl_show(struct device *pfctl_dev, struct device_attribute *attr, char *buf)
+{
+	unsigned int cpu = pfctl_dev_get_cpu(pfctl_dev);
+	struct x86_pfctl_attr *xa;
+	u64 val;
+
+	xa = container_of(attr, struct x86_pfctl_attr, attr);
+
+	rdmsrl_on_cpu(cpu, MSR_MISC_FEATURE_CONTROL, &val);
+	return sysfs_emit(buf, "%d\n", val & xa->mask ? 0 : 1);
+}
+
+struct write_info {
+	u64 mask;
+	bool enable;
+};
+
+/*
+ * wrmsrl() in this patch is only done inside of an interrupt-disabled region
+ * to avoid a conflict of write access from other drivers,
+ */
+static void pfctl_write(void *info)
+{
+	struct write_info *winfo = info;
+	u64 reg;
+
+	reg = 0;
+	rdmsrl(MSR_MISC_FEATURE_CONTROL, reg);
+
+	if (winfo->enable)
+		reg &= ~winfo->mask;
+	else
+		reg |= winfo->mask;
+
+	wrmsrl(MSR_MISC_FEATURE_CONTROL, reg);
+}
+
+/*
+ * MSR_MISC_FEATURE_CONTROL has "core" scope, so define the lock to avoid a
+ * conflict of write access from different logical processors in the same core.
+ */
+static DEFINE_MUTEX(pfctl_mutex);
+
+static ssize_t
+pfctl_store(struct device *pfctl_dev, struct device_attribute *attr,
+	    const char *buf, size_t size)
+{
+	unsigned int cpu = pfctl_dev_get_cpu(pfctl_dev);
+	struct x86_pfctl_attr *xa;
+	struct write_info info;
+
+	xa = container_of(attr, struct x86_pfctl_attr, attr);
+	info.mask = xa->mask;
+
+	if (strtobool(buf, &info.enable) < 0)
+		return -EINVAL;
+
+	mutex_lock(&pfctl_mutex);
+	smp_call_function_single(cpu, pfctl_write, &info, true);
+	mutex_unlock(&pfctl_mutex);
+
+	return size;
+}
+
+#define PFCTL_ATTR(_name, _level, _bit)					\
+	struct x86_pfctl_attr attr_l##_level##_##_name = {		\
+		.attr = __ATTR(_name, 0600, pfctl_show, pfctl_store),	\
+		.mask = BIT_ULL(_bit), }
+
+static PFCTL_ATTR(hardware_prefetcher_enable,			1, 2);
+static PFCTL_ATTR(hardware_prefetcher_enable,			2, 0);
+static PFCTL_ATTR(ip_prefetcher_enable,				1, 3);
+static PFCTL_ATTR(adjacent_cache_line_prefetcher_enable,	2, 1);
+
+static struct device_attribute *l1_attrs[] __ro_after_init = {
+	&attr_l1_hardware_prefetcher_enable.attr,
+	&attr_l1_ip_prefetcher_enable.attr,
+	NULL,
+};
+
+static struct device_attribute *l2_attrs[] __ro_after_init = {
+	&attr_l2_hardware_prefetcher_enable.attr,
+	&attr_l2_adjacent_cache_line_prefetcher_enable.attr,
+	NULL,
+};
+
+static const struct pfctl_group x86_pfctl_groups[] = {
+	{
+		.level = 1,
+		.type = CACHE_TYPE_DATA,
+		.attrs = l1_attrs,
+	},
+	{
+		.level = 2,
+		.type = CACHE_TYPE_UNIFIED,
+		.attrs = l2_attrs,
+	},
+	{
+		.attrs = NULL,
+	},
+};
+
+/*
+ * Only BROADWELL_X has been tested in the actual machine at this point. Other
+ * models were defined based on the information in the "Intel 64 and IA-32
+ * Architectures Software Developer's Manual"
+ */
+static const struct x86_cpu_id pfctl_match[] __initconst = {
+	X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_D,	TYPE_L12_BASE),
+	X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT,	TYPE_L12_BASE),
+	X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS,	TYPE_L12_BASE),
+	X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D,	TYPE_L12_BASE),
+	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM,		TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_G,		TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EP,		TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EX,		TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE,		TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X,	TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE,		TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X,		TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(HASWELL,		TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X,		TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L,		TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G,		TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL,		TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G,		TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X,		TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D,		TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L,		TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE,		TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X,		TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L,		TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE,		TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE,		TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE_L,		TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(CANNONLAKE_L,	TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X,		TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D,		TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE,		TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L,		TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L,		TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE,		TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL,	TYPE_L12_XPHI),
+	X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM,	TYPE_L12_XPHI),
+	{},
+};
+
+static int __init x86_pfctl_init(void)
+{
+	const struct x86_cpu_id *m;
+
+	if (cpu_feature_enabled(X86_FEATURE_HYPERVISOR))
+		return -ENODEV;
+
+	m = x86_match_cpu(pfctl_match);
+	if (!m)
+		return -ENODEV;
+
+	switch (m->driver_data) {
+	case TYPE_L12_BASE:
+		l1_attrs[1] = NULL;
+		l2_attrs[1] = NULL;
+		break;
+	case TYPE_L12_PLUS:
+		break;
+	case TYPE_L12_XPHI:
+		attr_l1_hardware_prefetcher_enable.mask = BIT_ULL(1);
+		l1_attrs[1] = NULL;
+		l2_attrs[1] = NULL;
+		break;
+	default:
+		return -ENODEV;
+	};
+
+	return pfctl_register_attrs(x86_pfctl_groups);
+}
+
+static void __exit x86_pfctl_exit(void)
+{
+	return pfctl_unregister_attrs(x86_pfctl_groups);
+}
+
+late_initcall(x86_pfctl_init);
+module_exit(x86_pfctl_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("FUJITSU LIMITED");
+MODULE_DESCRIPTION("x86 Hardware Prefetch Control Driver");
-- 
2.27.0

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ