lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date: Sat,  3 Feb 2024 17:11:55 +0800
From: Zhao Liu <zhao1.liu@...ux.intel.com>
To: Paolo Bonzini <pbonzini@...hat.com>,
	Sean Christopherson <seanjc@...gle.com>,
	"Rafael J . Wysocki" <rafael@...nel.org>,
	Daniel Lezcano <daniel.lezcano@...aro.org>,
	Thomas Gleixner <tglx@...utronix.de>,
	Ingo Molnar <mingo@...hat.com>,
	Borislav Petkov <bp@...en8.de>,
	Dave Hansen <dave.hansen@...ux.intel.com>,
	"H . Peter Anvin" <hpa@...or.com>,
	kvm@...r.kernel.org,
	linux-pm@...r.kernel.org,
	linux-kernel@...r.kernel.org,
	x86@...nel.org
Cc: Ricardo Neri <ricardo.neri-calderon@...ux.intel.com>,
	Len Brown <len.brown@...el.com>,
	Zhang Rui <rui.zhang@...el.com>,
	Zhenyu Wang <zhenyu.z.wang@...el.com>,
	Zhuocheng Ding <zhuocheng.ding@...el.com>,
	Dapeng Mi <dapeng1.mi@...el.com>,
	Yanting Jiang <yanting.jiang@...el.com>,
	Yongwei Ma <yongwei.ma@...el.com>,
	Vineeth Pillai <vineeth@...byteword.org>,
	Suleiman Souhlal <suleiman@...gle.com>,
	Masami Hiramatsu <mhiramat@...gle.com>,
	David Dai <davidai@...gle.com>,
	Saravana Kannan <saravanak@...gle.com>,
	Zhao Liu <zhao1.liu@...el.com>
Subject: [RFC 07/26] KVM: VMX: Emulate ACPI (CPUID.0x01.edx[bit 22]) feature

From: Zhuocheng Ding <zhuocheng.ding@...el.com>

The ACPI (Thermal Monitor and Software Controlled Clock Facilities)
feature is a prerequisite for thermal interrupt processing, so it is
required for handling HFI notifications (which are delivered as
thermal interrupts).

To allow a VM to handle thermal interrupts, we need to emulate the ACPI
feature in KVM:

1. Emulate MSR_IA32_THERM_CONTROL (alias, IA32_CLOCK_MODULATION),
MSR_IA32_THERM_INTERRUPT and MSR_IA32_THERM_STATUS with dummy values.

According to SDM [1], the ACPI feature means:

"The ACPI flag (bit 22) of the CPUID feature flags indicates the
presence of the IA32_THERM_STATUS, IA32_THERM_INTERRUPT,
IA32_CLOCK_MODULATION MSRs, and the xAPIC thermal LVT entry."

Dummy values are sufficient for KVM to emulate RDMSR/WRMSR accesses to
these MSRs.

2. Add the thermal interrupt injection interfaces.

This interface is needed for the ACPI emulation to be complete. Although
no thermal interrupt is actually injected into the guest yet, the
HFI/ITD emulation in the following patches will inject thermal
interrupts into the guest once the conditions are met.

3. Additionally, expose the CPUID bit of the ACPI feature to the VM,
which can help enable thermal interrupt handling in the VM.

[1]: SDM, vol. 3B, section 15.8.4.1, Detection of Software Controlled
Clock Modulation Extension.

Tested-by: Yanting Jiang <yanting.jiang@...el.com>
Signed-off-by: Zhuocheng Ding <zhuocheng.ding@...el.com>
Co-developed-by: Zhao Liu <zhao1.liu@...el.com>
Signed-off-by: Zhao Liu <zhao1.liu@...el.com>
---
 arch/x86/kvm/cpuid.c   |  2 +-
 arch/x86/kvm/irq.h     |  1 +
 arch/x86/kvm/lapic.c   |  9 ++++
 arch/x86/kvm/svm/svm.c |  3 ++
 arch/x86/kvm/vmx/vmx.c | 94 ++++++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/vmx/vmx.h |  3 ++
 arch/x86/kvm/x86.c     |  3 ++
 7 files changed, 114 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index adba49afb5fe..1ad547651022 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -623,7 +623,7 @@ void kvm_set_cpu_caps(void)
 		F(CX8) | F(APIC) | 0 /* Reserved */ | F(SEP) |
 		F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
 		F(PAT) | F(PSE36) | 0 /* PSN */ | F(CLFLUSH) |
-		0 /* Reserved, DS, ACPI */ | F(MMX) |
+		0 /* Reserved, DS */ | F(ACPI) | F(MMX) |
 		F(FXSR) | F(XMM) | F(XMM2) | F(SELFSNOOP) |
 		0 /* HTT, TM, Reserved, PBE */
 	);
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index c2d7cfe82d00..e11c1fb6e1e6 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -99,6 +99,7 @@ static inline int irqchip_in_kernel(struct kvm *kvm)
 void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu);
 void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu);
 void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu);
+void kvm_apic_therm_deliver(struct kvm_vcpu *vcpu);
 void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu);
 void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu);
 void __kvm_migrate_timers(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 3242f3da2457..af8572798976 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -2783,6 +2783,15 @@ void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu)
 		kvm_apic_local_deliver(apic, APIC_LVT0);
 }
 
+void kvm_apic_therm_deliver(struct kvm_vcpu *vcpu)
+{
+	struct kvm_lapic *apic = vcpu->arch.apic;
+
+	if (apic)
+		kvm_apic_local_deliver(apic, APIC_LVTTHMR);
+}
+EXPORT_SYMBOL_GPL(kvm_apic_therm_deliver);
+
 static const struct kvm_io_device_ops apic_mmio_ops = {
 	.read     = apic_mmio_read,
 	.write    = apic_mmio_write,
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index e90b429c84f1..2e22d5e86768 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4288,6 +4288,9 @@ static bool svm_has_emulated_msr(struct kvm *kvm, u32 index)
 	switch (index) {
 	case MSR_IA32_MCG_EXT_CTL:
 	case KVM_FIRST_EMULATED_VMX_MSR ... KVM_LAST_EMULATED_VMX_MSR:
+	case MSR_IA32_THERM_CONTROL:
+	case MSR_IA32_THERM_INTERRUPT:
+	case MSR_IA32_THERM_STATUS:
 		return false;
 	case MSR_IA32_SMBASE:
 		if (!IS_ENABLED(CONFIG_KVM_SMM))
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 8f5981635fe5..aa37b55cf045 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -157,6 +157,32 @@ module_param(allow_smaller_maxphyaddr, bool, S_IRUGO);
 	RTIT_STATUS_ERROR | RTIT_STATUS_STOPPED | \
 	RTIT_STATUS_BYTECNT))
 
+/*
+ * TM2 (CPUID.01H:ECX[8]), DTHERM (CPUID.06H:EAX[0]), PLN (CPUID.06H:EAX[4]),
+ * and HWP (CPUID.06H:EAX[7]) are not emulated in kvm.
+ */
+#define MSR_IA32_THERM_STATUS_RO_MASK (THERM_STATUS_PROCHOT | \
+	THERM_STATUS_PROCHOT_FORCEPR_EVENT | THERM_STATUS_CRITICAL_TEMP)
+#define MSR_IA32_THERM_STATUS_RWC0_MASK (THERM_STATUS_PROCHOT_LOG | \
+	THERM_STATUS_PROCHOT_FORCEPR_LOG | THERM_STATUS_CRITICAL_TEMP_LOG)
+/* MSR_IA32_THERM_STATUS unavailable bits mask: unsupported and reserved bits. */
+#define MSR_IA32_THERM_STATUS_UNAVAIL_MASK (~(MSR_IA32_THERM_STATUS_RO_MASK | \
+	MSR_IA32_THERM_STATUS_RWC0_MASK))
+
+/* ECMD (CPUID.06H:EAX[5]) is not emulated in kvm. */
+#define MSR_IA32_THERM_CONTROL_AVAIL_MASK (THERM_ON_DEM_CLO_MOD_ENABLE | \
+	THERM_ON_DEM_CLO_MOD_DUTY_CYC_MASK)
+
+/*
+ * MSR_IA32_THERM_INTERRUPT available bits mask.
+ * PLN (CPUID.06H:EAX[4]) and HFN (CPUID.06H:EAX[24]) are not emulated in kvm.
+ */
+#define MSR_IA32_THERM_INTERRUPT_AVAIL_MASK (THERM_INT_HIGH_ENABLE | \
+	THERM_INT_LOW_ENABLE | THERM_INT_PROCHOT_ENABLE | \
+	THERM_INT_FORCEPR_ENABLE | THERM_INT_CRITICAL_TEM_ENABLE | \
+	THERM_MASK_THRESHOLD0 | THERM_INT_THRESHOLD0_ENABLE | \
+	THERM_MASK_THRESHOLD1 | THERM_INT_THRESHOLD1_ENABLE)
+
 /*
  * List of MSRs that can be directly passed to the guest.
  * In addition to these x2apic and PT MSRs are handled specially.
@@ -1470,6 +1496,19 @@ void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu,
 	}
 }
 
+static void vmx_inject_therm_interrupt(struct kvm_vcpu *vcpu)
+{
+	/*
+	 * From SDM, the ACPI flag also indicates the presence of the
+	 * xAPIC thermal LVT entry.
+	 */
+	if (!guest_cpuid_has(vcpu, X86_FEATURE_ACPI))
+		return;
+
+	if (irqchip_in_kernel(vcpu->kvm))
+		kvm_apic_therm_deliver(vcpu);
+}
+
 /*
  * Switches to specified vcpu, until a matching vcpu_put(), but assumes
  * vcpu mutex is already taken.
@@ -2109,6 +2148,24 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	case MSR_IA32_DEBUGCTLMSR:
 		msr_info->data = vmcs_read64(GUEST_IA32_DEBUGCTL);
 		break;
+	case MSR_IA32_THERM_CONTROL:
+		if (!msr_info->host_initiated &&
+		    !guest_cpuid_has(vcpu, X86_FEATURE_ACPI))
+			return 1;
+		msr_info->data = vmx->msr_ia32_therm_control;
+		break;
+	case MSR_IA32_THERM_INTERRUPT:
+		if (!msr_info->host_initiated &&
+		    !guest_cpuid_has(vcpu, X86_FEATURE_ACPI))
+			return 1;
+		msr_info->data = vmx->msr_ia32_therm_interrupt;
+		break;
+	case MSR_IA32_THERM_STATUS:
+		if (!msr_info->host_initiated &&
+		    !guest_cpuid_has(vcpu, X86_FEATURE_ACPI))
+			return 1;
+		msr_info->data = vmx->msr_ia32_therm_status;
+		break;
 	default:
 	find_uret_msr:
 		msr = vmx_find_uret_msr(vmx, msr_info->index);
@@ -2452,6 +2509,40 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		}
 		ret = kvm_set_msr_common(vcpu, msr_info);
 		break;
+	case MSR_IA32_THERM_CONTROL:
+		if (!msr_info->host_initiated &&
+		    !guest_cpuid_has(vcpu, X86_FEATURE_ACPI))
+			return 1;
+		if (!msr_info->host_initiated &&
+		    data & ~MSR_IA32_THERM_CONTROL_AVAIL_MASK)
+			return 1;
+		vmx->msr_ia32_therm_control = data;
+		break;
+	case MSR_IA32_THERM_INTERRUPT:
+		if (!msr_info->host_initiated &&
+		    !guest_cpuid_has(vcpu, X86_FEATURE_ACPI))
+			return 1;
+		if (!msr_info->host_initiated &&
+		    data & ~MSR_IA32_THERM_INTERRUPT_AVAIL_MASK)
+			return 1;
+		vmx->msr_ia32_therm_interrupt = data;
+		break;
+	case MSR_IA32_THERM_STATUS:
+		if (!msr_info->host_initiated &&
+		    !guest_cpuid_has(vcpu, X86_FEATURE_ACPI))
+			return 1;
+		/* Unsupported and reserved bits: generate the exception. */
+		if (!msr_info->host_initiated &&
+		    data & MSR_IA32_THERM_STATUS_UNAVAIL_MASK)
+			return 1;
+		if (!msr_info->host_initiated) {
+			data = vmx_set_msr_rwc0_bits(data, vmx->msr_ia32_therm_status,
+						     MSR_IA32_THERM_STATUS_RWC0_MASK);
+			data = vmx_set_msr_ro_bits(data, vmx->msr_ia32_therm_status,
+						   MSR_IA32_THERM_STATUS_RO_MASK);
+		}
+		vmx->msr_ia32_therm_status = data;
+		break;
 
 	default:
 	find_uret_msr:
@@ -4870,6 +4961,9 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 	vmx->spec_ctrl = 0;
 
 	vmx->msr_ia32_umwait_control = 0;
+	vmx->msr_ia32_therm_control = 0;
+	vmx->msr_ia32_therm_interrupt = 0;
+	vmx->msr_ia32_therm_status = 0;
 
 	vmx->hv_deadline_tsc = -1;
 	kvm_set_cr8(vcpu, 0);
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index e3b0985bb74a..e159dd5b7a66 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -282,6 +282,9 @@ struct vcpu_vmx {
 
 	u64		      spec_ctrl;
 	u32		      msr_ia32_umwait_control;
+	u64		      msr_ia32_therm_control;
+	u64		      msr_ia32_therm_interrupt;
+	u64		      msr_ia32_therm_status;
 
 	/*
 	 * loaded_vmcs points to the VMCS currently used in this vcpu. For a
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index cd9a7251c768..50aceb0ce4ee 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1545,6 +1545,9 @@ static const u32 emulated_msrs_all[] = {
 	MSR_AMD64_TSC_RATIO,
 	MSR_IA32_POWER_CTL,
 	MSR_IA32_UCODE_REV,
+	MSR_IA32_THERM_CONTROL,
+	MSR_IA32_THERM_INTERRUPT,
+	MSR_IA32_THERM_STATUS,
 
 	/*
 	 * KVM always supports the "true" VMX control MSRs, even if the host
-- 
2.34.1


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ