lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250415154656.1698522-2-scott@os.amperecomputing.com>
Date: Tue, 15 Apr 2025 08:46:56 -0700
From: D Scott Phillips <scott@...amperecomputing.com>
To: Catalin Marinas <catalin.marinas@....com>,
	Joey Gouly <joey.gouly@....com>,
	Marc Zyngier <maz@...nel.org>,
	Oliver Upton <oliver.upton@...ux.dev>,
	Suzuki K Poulose <suzuki.poulose@....com>,
	Will Deacon <will@...nel.org>,
	Zenghui Yu <yuzenghui@...wei.com>,
	kvmarm@...ts.linux.dev,
	linux-arm-kernel@...ts.infradead.org,
	linux-kernel@...r.kernel.org
Subject: [PATCH 2/2] KVM: arm64: Avoid blocking irqs when tlb flushing/ATing with HCR.TGE=0

pNMIs are intended to be deliverable during operations like guest
tlb flushing or nested AT, however the setting of HCR_EL2 controls
are accidentally blocking async exceptions.

You can see this by doing:

     # perf record -e cycles:Hk -g ./dirty_log_perf_test -m 3 \
       -i 4 -s anonymous -b 4G -v 32

Where no samples will be collected in __kvm_tlb_flush_vmid_ipa_nsh()
between enter_vmid_context() and exit_vmid_context() then many
samples are collected right after the write to HCR_EL2 in
exit_vmid_context(), where pNMIs actually get unmasked.

Set HCR_EL2.IMO so that pNMIs are not blocked during guest tlb
flushing or nested AT.

Signed-off-by: D Scott Phillips <scott@...amperecomputing.com>
---
 arch/arm64/include/asm/hardirq.h |  3 ++-
 arch/arm64/kvm/at.c              |  4 +++-
 arch/arm64/kvm/hyp/vhe/tlb.c     | 10 ++++++++++
 3 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/include/asm/hardirq.h b/arch/arm64/include/asm/hardirq.h
index cbfa7b6f2e098..6eb3f93851023 100644
--- a/arch/arm64/include/asm/hardirq.h
+++ b/arch/arm64/include/asm/hardirq.h
@@ -41,7 +41,8 @@ do {									\
 									\
 	___hcr = read_sysreg(hcr_el2);					\
 	if (!(___hcr & HCR_TGE)) {					\
-		write_sysreg(___hcr | HCR_TGE, hcr_el2);		\
+		write_sysreg((___hcr & ~(HCR_AMO | HCR_IMO | HCR_FMO)) |\
+			     HCR_TGE, hcr_el2);				\
 		isb();							\
 	}								\
 	/*								\
diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c
index ff4b06ce661af..f31f0d78c5813 100644
--- a/arch/arm64/kvm/at.c
+++ b/arch/arm64/kvm/at.c
@@ -1269,7 +1269,9 @@ static u64 __kvm_at_s1e01_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
 
 skip_mmu_switch:
 	/* Clear TGE, enable S2 translation, we're rolling */
-	write_sysreg((config.hcr & ~HCR_TGE) | HCR_VM,	hcr_el2);
+	write_sysreg((config.hcr & ~HCR_TGE) |
+		     HCR_AMO | HCR_IMO | HCR_FMO | HCR_VM,
+		     hcr_el2);
 	isb();
 
 	switch (op) {
diff --git a/arch/arm64/kvm/hyp/vhe/tlb.c b/arch/arm64/kvm/hyp/vhe/tlb.c
index 3d50a1bd2bdbc..ecb700bab3b8f 100644
--- a/arch/arm64/kvm/hyp/vhe/tlb.c
+++ b/arch/arm64/kvm/hyp/vhe/tlb.c
@@ -55,6 +55,15 @@ static void enter_vmid_context(struct kvm_s2_mmu *mmu,
 	 * bits. Changing E2H is impossible (goodbye TTBR1_EL2), so
 	 * let's flip TGE before executing the TLB operation.
 	 *
+	 * One other fun complication to consider is the target EL for
+	 * asynchronous exceptions. We want to allow NMIs during tlb flushing,
+	 * so we need to ensure that the target EL for IRQs remains as EL2.
+	 * HCR_EL2.{E2H,TGE,IMO} = {1,0,0} would set the target EL for IRQs as
+	 * EL1, and IRQs at EL2 would be "C" (Interrupts not taken regardless
+	 * of the value of interrupt masks). So we need to set
+	 * HCR_EL2.{E2H,TGE,IMO} = {1,0,1} so that NMIs will still be
+	 * delivered.
+	 *
 	 * ARM erratum 1165522 requires some special handling (again),
 	 * as we need to make sure both stages of translation are in
 	 * place before clearing TGE. __load_stage2() already
@@ -63,6 +72,7 @@ static void enter_vmid_context(struct kvm_s2_mmu *mmu,
 	__load_stage2(mmu, mmu->arch);
 	val = read_sysreg(hcr_el2);
 	val &= ~HCR_TGE;
+	val |= HCR_AMO | HCR_IMO | HCR_FMO;
 	write_sysreg(val, hcr_el2);
 	isb();
 }
-- 
2.48.1


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ