Message-Id: <20240301-kvm-arm64-defer-regs-v1-1-401e3de92e97@kernel.org>
Date: Fri, 01 Mar 2024 18:05:53 +0000
From: Mark Brown <broonie@...nel.org>
To: Marc Zyngier <maz@...nel.org>, Oliver Upton <oliver.upton@...ux.dev>, 
 James Morse <james.morse@....com>, 
 Suzuki K Poulose <suzuki.poulose@....com>, 
 Catalin Marinas <catalin.marinas@....com>, Will Deacon <will@...nel.org>
Cc: Joey Gouly <joey.gouly@....com>, linux-arm-kernel@...ts.infradead.org, 
 kvmarm@...ts.linux.dev, linux-kernel@...r.kernel.org, 
 Mark Brown <broonie@...nel.org>
Subject: [PATCH] KVM: arm64: Only save S1PIE registers when dirty

Currently we save the S1PIE registers every time we exit the guest, but
the expected usage pattern for these registers is that they will be
written very infrequently, likely once during initialisation and then
never updated again.  This means that most of our saves of these
registers are likely redundant.  Let's avoid these redundant saves by
enabling fine-grained write traps for the EL0 and EL1 PIE registers when
switching to the guest and only saving them if a write happened.
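
As an illustration only (not part of the patch), the nPIR_EL1 and
nPIRE0_EL1 bits in HFGWTR_EL2 have negative polarity, so a write trap
is armed by clearing the bit and disarmed by setting it, for example
via sysreg_clear_set_s():

	/* Arm the write trap for PIR_EL1 by clearing its FGT bit. */
	sysreg_clear_set_s(SYS_HFGWTR_EL2, HFGxTR_EL2_nPIR_EL1, 0);

	/* Disarm the trap once the first guest write has been seen. */
	sysreg_clear_set_s(SYS_HFGWTR_EL2, 0, HFGxTR_EL2_nPIR_EL1);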

We track whether the registers have been written by storing a mask of
bits for HFGWTR_EL2; we may be able to use the same approach for other
registers with similar access patterns.  Since it is likely that both
registers will be written in quick succession, we mark both PIR_EL1 and
PIRE0_EL1 as dirty if either is written, minimising trapping overhead.
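
To make the lifecycle concrete, here is a small stand-alone model of
the scheme in plain C.  The TRAP_* values and function names are
hypothetical and chosen purely for illustration; the patch itself uses
the HFGxTR_EL2_n* definitions and the ctxt_reg_*() helpers added below:

	#include <stdint.h>
	#include <stdio.h>

	/* Stand-ins for the HFGWTR_EL2 nPIR_EL1/nPIRE0_EL1 bits. */
	#define TRAP_PIR	(1ULL << 58)
	#define TRAP_PIRE0	(1ULL << 57)

	static uint64_t hfgwtr;		/* negative polarity: bit set = no trap */
	static uint64_t reg_dirty;	/* written since last save */

	/* Guest entry: arm write traps for the read-mostly registers. */
	static void activate_traps(void)
	{
		hfgwtr &= ~(TRAP_PIR | TRAP_PIRE0);
	}

	/* Write trap: disarm both traps and mark both registers dirty. */
	static void handle_write_trap(void)
	{
		hfgwtr |= TRAP_PIR | TRAP_PIRE0;
		reg_dirty |= TRAP_PIR | TRAP_PIRE0;
	}

	/* Guest exit: save only the registers that were actually written. */
	static void save_state(void)
	{
		if (reg_dirty & TRAP_PIR) {
			printf("saving PIR_EL1\n");
			reg_dirty &= ~TRAP_PIR;
		}
		if (reg_dirty & TRAP_PIRE0) {
			printf("saving PIRE0_EL1\n");
			reg_dirty &= ~TRAP_PIRE0;
		}
	}

	int main(void)
	{
		activate_traps();
		save_state();		/* nothing written, nothing saved */
		handle_write_trap();	/* guest writes PIR_EL1 */
		save_state();		/* saves both PIE registers once */
		return 0;
	}

Once saved and marked clean, subsequent exits skip the save entirely
until the guest writes again and re-dirties the mask.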

This will have a negative performance impact if guests do start updating
these registers frequently, but since the PIE indexes have a wide impact
on the page tables this seems unlikely.

We do not need to check for FGT support since it is mandatory for
systems with PIE.

Signed-off-by: Mark Brown <broonie@...nel.org>
---
I don't have a good sense of whether this is a good idea, or whether
this is a desirable implementation of the concept; the patch was
prompted by some concerns about the cost of the system register context
switching.  We should be able to do something similar for some of the
other registers.
---
 arch/arm64/include/asm/kvm_host.h          | 36 ++++++++++++++++++++++++++++++
 arch/arm64/kvm/hyp/include/hyp/switch.h    | 36 +++++++++++++++++++++++++++++
 arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h |  6 ++++-
 3 files changed, 77 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 21c57b812569..340567e9b206 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -475,6 +475,8 @@ enum vcpu_sysreg {
 };
 
 struct kvm_cpu_context {
+	u64	reg_dirty;		/* Mask of HFGWTR_EL2 bits for written registers */
+
 	struct user_pt_regs regs;	/* sp = sp_el0 */
 
 	u64	spsr_abt;
@@ -492,6 +494,31 @@ struct kvm_cpu_context {
 	u64 *vncr_array;
 };
 
+static inline bool __ctxt_reg_dirty(const struct kvm_cpu_context *ctxt,
+				    u64 trap)
+{
+	return ctxt->reg_dirty & trap;
+}
+
+static inline void __ctxt_reg_set_dirty(struct kvm_cpu_context *ctxt, u64 trap)
+{
+	ctxt->reg_dirty |= trap;
+}
+
+static inline void __ctxt_reg_set_clean(struct kvm_cpu_context *ctxt, u64 trap)
+{
+	ctxt->reg_dirty &= ~trap;
+}
+
+#define ctxt_reg_dirty(ctxt, trap) \
+	__ctxt_reg_dirty(ctxt, HFGxTR_EL2_##trap)
+
+#define ctxt_reg_set_dirty(ctxt, trap) \
+	__ctxt_reg_set_dirty(ctxt, HFGxTR_EL2_##trap)
+
+#define ctxt_reg_set_clean(ctxt, trap) \
+	__ctxt_reg_set_clean(ctxt, HFGxTR_EL2_##trap)
+
 struct kvm_host_data {
 	struct kvm_cpu_context host_ctxt;
 };
@@ -1118,6 +1145,15 @@ static inline void kvm_init_host_cpu_context(struct kvm_cpu_context *cpu_ctxt)
 {
 	/* The host's MPIDR is immutable, so let's set it up at boot time */
 	ctxt_sys_reg(cpu_ctxt, MPIDR_EL1) = read_cpuid_mpidr();
+
+	/*
+	 * Save the S1PIE setup on first use; we assume the host will
+	 * never update these registers after boot.
+	 */
+	if (cpus_have_final_cap(ARM64_HAS_S1PIE)) {
+		ctxt_reg_set_dirty(cpu_ctxt, nPIR_EL1);
+		ctxt_reg_set_dirty(cpu_ctxt, nPIRE0_EL1);
+	}
 }
 
 static inline bool kvm_system_needs_idmapped_vectors(void)
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index a038320cdb08..2cccc7f2b492 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -152,6 +152,12 @@ static inline void __activate_traps_hfgxtr(struct kvm_vcpu *vcpu)
 	if (cpus_have_final_cap(ARM64_WORKAROUND_AMPERE_AC03_CPU_38))
 		w_set |= HFGxTR_EL2_TCR_EL1_MASK;
 
+	/*
+	 * Trap writes to infrequently updated registers.
+	 */
+	if (cpus_have_final_cap(ARM64_HAS_S1PIE))
+		w_clr |= HFGxTR_EL2_nPIR_EL1 | HFGxTR_EL2_nPIRE0_EL1;
+
 	if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) {
 		compute_clr_set(vcpu, HFGRTR_EL2, r_clr, r_set);
 		compute_clr_set(vcpu, HFGWTR_EL2, w_clr, w_set);
@@ -570,6 +576,33 @@ static bool handle_ampere1_tcr(struct kvm_vcpu *vcpu)
 	return true;
 }
 
+static inline bool kvm_hyp_handle_read_mostly_sysreg(struct kvm_vcpu *vcpu)
+{
+	u32 sysreg = esr_sys64_to_sysreg(kvm_vcpu_get_esr(vcpu));
+	u64 fgt_w_set;
+
+	switch (sysreg) {
+	case SYS_PIR_EL1:
+	case SYS_PIRE0_EL1:
+		/*
+		 * PIR registers are always restored; we just need to
+		 * disable the write traps.  We expect the EL0 and EL1
+		 * controls to be updated close together, so enable
+		 * both.
+		 */
+		if (!cpus_have_final_cap(ARM64_HAS_S1PIE))
+			return false;
+		fgt_w_set = HFGxTR_EL2_nPIRE0_EL1 | HFGxTR_EL2_nPIR_EL1;
+		break;
+	default:
+		return false;
+	}
+
+	sysreg_clear_set_s(SYS_HFGWTR_EL2, 0, fgt_w_set);
+	vcpu->arch.ctxt.reg_dirty |= fgt_w_set;
+	return true;
+}
+
 static bool kvm_hyp_handle_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code)
 {
 	if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM) &&
@@ -590,6 +623,9 @@ static bool kvm_hyp_handle_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code)
 	if (kvm_hyp_handle_cntpct(vcpu))
 		return true;
 
+	if (kvm_hyp_handle_read_mostly_sysreg(vcpu))
+		return true;
+
 	return false;
 }
 
diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
index bb6b571ec627..f60d27288b2a 100644
--- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
+++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
@@ -55,9 +55,13 @@ static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
 	ctxt_sys_reg(ctxt, CONTEXTIDR_EL1) = read_sysreg_el1(SYS_CONTEXTIDR);
 	ctxt_sys_reg(ctxt, AMAIR_EL1)	= read_sysreg_el1(SYS_AMAIR);
 	ctxt_sys_reg(ctxt, CNTKCTL_EL1)	= read_sysreg_el1(SYS_CNTKCTL);
-	if (cpus_have_final_cap(ARM64_HAS_S1PIE)) {
+	if (ctxt_reg_dirty(ctxt, nPIR_EL1)) {
 		ctxt_sys_reg(ctxt, PIR_EL1)	= read_sysreg_el1(SYS_PIR);
+		ctxt_reg_set_clean(ctxt, nPIR_EL1);
+	}
+	if (ctxt_reg_dirty(ctxt, nPIRE0_EL1)) {
 		ctxt_sys_reg(ctxt, PIRE0_EL1)	= read_sysreg_el1(SYS_PIRE0);
+		ctxt_reg_set_clean(ctxt, nPIRE0_EL1);
 	}
 	ctxt_sys_reg(ctxt, PAR_EL1)	= read_sysreg_par();
 	ctxt_sys_reg(ctxt, TPIDR_EL1)	= read_sysreg(tpidr_el1);

---
base-commit: 54be6c6c5ae8e0d93a6c4641cb7528eb0b6ba478
change-id: 20240227-kvm-arm64-defer-regs-d29ae460d0b3

Best regards,
-- 
Mark Brown <broonie@...nel.org>

