[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <86v7r3yaoq.fsf@scott-ph-mail.amperecomputing.com>
Date: Wed, 16 Apr 2025 15:59:49 -0700
From: D Scott Phillips <scott@...amperecomputing.com>
To: Marc Zyngier <maz@...nel.org>
Cc: Catalin Marinas <catalin.marinas@....com>, Joey Gouly
<joey.gouly@....com>, Oliver Upton <oliver.upton@...ux.dev>, Suzuki K
Poulose <suzuki.poulose@....com>, Will Deacon <will@...nel.org>, Zenghui
Yu <yuzenghui@...wei.com>, kvmarm@...ts.linux.dev,
linux-arm-kernel@...ts.infradead.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH 2/2] KVM: arm64: Avoid blocking irqs when tlb
flushing/ATing with HCR.TGE=0
Marc Zyngier <maz@...nel.org> writes:
> On Tue, 15 Apr 2025 16:46:56 +0100,
> D Scott Phillips <scott@...amperecomputing.com> wrote:
>>
>> pNMIs are intended to be deliverable during operations like guest
>> tlb flushing or nested AT; however, the setting of HCR_EL2 controls
>> is accidentally blocking async exceptions.
>>
>> You can see this by doing:
>>
>> # perf record -e cycles:Hk -g ./dirty_log_perf_test -m 3 \
>> -i 4 -s anonymous -b 4G -v 32
>>
>> Where no samples will be collected in __kvm_tlb_flush_vmid_ipa_nsh()
>> between enter_vmid_context() and exit_vmid_context() then many
>> samples are collected right after the write to HCR_EL2 in
>> exit_vmid_context(), where pNMIs actually get unmasked.
>>
>> Set HCR_EL2.IMO so that pNMIs are not blocked during guest tlb
>> flushing or nested AT.
>>
>> Signed-off-by: D Scott Phillips <scott@...amperecomputing.com>
>> ---
>> arch/arm64/include/asm/hardirq.h | 3 ++-
>> arch/arm64/kvm/at.c | 4 +++-
>> arch/arm64/kvm/hyp/vhe/tlb.c | 10 ++++++++++
>> 3 files changed, 15 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch/arm64/include/asm/hardirq.h b/arch/arm64/include/asm/hardirq.h
>> index cbfa7b6f2e098..6eb3f93851023 100644
>> --- a/arch/arm64/include/asm/hardirq.h
>> +++ b/arch/arm64/include/asm/hardirq.h
>> @@ -41,7 +41,8 @@ do { \
>> \
>> ___hcr = read_sysreg(hcr_el2); \
>> if (!(___hcr & HCR_TGE)) { \
>> - write_sysreg(___hcr | HCR_TGE, hcr_el2); \
>> + write_sysreg((___hcr & ~(HCR_AMO | HCR_IMO | HCR_FMO)) |\
>> + HCR_TGE, hcr_el2); \
>> isb(); \
>> } \
>> /* \
>> diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c
>> index ff4b06ce661af..f31f0d78c5813 100644
>> --- a/arch/arm64/kvm/at.c
>> +++ b/arch/arm64/kvm/at.c
>> @@ -1269,7 +1269,9 @@ static u64 __kvm_at_s1e01_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
>>
>> skip_mmu_switch:
>> /* Clear TGE, enable S2 translation, we're rolling */
>> - write_sysreg((config.hcr & ~HCR_TGE) | HCR_VM, hcr_el2);
>> + write_sysreg((config.hcr & ~HCR_TGE) |
>> + HCR_AMO | HCR_IMO | HCR_FMO | HCR_VM,
>> + hcr_el2);
>> isb();
>>
>> switch (op) {
>> diff --git a/arch/arm64/kvm/hyp/vhe/tlb.c b/arch/arm64/kvm/hyp/vhe/tlb.c
>> index 3d50a1bd2bdbc..ecb700bab3b8f 100644
>> --- a/arch/arm64/kvm/hyp/vhe/tlb.c
>> +++ b/arch/arm64/kvm/hyp/vhe/tlb.c
>> @@ -55,6 +55,15 @@ static void enter_vmid_context(struct kvm_s2_mmu *mmu,
>> * bits. Changing E2H is impossible (goodbye TTBR1_EL2), so
>> * let's flip TGE before executing the TLB operation.
>> *
>> + * One other fun complication to consider is the target EL for
>> + * asynchronous exceptions. We want to allow NMIs during tlb flushing,
>> + * so we need to ensure that the target EL for IRQs remains as EL2.
>> + * HCR_EL2.{E2H,TGE,IMO} = {1,0,0} would set the target EL for IRQs as
>> + * EL1, and IRQs at EL2 would be "C" (Interrupts not taken regardless
>> + * of the value of interrupt masks). So we need to set
>> + * HCR_EL2.{E2H,TGE,IMO} = {1,0,1} so that NMIs will still be
>> + * delivered.
>> + *
>> * ARM erratum 1165522 requires some special handling (again),
>> * as we need to make sure both stages of translation are in
>> * place before clearing TGE. __load_stage2() already
>> @@ -63,6 +72,7 @@ static void enter_vmid_context(struct kvm_s2_mmu *mmu,
>> __load_stage2(mmu, mmu->arch);
>> val = read_sysreg(hcr_el2);
>> val &= ~HCR_TGE;
>> + val |= HCR_AMO | HCR_IMO | HCR_FMO;
>> write_sysreg(val, hcr_el2);
>> isb();
>> }
>
> This seems terribly complicated for no good reasons. Why can't we run
> with HCR_xMO set at all times? i.e. like this:
Ah, good point Marc. You're right, this is much simpler.
Tested-by: D Scott Phillips <scott@...amperecomputing.com>
Reviewed-by: D Scott Phillips <scott@...amperecomputing.com>
> diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
> index 974d72b5905b8..bba4b0e930915 100644
> --- a/arch/arm64/include/asm/kvm_arm.h
> +++ b/arch/arm64/include/asm/kvm_arm.h
> @@ -100,7 +100,7 @@
> HCR_FMO | HCR_IMO | HCR_PTW | HCR_TID3 | HCR_TID1)
> #define HCR_HOST_NVHE_FLAGS (HCR_RW | HCR_API | HCR_APK | HCR_ATA)
> #define HCR_HOST_NVHE_PROTECTED_FLAGS (HCR_HOST_NVHE_FLAGS | HCR_TSC)
> -#define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H)
> +#define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H | HCR_AMO | HCR_IMO | HCR_FMO)
>
> #define HCRX_HOST_FLAGS (HCRX_EL2_MSCEn | HCRX_EL2_TCR2En | HCRX_EL2_EnFPM)
> #define MPAMHCR_HOST_FLAGS 0
>
> There should never be a case where we don't want physical interrupts
> to be taken at EL2 when running VHE, and we should never use these
> bits to mask interrupts. This has been relaxed in the architecture to
> have an IMPDEF behaviour anyway, as it cannot be implemented on NV.
>
> Thanks,
>
> M.
>
> --
> Without deviation from the norm, progress is not possible.
Powered by blists - more mailing lists