lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <ec322123-bfe7-6019-7f35-de326ee7b6c3@os.amperecomputing.com>
Date:   Fri, 15 Sep 2023 15:27:46 +0530
From:   Ganapatrao Kulkarni <gankulkarni@...amperecomputing.com>
To:     Marc Zyngier <maz@...nel.org>
Cc:     linux-kernel@...r.kernel.org, kvmarm@...ts.linux.dev,
        linux-arm-kernel@...ts.infradead.org, Christoffer.Dall@....com,
        eauger@...hat.com, miguel.luis@...cle.com,
        darren@...amperecomputing.com, scott@...amperecomputing.com
Subject: Re: [PATCH v2 2/2] KVM: arm64: timers: Save restore CVAL of a ptimer
 across guest entry and exits


Hi Marc,

On 10-09-2023 05:15 pm, Marc Zyngier wrote:
> On Mon, 04 Sep 2023 12:42:18 +0100,
> Ganapatrao Kulkarni <gankulkarni@...amperecomputing.com> wrote:
>>
>> As per FEAT_ECV, when HCR_EL2.{E2H, TGE} == {1, 1}, Enhanced Counter
>> Virtualization functionality is disabled and CNTPOFF_EL2 value is treated
>> as zero. On VHE host, E2H and TGE are set, hence it is required
>> to save Guest's CVAL to memory and increment it by CNTPOFF_EL2 at
>> guest exit to avoid false physical timer interrupts and also
>> restore back the CVAL with saved value before the guest entry.
>>
>> Signed-off-by: Ganapatrao Kulkarni <gankulkarni@...amperecomputing.com>
>> ---
>>   arch/arm64/kvm/arch_timer.c     | 60 ++++++++++++++++++++++++++++++---
>>   arch/arm64/kvm/hyp/vhe/switch.c | 13 +++++++
>>   include/kvm/arm_arch_timer.h    |  1 +
>>   3 files changed, 69 insertions(+), 5 deletions(-)
>>
>> diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c
>> index 98b0e8ac02ae..9fe3fa6ed98a 100644
>> --- a/arch/arm64/kvm/arch_timer.c
>> +++ b/arch/arm64/kvm/arch_timer.c
>> @@ -514,6 +514,59 @@ static void set_cntpoff(u64 cntpoff)
>>   		write_sysreg_s(cntpoff, SYS_CNTPOFF_EL2);
>>   }
>>   
>> +static void ptimer_cval_save(struct arch_timer_context *ctx, u64 offset)
>> +{
>> +	unsigned long flags;
>> +	u64 cval;
>> +
>> +	local_irq_save(flags);
>> +	cval = read_sysreg_el0(SYS_CNTP_CVAL);
>> +	timer_set_cval(ctx, cval);
>> +	cval += offset;
>> +	write_sysreg_el0(cval, SYS_CNTP_CVAL);
>> +	isb();
>> +	local_irq_restore(flags);
>> +}
>> +
>> +static void ptimer_cval_restore(struct arch_timer_context *ctx, u64 offset)
>> +{
>> +	unsigned long flags;
>> +	u64 cval;
>> +
>> +	local_irq_save(flags);
>> +	cval = timer_get_cval(ctx);
>> +	write_sysreg_el0(cval, SYS_CNTP_CVAL);
>> +	isb();
>> +	local_irq_restore(flags);
>> +}
>> +
>> +void kvm_ptimer_cval_save_restore(struct kvm_vcpu *vcpu, bool save)
>> +{
>> +	struct timer_map map;
>> +	struct arch_timer_cpu *timer;
>> +	struct arch_timer_context *ctxp;
>> +	u64 offset;
>> +
>> +	get_timer_map(vcpu, &map);
>> +	ctxp = map.direct_ptimer;
>> +
>> +	if (unlikely(ctxp == NULL))
>> +		return;
>> +
>> +	offset = timer_get_offset(ctxp);
>> +	if (!offset)
>> +		return;
>> +
>> +	timer = vcpu_timer(ctxp->vcpu);
>> +	if (!timer->enabled || !ctxp->loaded)
>> +		return;
>> +
>> +	if (save)
>> +		ptimer_cval_save(ctxp, offset);
>> +	else
>> +		ptimer_cval_restore(ctxp, offset);
>> +}
>> +
> 
> I really don't want to see any of this code in the arch_timer
> backend. There is nothing here that executes in the context of the
> world-switch until this, and I think this is the wrong level of
> abstraction.
> 
> You end-up doing things that make no sense in the expected execution
> context (timer not loaded?, not enabled?, disabling interrupts?), and
> this makes the whole thing pointlessly complex.
> 
> My take on this problem is still the same (vaguely amended compared to
> the previous version). If this doesn't work for you, please explain
> what is wrong with it. But it has the shape of what I really want to
> see.
> 
> Thanks,
> 
> 	M.
> 
>  From 2516a1410c0d45a7d3ba0523847be339bfcb1e50 Mon Sep 17 00:00:00 2001
> From: Marc Zyngier <maz@...nel.org>
> Date: Tue, 22 Aug 2023 13:18:10 +0100
> Subject: [PATCH] KVM: arm64: timers: Correctly handle TGE flip with
>   CNTPOFF_EL2
> 
> Contrary to common belief, HCR_EL2.TGE has a direct and immediate
> effect on the way the EL0 physical counter is offset. Flipping
> TGE from 1 to 0 while at EL2 immediately changes the way the counter
> compared to the CVAL limit.
> 
> This means that we cannot directly save/restore the guest's view of
> CVAL, but that we instead must treat it as if CNTPOFF didn't exist.
> Only in the world switch, once we figure out that we do have CNTPOFF,
> can we must the offset back and forth depending on the polarity of
> TGE.
> 
> Fixes: 2b4825a86940 ("KVM: arm64: timers: Use CNTPOFF_EL2 to offset the physical timer")
> Reported-by: Ganapatrao Kulkarni <gankulkarni@...amperecomputing.com>
> Signed-off-by: Marc Zyngier <maz@...nel.org>
> ---
>   arch/arm64/kvm/arch_timer.c     | 13 +++-------
>   arch/arm64/kvm/hyp/vhe/switch.c | 45 +++++++++++++++++++++++++++++++++
>   include/kvm/arm_arch_timer.h    |  7 +++++
>   3 files changed, 55 insertions(+), 10 deletions(-)
> 
> diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c
> index 6dcdae4d38cb..a1e24228aaaa 100644
> --- a/arch/arm64/kvm/arch_timer.c
> +++ b/arch/arm64/kvm/arch_timer.c
> @@ -55,11 +55,6 @@ static struct irq_ops arch_timer_irq_ops = {
>   	.get_input_level = kvm_arch_timer_get_input_level,
>   };
>   
> -static bool has_cntpoff(void)
> -{
> -	return (has_vhe() && cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF));
> -}
> -
>   static int nr_timers(struct kvm_vcpu *vcpu)
>   {
>   	if (!vcpu_has_nv(vcpu))
> @@ -180,7 +175,7 @@ u64 kvm_phys_timer_read(void)
>   	return timecounter->cc->read(timecounter->cc);
>   }
>   
> -static void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map)
> +void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map)
>   {
>   	if (vcpu_has_nv(vcpu)) {
>   		if (is_hyp_ctxt(vcpu)) {
> @@ -548,8 +543,7 @@ static void timer_save_state(struct arch_timer_context *ctx)
>   		timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTP_CTL));
>   		cval = read_sysreg_el0(SYS_CNTP_CVAL);
>   
> -		if (!has_cntpoff())
> -			cval -= timer_get_offset(ctx);
> +		cval -= timer_get_offset(ctx);
>   
>   		timer_set_cval(ctx, cval);
>   
> @@ -636,8 +630,7 @@ static void timer_restore_state(struct arch_timer_context *ctx)
>   		cval = timer_get_cval(ctx);
>   		offset = timer_get_offset(ctx);
>   		set_cntpoff(offset);
> -		if (!has_cntpoff())
> -			cval += offset;
> +		cval += offset;
>   		write_sysreg_el0(cval, SYS_CNTP_CVAL);
>   		isb();
>   		write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTP_CTL);
> diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
> index 6537f58b1a8c..5dcbde57f466 100644
> --- a/arch/arm64/kvm/hyp/vhe/switch.c
> +++ b/arch/arm64/kvm/hyp/vhe/switch.c
> @@ -39,6 +39,26 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
>   
>   	___activate_traps(vcpu);
>   
> +	if (has_cntpoff()) {
> +		struct timer_map map;
> +
> +		get_timer_map(vcpu, &map);
> +
> +		/*
> +		 * We're entrering the guest. Reload the correct
> +		 * values from memory now that TGE is clear.
> +		 */
> +		if (map.direct_ptimer == vcpu_ptimer(vcpu))
> +			val = __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0);
> +		if (map.direct_ptimer == vcpu_hptimer(vcpu))
> +			val = __vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2);
> +
> +		if (map.direct_ptimer) {
> +			write_sysreg_s(val, SYS_CNTP_CVAL_EL0);
> +			isb();
> +		}
> +	}
> +
>   	val = read_sysreg(cpacr_el1);
>   	val |= CPACR_ELx_TTA;
>   	val &= ~(CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN |
> @@ -77,6 +97,31 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)
>   
>   	write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
>   
> +	if (has_cntpoff()) {
> +		struct timer_map map;
> +		u64 val, offset;
> +
> +		get_timer_map(vcpu, &map);
> +
> +		/*
> +		 * We're exiting the guest. Save the latest CVAL value
> +		 * to memory and apply the offset now that TGE is set.
> +		 */
> +		val = read_sysreg_s(SYS_CNTP_CVAL_EL0);
> +		if (map.direct_ptimer == vcpu_ptimer(vcpu))
> +			__vcpu_sys_reg(vcpu, CNTP_CVAL_EL0) = val;
> +		if (map.direct_ptimer == vcpu_hptimer(vcpu))
> +			__vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2) = val;
> +
> +		offset = read_sysreg_s(SYS_CNTPOFF_EL2);
> +
> +		if (map.direct_ptimer && offset) {
> +			offset = read_sysreg_s(SYS_CNTPOFF_EL2);
> +			write_sysreg_s(val + offset, SYS_CNTP_CVAL_EL0);
> +			isb();
> +		}
> +	}
> +
>   	/*
>   	 * ARM errata 1165522 and 1530923 require the actual execution of the
>   	 * above before we can switch to the EL2/EL0 translation regime used by
> diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h
> index bb3cb005873e..e748bc957d83 100644
> --- a/include/kvm/arm_arch_timer.h
> +++ b/include/kvm/arm_arch_timer.h
> @@ -82,6 +82,8 @@ struct timer_map {
>   	struct arch_timer_context *emul_ptimer;
>   };
>   
> +void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map);
> +
>   struct arch_timer_cpu {
>   	struct arch_timer_context timers[NR_KVM_TIMERS];
>   
> @@ -145,4 +147,9 @@ u64 timer_get_cval(struct arch_timer_context *ctxt);
>   void kvm_timer_cpu_up(void);
>   void kvm_timer_cpu_down(void);
>   
> +static inline bool has_cntpoff(void)
> +{
> +	return (has_vhe() && cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF));
> +}
> +
>   #endif

This patch did not work.
After adding changes as in below diff, it is started working.

diff --git a/arch/arm64/kvm/hyp/vhe/switch.c 
b/arch/arm64/kvm/hyp/vhe/switch.c
index b0b07658f77d..91d2cfb03e26 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -117,7 +117,7 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
                         val = __vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2);

                 if (map.direct_ptimer) {
-                       write_sysreg_s(val, SYS_CNTP_CVAL_EL0);
+                       write_sysreg_el0(val, SYS_CNTP_CVAL);
                         isb();
                 }
         }
@@ -161,8 +161,6 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)

         ___deactivate_traps(vcpu);

-       write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
-
         if (has_cntpoff()) {
                 struct timer_map map;
                 u64 val, offset;
@@ -173,7 +171,7 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)
                  * We're exiting the guest. Save the latest CVAL value
                  * to memory and apply the offset now that TGE is set.
                  */
-               val = read_sysreg_s(SYS_CNTP_CVAL_EL0);
+               val = read_sysreg_el0(SYS_CNTP_CVAL);
                 if (map.direct_ptimer == vcpu_ptimer(vcpu))
                         __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0) = val;
                 if (map.direct_ptimer == vcpu_hptimer(vcpu))
@@ -182,12 +180,13 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)
                 offset = read_sysreg_s(SYS_CNTPOFF_EL2);

                 if (map.direct_ptimer && offset) {
-                       offset = read_sysreg_s(SYS_CNTPOFF_EL2);
-                       write_sysreg_s(val + offset, SYS_CNTP_CVAL_EL0);
+                       write_sysreg_el0(val + offset, SYS_CNTP_CVAL);
                         isb();
                 }
         }

+       write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
+
         /*
          * ARM errata 1165522 and 1530923 require the actual execution 
of the
          * above before we can switch to the EL2/EL0 translation regime 
used by
[root@...06sys-r120 arm-platforms]#



Thanks,
Ganapat

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ