[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <2p4urq6ij5zq77c7viydmqqaj2doeyja43khvcjgn5pkpe7saz@eyfvuykscn22>
Date: Mon, 16 Jun 2025 22:34:58 +0800
From: Wei-Lin Chang <r09922117@...e.ntu.edu.tw>
To: Marc Zyngier <maz@...nel.org>
Cc: linux-arm-kernel@...ts.infradead.org, kvmarm@...ts.linux.dev,
linux-kernel@...r.kernel.org, Oliver Upton <oliver.upton@...ux.dev>,
Joey Gouly <joey.gouly@....com>, Suzuki K Poulose <suzuki.poulose@....com>,
Zenghui Yu <yuzenghui@...wei.com>, Catalin Marinas <catalin.marinas@....com>,
Will Deacon <will@...nel.org>, Jintack Lim <jintack@...columbia.edu>,
Christoffer Dall <christoffer.dall@....com>
Subject: Re: [PATCH] KVM: arm64: nv: Fix s_cpu_if->vgic_lr[] indexing in
vgic_v3_put_nested()
On Mon, Jun 16, 2025 at 11:54:57AM +0100, Marc Zyngier wrote:
> On Sat, 14 Jun 2025 15:57:21 +0100,
> Wei-Lin Chang <r09922117@...e.ntu.edu.tw> wrote:
> >
> > s_cpu_if->vgic_lr[] is filled continuously from index 0 to
> > s_cpu_if->used_lrs - 1, but vgic_v3_put_nested() is indexing it using
> > the positions of the set bits in shadow_if->lr_map. So correct it.
> >
> > Signed-off-by: Wei-Lin Chang <r09922117@...e.ntu.edu.tw>
> > ---
> > arch/arm64/kvm/vgic/vgic-v3-nested.c | 7 ++++---
> > 1 file changed, 4 insertions(+), 3 deletions(-)
> >
> > diff --git a/arch/arm64/kvm/vgic/vgic-v3-nested.c b/arch/arm64/kvm/vgic/vgic-v3-nested.c
> > index 4f6954c30674..29741e3f077b 100644
> > --- a/arch/arm64/kvm/vgic/vgic-v3-nested.c
> > +++ b/arch/arm64/kvm/vgic/vgic-v3-nested.c
> > @@ -343,7 +343,7 @@ void vgic_v3_put_nested(struct kvm_vcpu *vcpu)
> > struct shadow_if *shadow_if = get_shadow_if();
> > struct vgic_v3_cpu_if *s_cpu_if = &shadow_if->cpuif;
> > u64 val;
> > - int i;
> > + int i, index = 0;
> >
> > __vgic_v3_save_vmcr_aprs(s_cpu_if);
> > __vgic_v3_deactivate_traps(s_cpu_if);
> > @@ -368,10 +368,11 @@ void vgic_v3_put_nested(struct kvm_vcpu *vcpu)
> > val = __vcpu_sys_reg(vcpu, ICH_LRN(i));
> >
> > val &= ~ICH_LR_STATE;
> > - val |= s_cpu_if->vgic_lr[i] & ICH_LR_STATE;
> > + val |= s_cpu_if->vgic_lr[index] & ICH_LR_STATE;
> >
> > __vcpu_sys_reg(vcpu, ICH_LRN(i)) = val;
> > - s_cpu_if->vgic_lr[i] = 0;
> > + s_cpu_if->vgic_lr[index] = 0;
> > + index++;
> > }
> >
> > shadow_if->lr_map = 0;
>
> Nice catch, thanks a lot for tracking it down.
>
> However, I think we should get rid of this double-indexing altogether,
> or at least make it less error-prone. This thing is extremely fragile,
> and it isn't the first time we are getting bitten with it.
Hi Marc,
Yes, agreed — making this more robust is better than just the bug fix.
Thanks for the effort.
>
> Looking at the code, it becomes pretty obvious that the shadow index
> is always the number of bits set in lr_map, and that we could
> completely drop the 'index' thing if we simply counted these bits
> (which isn't that expensive).
>
> I came up with the (admittedly much bigger) following fix.
>
> Thoughts?
Just one nit below:
>
> M.
>
> From 2484950b8fc3b36cca32bf5e86ffe7975a43e0e7 Mon Sep 17 00:00:00 2001
> From: Marc Zyngier <maz@...nel.org>
> Date: Sun, 15 Jun 2025 16:11:38 +0100
> Subject: [PATCH] KVM: arm64: nv: Fix tracking of shadow list registers
>
> Wei-Lin reports that the tracking of shadow list registers is
> majorly broken when resync'ing the L2 state after a run, as
> we confuse the guest's LR index with the host's, potentially
> losing the interrupt state.
>
> While this could be fixed by adding yet another side index to
> track it (Wei-Lin's fix), it may be better to refactor this
> code to avoid having a side index altogether, limiting the
> risk to introduce this class of bugs.
>
> A key observation is that the shadow index is always the number
> of bits in the lr_map bitmap. With that, the parallel indexing
> scheme can be completely dropped.
>
> While doing this, introduce a couple of helpers that abstract
> the index conversion and some of the LR repainting, making the
> whole exercise much simpler.
>
> Reported-by: Wei-Lin Chang <r09922117@...e.ntu.edu.tw>
> Signed-off-by: Marc Zyngier <maz@...nel.org>
> Link: https://lore.kernel.org/r/20250614145721.2504524-1-r09922117@csie.ntu.edu.tw
> ---
> arch/arm64/kvm/vgic/vgic-v3-nested.c | 81 ++++++++++++++--------------
> 1 file changed, 42 insertions(+), 39 deletions(-)
>
> diff --git a/arch/arm64/kvm/vgic/vgic-v3-nested.c b/arch/arm64/kvm/vgic/vgic-v3-nested.c
> index d22a8ad7bcc51..bd22e42ce9112 100644
> --- a/arch/arm64/kvm/vgic/vgic-v3-nested.c
> +++ b/arch/arm64/kvm/vgic/vgic-v3-nested.c
> @@ -36,6 +36,11 @@ struct shadow_if {
>
> static DEFINE_PER_CPU(struct shadow_if, shadow_if);
>
> +static int lr_map_idx_to_shadow_idx(struct shadow_if *shadow_if, int idx)
> +{
> + return hweight64(shadow_if->lr_map & (BIT(idx) - 1));
> +}
> +
> /*
> * Nesting GICv3 support
> *
> @@ -209,6 +214,29 @@ u64 vgic_v3_get_misr(struct kvm_vcpu *vcpu)
> return reg;
> }
>
> +static u64 translate_lr_hw_bit(struct kvm_vcpu *vcpu, u64 lr)
I believe translate_lr_pintid() would better describe what this function
is doing.
For everything else, I don't spot any issues after review.
Reviewed-by: Wei-Lin Chang <r09922117@...e.ntu.edu.tw>
> +{
> + struct vgic_irq *irq;
> +
> + if (!(lr & ICH_LR_HW))
> + return lr;
> +
> + /* We have the HW bit set, check for validity of pINTID */
> + irq = vgic_get_vcpu_irq(vcpu, FIELD_GET(ICH_LR_PHYS_ID_MASK, lr));
> + /* If there was no real mapping, nuke the HW bit */
> + if (!irq || !irq->hw || irq->intid > VGIC_MAX_SPI)
> + lr &= ~ICH_LR_HW;
> +
> + /* Translate the virtual mapping to the real one, even if invalid */
> + if (irq) {
> + lr &= ~ICH_LR_PHYS_ID_MASK;
> + lr |= FIELD_PREP(ICH_LR_PHYS_ID_MASK, (u64)irq->hwintid);
> + vgic_put_irq(vcpu->kvm, irq);
> + }
> +
> + return lr;
> +}
> +
> /*
> * For LRs which have HW bit set such as timer interrupts, we modify them to
> * have the host hardware interrupt number instead of the virtual one programmed
> @@ -217,58 +245,37 @@ u64 vgic_v3_get_misr(struct kvm_vcpu *vcpu)
> static void vgic_v3_create_shadow_lr(struct kvm_vcpu *vcpu,
> struct vgic_v3_cpu_if *s_cpu_if)
> {
> - unsigned long lr_map = 0;
> - int index = 0;
> + struct shadow_if *shadow_if;
> +
> + shadow_if = container_of(s_cpu_if, struct shadow_if, cpuif);
> + shadow_if->lr_map = 0;
>
> for (int i = 0; i < kvm_vgic_global_state.nr_lr; i++) {
> u64 lr = __vcpu_sys_reg(vcpu, ICH_LRN(i));
> - struct vgic_irq *irq;
>
> if (!(lr & ICH_LR_STATE))
> - lr = 0;
> -
> - if (!(lr & ICH_LR_HW))
> - goto next;
> -
> - /* We have the HW bit set, check for validity of pINTID */
> - irq = vgic_get_vcpu_irq(vcpu, FIELD_GET(ICH_LR_PHYS_ID_MASK, lr));
> - if (!irq || !irq->hw || irq->intid > VGIC_MAX_SPI ) {
> - /* There was no real mapping, so nuke the HW bit */
> - lr &= ~ICH_LR_HW;
> - if (irq)
> - vgic_put_irq(vcpu->kvm, irq);
> - goto next;
> - }
> -
> - /* Translate the virtual mapping to the real one */
> - lr &= ~ICH_LR_PHYS_ID_MASK;
> - lr |= FIELD_PREP(ICH_LR_PHYS_ID_MASK, (u64)irq->hwintid);
> + continue;
>
> - vgic_put_irq(vcpu->kvm, irq);
> + lr = translate_lr_hw_bit(vcpu, lr);
>
> -next:
> - s_cpu_if->vgic_lr[index] = lr;
> - if (lr) {
> - lr_map |= BIT(i);
> - index++;
> - }
> + s_cpu_if->vgic_lr[hweight64(shadow_if->lr_map)] = lr;
> + shadow_if->lr_map |= BIT(i);
> }
>
> - container_of(s_cpu_if, struct shadow_if, cpuif)->lr_map = lr_map;
> - s_cpu_if->used_lrs = index;
> + s_cpu_if->used_lrs = hweight64(shadow_if->lr_map);
> }
>
> void vgic_v3_sync_nested(struct kvm_vcpu *vcpu)
> {
> struct shadow_if *shadow_if = get_shadow_if();
> - int i, index = 0;
> + int i;
>
> for_each_set_bit(i, &shadow_if->lr_map, kvm_vgic_global_state.nr_lr) {
> u64 lr = __vcpu_sys_reg(vcpu, ICH_LRN(i));
> struct vgic_irq *irq;
>
> if (!(lr & ICH_LR_HW) || !(lr & ICH_LR_STATE))
> - goto next;
> + continue;
>
> /*
> * If we had a HW lr programmed by the guest hypervisor, we
> @@ -277,15 +284,13 @@ void vgic_v3_sync_nested(struct kvm_vcpu *vcpu)
> */
> irq = vgic_get_vcpu_irq(vcpu, FIELD_GET(ICH_LR_PHYS_ID_MASK, lr));
> if (WARN_ON(!irq)) /* Shouldn't happen as we check on load */
> - goto next;
> + continue;
>
> - lr = __gic_v3_get_lr(index);
> + lr = __gic_v3_get_lr(lr_map_idx_to_shadow_idx(shadow_if, i));
> if (!(lr & ICH_LR_STATE))
> irq->active = false;
>
> vgic_put_irq(vcpu->kvm, irq);
> - next:
> - index++;
> }
> }
>
> @@ -368,13 +373,11 @@ void vgic_v3_put_nested(struct kvm_vcpu *vcpu)
> val = __vcpu_sys_reg(vcpu, ICH_LRN(i));
>
> val &= ~ICH_LR_STATE;
> - val |= s_cpu_if->vgic_lr[i] & ICH_LR_STATE;
> + val |= s_cpu_if->vgic_lr[lr_map_idx_to_shadow_idx(shadow_if, i)] & ICH_LR_STATE;
>
> __vcpu_assign_sys_reg(vcpu, ICH_LRN(i), val);
> - s_cpu_if->vgic_lr[i] = 0;
> }
>
> - shadow_if->lr_map = 0;
> vcpu->arch.vgic_cpu.vgic_v3.used_lrs = 0;
> }
>
> --
> 2.39.2
>
>
> --
> Without deviation from the norm, progress is not possible.
Powered by blists - more mailing lists