Message-ID: <86qzzkc5xa.wl-maz@kernel.org>
Date: Mon, 16 Jun 2025 11:54:57 +0100
From: Marc Zyngier <maz@...nel.org>
To: Wei-Lin Chang <r09922117@...e.ntu.edu.tw>
Cc: linux-arm-kernel@...ts.infradead.org,
kvmarm@...ts.linux.dev,
linux-kernel@...r.kernel.org,
Oliver Upton <oliver.upton@...ux.dev>,
Joey Gouly <joey.gouly@....com>,
Suzuki K Poulose <suzuki.poulose@....com>,
Zenghui Yu <yuzenghui@...wei.com>,
Catalin Marinas <catalin.marinas@....com>,
Will Deacon <will@...nel.org>,
Jintack Lim <jintack@...columbia.edu>,
Christoffer Dall <christoffer.dall@....com>
Subject: Re: [PATCH] KVM: arm64: nv: Fix s_cpu_if->vgic_lr[] indexing in vgic_v3_put_nested()
On Sat, 14 Jun 2025 15:57:21 +0100,
Wei-Lin Chang <r09922117@...e.ntu.edu.tw> wrote:
>
> s_cpu_if->vgic_lr[] is filled contiguously from index 0 to
> s_cpu_if->used_lrs - 1, but vgic_v3_put_nested() indexes it using
> the positions of the set bits in shadow_if->lr_map. Correct it to
> use a sequential index.
>
> Signed-off-by: Wei-Lin Chang <r09922117@...e.ntu.edu.tw>
> ---
> arch/arm64/kvm/vgic/vgic-v3-nested.c | 7 ++++---
> 1 file changed, 4 insertions(+), 3 deletions(-)
>
> diff --git a/arch/arm64/kvm/vgic/vgic-v3-nested.c b/arch/arm64/kvm/vgic/vgic-v3-nested.c
> index 4f6954c30674..29741e3f077b 100644
> --- a/arch/arm64/kvm/vgic/vgic-v3-nested.c
> +++ b/arch/arm64/kvm/vgic/vgic-v3-nested.c
> @@ -343,7 +343,7 @@ void vgic_v3_put_nested(struct kvm_vcpu *vcpu)
> struct shadow_if *shadow_if = get_shadow_if();
> struct vgic_v3_cpu_if *s_cpu_if = &shadow_if->cpuif;
> u64 val;
> - int i;
> + int i, index = 0;
>
> __vgic_v3_save_vmcr_aprs(s_cpu_if);
> __vgic_v3_deactivate_traps(s_cpu_if);
> @@ -368,10 +368,11 @@ void vgic_v3_put_nested(struct kvm_vcpu *vcpu)
> val = __vcpu_sys_reg(vcpu, ICH_LRN(i));
>
> val &= ~ICH_LR_STATE;
> - val |= s_cpu_if->vgic_lr[i] & ICH_LR_STATE;
> + val |= s_cpu_if->vgic_lr[index] & ICH_LR_STATE;
>
> __vcpu_sys_reg(vcpu, ICH_LRN(i)) = val;
> - s_cpu_if->vgic_lr[i] = 0;
> + s_cpu_if->vgic_lr[index] = 0;
> + index++;
> }
>
> shadow_if->lr_map = 0;
Nice catch, thanks a lot for tracking it down.

However, I think we should get rid of this double-indexing
altogether, or at least make it less error-prone. This thing is
extremely fragile, and it isn't the first time we have been bitten
by it.

Looking at the code, it becomes pretty obvious that the shadow index
is always the number of bits set in lr_map below the current LR
index, and that we could completely drop the parallel 'index'
bookkeeping if we simply counted these bits (which isn't that
expensive).
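
To make the mapping concrete, here is a small standalone sketch
(plain C, outside the kernel tree; the function and variable names
are only illustrative, not the kernel helpers):

    /*
     * Standalone illustration of the popcount-based index mapping.
     * The shadow index of guest LR 'idx' is the number of set bits
     * in lr_map strictly below bit 'idx'.
     */
    #include <stdio.h>
    #include <stdint.h>

    static int map_idx_to_shadow_idx(uint64_t lr_map, int idx)
    {
            return __builtin_popcountll(lr_map & ((1ULL << idx) - 1));
    }

    int main(void)
    {
            uint64_t lr_map = 0xb;  /* guest LRs 0, 1 and 3 in use */

            /*
             * Guest LR 3 lands in shadow slot 2: only bits 0 and 1
             * are set below bit 3 in lr_map.
             */
            printf("%d\n", map_idx_to_shadow_idx(lr_map, 3));
            return 0;
    }

With lr_map == 0xb, guest LRs 0, 1 and 3 map to shadow slots 0, 1
and 2 respectively, which is exactly the contiguous filling that
vgic_v3_create_shadow_lr() performs.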
I came up with the following (admittedly much bigger) fix.

Thoughts?
M.
From 2484950b8fc3b36cca32bf5e86ffe7975a43e0e7 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@...nel.org>
Date: Sun, 15 Jun 2025 16:11:38 +0100
Subject: [PATCH] KVM: arm64: nv: Fix tracking of shadow list registers
Wei-Lin reports that the tracking of shadow list registers is
majorly broken when resync'ing the L2 state after a run, as
we confuse the guest's LR index with the host's, potentially
losing the interrupt state.

While this could be fixed by adding yet another side index to
track it (Wei-Lin's fix), it may be better to refactor this
code to avoid having a side index altogether, limiting the
risk of introducing this class of bugs.

A key observation is that the shadow index is always the number
of set bits in the lr_map bitmap below the current LR index.
With that, the parallel indexing scheme can be completely dropped.

While doing this, introduce a couple of helpers that abstract
the index conversion and some of the LR repainting, making the
whole exercise much simpler.
Reported-by: Wei-Lin Chang <r09922117@...e.ntu.edu.tw>
Signed-off-by: Marc Zyngier <maz@...nel.org>
Link: https://lore.kernel.org/r/20250614145721.2504524-1-r09922117@csie.ntu.edu.tw
---
arch/arm64/kvm/vgic/vgic-v3-nested.c | 81 ++++++++++++++--------------
1 file changed, 42 insertions(+), 39 deletions(-)
diff --git a/arch/arm64/kvm/vgic/vgic-v3-nested.c b/arch/arm64/kvm/vgic/vgic-v3-nested.c
index d22a8ad7bcc51..bd22e42ce9112 100644
--- a/arch/arm64/kvm/vgic/vgic-v3-nested.c
+++ b/arch/arm64/kvm/vgic/vgic-v3-nested.c
@@ -36,6 +36,11 @@ struct shadow_if {
static DEFINE_PER_CPU(struct shadow_if, shadow_if);
+static int lr_map_idx_to_shadow_idx(struct shadow_if *shadow_if, int idx)
+{
+ return hweight64(shadow_if->lr_map & (BIT(idx) - 1));
+}
+
/*
* Nesting GICv3 support
*
@@ -209,6 +214,29 @@ u64 vgic_v3_get_misr(struct kvm_vcpu *vcpu)
return reg;
}
+static u64 translate_lr_hw_bit(struct kvm_vcpu *vcpu, u64 lr)
+{
+ struct vgic_irq *irq;
+
+ if (!(lr & ICH_LR_HW))
+ return lr;
+
+ /* We have the HW bit set, check for validity of pINTID */
+ irq = vgic_get_vcpu_irq(vcpu, FIELD_GET(ICH_LR_PHYS_ID_MASK, lr));
+ /* If there was no real mapping, nuke the HW bit */
+ if (!irq || !irq->hw || irq->intid > VGIC_MAX_SPI)
+ lr &= ~ICH_LR_HW;
+
+ /* Translate the virtual mapping to the real one, even if invalid */
+ if (irq) {
+ lr &= ~ICH_LR_PHYS_ID_MASK;
+ lr |= FIELD_PREP(ICH_LR_PHYS_ID_MASK, (u64)irq->hwintid);
+ vgic_put_irq(vcpu->kvm, irq);
+ }
+
+ return lr;
+}
+
/*
* For LRs which have HW bit set such as timer interrupts, we modify them to
* have the host hardware interrupt number instead of the virtual one programmed
@@ -217,58 +245,37 @@ u64 vgic_v3_get_misr(struct kvm_vcpu *vcpu)
static void vgic_v3_create_shadow_lr(struct kvm_vcpu *vcpu,
struct vgic_v3_cpu_if *s_cpu_if)
{
- unsigned long lr_map = 0;
- int index = 0;
+ struct shadow_if *shadow_if;
+
+ shadow_if = container_of(s_cpu_if, struct shadow_if, cpuif);
+ shadow_if->lr_map = 0;
for (int i = 0; i < kvm_vgic_global_state.nr_lr; i++) {
u64 lr = __vcpu_sys_reg(vcpu, ICH_LRN(i));
- struct vgic_irq *irq;
if (!(lr & ICH_LR_STATE))
- lr = 0;
-
- if (!(lr & ICH_LR_HW))
- goto next;
-
- /* We have the HW bit set, check for validity of pINTID */
- irq = vgic_get_vcpu_irq(vcpu, FIELD_GET(ICH_LR_PHYS_ID_MASK, lr));
- if (!irq || !irq->hw || irq->intid > VGIC_MAX_SPI ) {
- /* There was no real mapping, so nuke the HW bit */
- lr &= ~ICH_LR_HW;
- if (irq)
- vgic_put_irq(vcpu->kvm, irq);
- goto next;
- }
-
- /* Translate the virtual mapping to the real one */
- lr &= ~ICH_LR_PHYS_ID_MASK;
- lr |= FIELD_PREP(ICH_LR_PHYS_ID_MASK, (u64)irq->hwintid);
+ continue;
- vgic_put_irq(vcpu->kvm, irq);
+ lr = translate_lr_hw_bit(vcpu, lr);
-next:
- s_cpu_if->vgic_lr[index] = lr;
- if (lr) {
- lr_map |= BIT(i);
- index++;
- }
+ s_cpu_if->vgic_lr[hweight64(shadow_if->lr_map)] = lr;
+ shadow_if->lr_map |= BIT(i);
}
- container_of(s_cpu_if, struct shadow_if, cpuif)->lr_map = lr_map;
- s_cpu_if->used_lrs = index;
+ s_cpu_if->used_lrs = hweight64(shadow_if->lr_map);
}
void vgic_v3_sync_nested(struct kvm_vcpu *vcpu)
{
struct shadow_if *shadow_if = get_shadow_if();
- int i, index = 0;
+ int i;
for_each_set_bit(i, &shadow_if->lr_map, kvm_vgic_global_state.nr_lr) {
u64 lr = __vcpu_sys_reg(vcpu, ICH_LRN(i));
struct vgic_irq *irq;
if (!(lr & ICH_LR_HW) || !(lr & ICH_LR_STATE))
- goto next;
+ continue;
/*
* If we had a HW lr programmed by the guest hypervisor, we
@@ -277,15 +284,13 @@ void vgic_v3_sync_nested(struct kvm_vcpu *vcpu)
*/
irq = vgic_get_vcpu_irq(vcpu, FIELD_GET(ICH_LR_PHYS_ID_MASK, lr));
if (WARN_ON(!irq)) /* Shouldn't happen as we check on load */
- goto next;
+ continue;
- lr = __gic_v3_get_lr(index);
+ lr = __gic_v3_get_lr(lr_map_idx_to_shadow_idx(shadow_if, i));
if (!(lr & ICH_LR_STATE))
irq->active = false;
vgic_put_irq(vcpu->kvm, irq);
- next:
- index++;
}
}
@@ -368,13 +373,11 @@ void vgic_v3_put_nested(struct kvm_vcpu *vcpu)
val = __vcpu_sys_reg(vcpu, ICH_LRN(i));
val &= ~ICH_LR_STATE;
- val |= s_cpu_if->vgic_lr[i] & ICH_LR_STATE;
+ val |= s_cpu_if->vgic_lr[lr_map_idx_to_shadow_idx(shadow_if, i)] & ICH_LR_STATE;
__vcpu_assign_sys_reg(vcpu, ICH_LRN(i), val);
- s_cpu_if->vgic_lr[i] = 0;
}
- shadow_if->lr_map = 0;
vcpu->arch.vgic_cpu.vgic_v3.used_lrs = 0;
}
--
2.39.2
--
Without deviation from the norm, progress is not possible.