[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <afd1dbe1-3055-45f4-9db1-a31e4b9a6722@linux.intel.com>
Date: Tue, 20 May 2025 14:51:37 +0800
From: Binbin Wu <binbin.wu@...ux.intel.com>
To: Sean Christopherson <seanjc@...gle.com>
Cc: Paolo Bonzini <pbonzini@...hat.com>, kvm@...r.kernel.org,
linux-kernel@...r.kernel.org, Peter Xu <peterx@...hat.com>,
Yan Zhao <yan.y.zhao@...el.com>, Maxim Levitsky <mlevitsk@...hat.com>,
James Houghton <jthoughton@...gle.com>, Pankaj Gupta <pankaj.gupta@....com>
Subject: Re: [PATCH v3 1/6] KVM: Bound the number of dirty ring entries in a
single reset at INT_MAX
On 5/17/2025 5:35 AM, Sean Christopherson wrote:
> Cap the number of ring entries that are reset in a single ioctl to INT_MAX
> to ensure userspace isn't confused by a wrap into negative space, and so
> that, in a truly pathological scenario, KVM doesn't miss a TLB flush due
> to the count wrapping to zero. While the size of the ring is fixed at
> 0x10000 entries and KVM (currently) supports at most 4096, userspace is
> allowed to harvest entries from the ring while the reset is in-progress,
> i.e. it's possible for the ring to always have harvested entries.
>
> Opportunistically return an actual error code from the helper so that a
> future fix to handle pending signals can gracefully return -EINTR. Drop
> the function comment now that the return code is a stanard 0/-errno (and
stanard -> standard
The rest looks good to me.
Reviewed-by: Binbin Wu <binbin.wu@...ux.intel.com>
> because a future commit will add a proper lockdep assertion).
>
> Opportunistically drop a similarly stale comment for kvm_dirty_ring_push().
>
> Cc: Peter Xu <peterx@...hat.com>
> Cc: Yan Zhao <yan.y.zhao@...el.com>
> Cc: Maxim Levitsky <mlevitsk@...hat.com>
> Cc: Binbin Wu <binbin.wu@...ux.intel.com>
> Fixes: fb04a1eddb1a ("KVM: X86: Implement ring-based dirty memory tracking")
> Reviewed-by: James Houghton <jthoughton@...gle.com>
> Signed-off-by: Sean Christopherson <seanjc@...gle.com>
> ---
> include/linux/kvm_dirty_ring.h | 18 +++++-------------
> virt/kvm/dirty_ring.c | 10 +++++-----
> virt/kvm/kvm_main.c | 9 ++++++---
> 3 files changed, 16 insertions(+), 21 deletions(-)
>
> diff --git a/include/linux/kvm_dirty_ring.h b/include/linux/kvm_dirty_ring.h
> index da4d9b5f58f1..eb10d87adf7d 100644
> --- a/include/linux/kvm_dirty_ring.h
> +++ b/include/linux/kvm_dirty_ring.h
> @@ -49,9 +49,10 @@ static inline int kvm_dirty_ring_alloc(struct kvm *kvm, struct kvm_dirty_ring *r
> }
>
> static inline int kvm_dirty_ring_reset(struct kvm *kvm,
> - struct kvm_dirty_ring *ring)
> + struct kvm_dirty_ring *ring,
> + int *nr_entries_reset)
> {
> - return 0;
> + return -ENOENT;
> }
>
> static inline void kvm_dirty_ring_push(struct kvm_vcpu *vcpu,
> @@ -77,17 +78,8 @@ bool kvm_arch_allow_write_without_running_vcpu(struct kvm *kvm);
> u32 kvm_dirty_ring_get_rsvd_entries(struct kvm *kvm);
> int kvm_dirty_ring_alloc(struct kvm *kvm, struct kvm_dirty_ring *ring,
> int index, u32 size);
> -
> -/*
> - * called with kvm->slots_lock held, returns the number of
> - * processed pages.
> - */
> -int kvm_dirty_ring_reset(struct kvm *kvm, struct kvm_dirty_ring *ring);
> -
> -/*
> - * returns =0: successfully pushed
> - * <0: unable to push, need to wait
> - */
> +int kvm_dirty_ring_reset(struct kvm *kvm, struct kvm_dirty_ring *ring,
> + int *nr_entries_reset);
> void kvm_dirty_ring_push(struct kvm_vcpu *vcpu, u32 slot, u64 offset);
>
> bool kvm_dirty_ring_check_request(struct kvm_vcpu *vcpu);
> diff --git a/virt/kvm/dirty_ring.c b/virt/kvm/dirty_ring.c
> index d14ffc7513ee..77986f34eff8 100644
> --- a/virt/kvm/dirty_ring.c
> +++ b/virt/kvm/dirty_ring.c
> @@ -105,19 +105,19 @@ static inline bool kvm_dirty_gfn_harvested(struct kvm_dirty_gfn *gfn)
> return smp_load_acquire(&gfn->flags) & KVM_DIRTY_GFN_F_RESET;
> }
>
> -int kvm_dirty_ring_reset(struct kvm *kvm, struct kvm_dirty_ring *ring)
> +int kvm_dirty_ring_reset(struct kvm *kvm, struct kvm_dirty_ring *ring,
> + int *nr_entries_reset)
> {
> u32 cur_slot, next_slot;
> u64 cur_offset, next_offset;
> unsigned long mask;
> - int count = 0;
> struct kvm_dirty_gfn *entry;
> bool first_round = true;
>
> /* This is only needed to make compilers happy */
> cur_slot = cur_offset = mask = 0;
>
> - while (true) {
> + while (likely((*nr_entries_reset) < INT_MAX)) {
> entry = &ring->dirty_gfns[ring->reset_index & (ring->size - 1)];
>
> if (!kvm_dirty_gfn_harvested(entry))
> @@ -130,7 +130,7 @@ int kvm_dirty_ring_reset(struct kvm *kvm, struct kvm_dirty_ring *ring)
> kvm_dirty_gfn_set_invalid(entry);
>
> ring->reset_index++;
> - count++;
> + (*nr_entries_reset)++;
> /*
> * Try to coalesce the reset operations when the guest is
> * scanning pages in the same slot.
> @@ -167,7 +167,7 @@ int kvm_dirty_ring_reset(struct kvm *kvm, struct kvm_dirty_ring *ring)
>
> trace_kvm_dirty_ring_reset(ring);
>
> - return count;
> + return 0;
> }
>
> void kvm_dirty_ring_push(struct kvm_vcpu *vcpu, u32 slot, u64 offset)
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index b24db92e98f3..571688507204 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -4903,15 +4903,18 @@ static int kvm_vm_ioctl_reset_dirty_pages(struct kvm *kvm)
> {
> unsigned long i;
> struct kvm_vcpu *vcpu;
> - int cleared = 0;
> + int cleared = 0, r;
>
> if (!kvm->dirty_ring_size)
> return -EINVAL;
>
> mutex_lock(&kvm->slots_lock);
>
> - kvm_for_each_vcpu(i, vcpu, kvm)
> - cleared += kvm_dirty_ring_reset(vcpu->kvm, &vcpu->dirty_ring);
> + kvm_for_each_vcpu(i, vcpu, kvm) {
> + r = kvm_dirty_ring_reset(vcpu->kvm, &vcpu->dirty_ring, &cleared);
> + if (r)
> + break;
> + }
>
> mutex_unlock(&kvm->slots_lock);
>
Powered by blists - more mailing lists