[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CADrL8HWx-_1uVdfNyz2x2iEXEo4dJW0f_MK21NEQ=HLYfpROKg@mail.gmail.com>
Date: Mon, 27 Jan 2025 13:42:32 -0800
From: James Houghton <jthoughton@...gle.com>
To: Sean Christopherson <seanjc@...gle.com>
Cc: Paolo Bonzini <pbonzini@...hat.com>, David Matlack <dmatlack@...gle.com>,
David Rientjes <rientjes@...gle.com>, Marc Zyngier <maz@...nel.org>,
Oliver Upton <oliver.upton@...ux.dev>, Wei Xu <weixugc@...gle.com>, Yu Zhao <yuzhao@...gle.com>,
Axel Rasmussen <axelrasmussen@...gle.com>, kvm@...r.kernel.org,
linux-kernel@...r.kernel.org
Subject: Re: [PATCH v8 08/11] KVM: x86/mmu: Add infrastructure to allow
walking rmaps outside of mmu_lock
On Fri, Jan 10, 2025 at 3:19 PM Sean Christopherson <seanjc@...gle.com> wrote:
>
> On Tue, Nov 05, 2024, James Houghton wrote:
> > +static unsigned long kvm_rmap_lock(struct kvm_rmap_head *rmap_head)
> > +{
> > + unsigned long old_val, new_val;
> > +
> > + /*
> > + * Elide the lock if the rmap is empty, as lockless walkers (read-only
> > + * mode) don't need to (and can't) walk an empty rmap, nor can they add
> > + * entries to the rmap. I.e. the only paths that process empty rmaps
> > + * do so while holding mmu_lock for write, and are mutually exclusive.
> > + */
> > + old_val = atomic_long_read(&rmap_head->val);
> > + if (!old_val)
> > + return 0;
> > +
> > + do {
> > + /*
> > + * If the rmap is locked, wait for it to be unlocked before
> > + * trying acquire the lock, e.g. to bounce the cache line.
> > + */
> > + while (old_val & KVM_RMAP_LOCKED) {
> > + old_val = atomic_long_read(&rmap_head->val);
> > + cpu_relax();
> > + }
>
> As Lai Jiangshan pointed out[1][2], this should PAUSE first, then re-read the SPTE,
> and KVM needs to disable preemption while holding the lock, because this is nothing
> more than a rudimentary spinlock.
Ah! Sorry for missing this.
>
> [1] https://lore.kernel.org/all/ZrooozABEWSnwzxh@google.com
> [2] https://lore.kernel.org/all/Zrt5eNArfQA7x1qj@google.com
>
> I think this?
>
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index 1a0950b77126..9dac1bbb77d4 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -873,6 +873,8 @@ static unsigned long __kvm_rmap_lock(struct kvm_rmap_head *rmap_head)
> {
> unsigned long old_val, new_val;
>
> + lockdep_assert_preemption_disabled();
> +
> /*
> * Elide the lock if the rmap is empty, as lockless walkers (read-only
> * mode) don't need to (and can't) walk an empty rmap, nor can they add
> @@ -889,8 +891,8 @@ static unsigned long __kvm_rmap_lock(struct kvm_rmap_head *rmap_head)
> * trying acquire the lock, e.g. to bounce the cache line.
> */
> while (old_val & KVM_RMAP_LOCKED) {
> - old_val = atomic_long_read(&rmap_head->val);
> cpu_relax();
> + old_val = atomic_long_read(&rmap_head->val);
> }
>
> /*
> @@ -931,6 +933,8 @@ static unsigned long kvm_rmap_lock(struct kvm *kvm,
> static void kvm_rmap_unlock(struct kvm_rmap_head *rmap_head,
> unsigned long new_val)
> {
> + lockdep_assert_held_write(&kvm->mmu_lock);
> +
> KVM_MMU_WARN_ON(new_val & KVM_RMAP_LOCKED);
> /*
> * Ensure that all accesses to the rmap have completed
> @@ -948,12 +952,21 @@ static unsigned long kvm_rmap_get(struct kvm_rmap_head *rmap_head)
>
> /*
> * If mmu_lock isn't held, rmaps can only locked in read-only mode. The actual
> - * locking is the same, but the caller is disallowed from modifying the rmap,
> - * and so the unlock flow is a nop if the rmap is/was empty.
> + * locking is the same, but preemption needs to be manually disabled (because
> + * a spinlock isn't already held) and the caller is disallowed from modifying
> + * the rmap, and so the unlock flow is a nop if the rmap is/was empty. Note,
> + * preemption must be disable *before* acquiring the bitlock. If the rmap is
> + * empty, i.e. isn't truly locked, immediately re-enable preemption.
> */
> static unsigned long kvm_rmap_lock_readonly(struct kvm_rmap_head *rmap_head)
> {
> - return __kvm_rmap_lock(rmap_head);
> + unsigned rmap_val;
> + preempt_disable();
> +
> + rmap_val = __kvm_rmap_lock(rmap_head);
> + if (!rmap_val)
> + preempt_enable();
> + return rmap_val;
> }
>
> static void kvm_rmap_unlock_readonly(struct kvm_rmap_head *rmap_head,
> @@ -964,6 +977,7 @@ static void kvm_rmap_unlock_readonly(struct kvm_rmap_head *rmap_head,
>
> KVM_MMU_WARN_ON(old_val != kvm_rmap_get(rmap_head));
> atomic_long_set(&rmap_head->val, old_val);
> + preempt_enable();
> }
>
> /*
I don't see anything wrong with these changes. Thanks! Applied.
Powered by blists - more mailing lists