[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CABayD+dHLXwQK3YdwVi6raf+CF3XOaAiAG+tfDYPiZFzqeVXpQ@mail.gmail.com>
Date: Fri, 2 Aug 2024 13:28:56 -0700
From: Steve Rutherford <srutherford@...gle.com>
To: Sean Christopherson <seanjc@...gle.com>
Cc: Marc Zyngier <maz@...nel.org>, Oliver Upton <oliver.upton@...ux.dev>,
Paolo Bonzini <pbonzini@...hat.com>, linux-arm-kernel@...ts.infradead.org,
kvmarm@...ts.linux.dev, kvm@...r.kernel.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH 2/2] KVM: Protect vCPU's "last run PID" with rwlock, not RCU
On Fri, Aug 2, 2024 at 1:01 PM Sean Christopherson <seanjc@...gle.com> wrote:
>
> To avoid jitter on KVM_RUN due to synchronize_rcu(), use a rwlock instead
> of RCU to protect vcpu->pid, a.k.a. the pid of the task last used to a
> vCPU. When userspace is doing M:N scheduling of tasks to vCPUs, e.g. to
> run SEV migration helper vCPUs during post-copy, the synchronize_rcu()
> needed to change the PID associated with the vCPU can stall for hundreds
> of milliseconds, which is problematic for latency sensitive post-copy
> operations.
>
> In the directed yield path, do not acquire the lock if it's contended,
> i.e. if the associated PID is changing, as that means the vCPU's task is
> already running.
>
> Reported-by: Steve Rutherford <srutherford@...gle.com>
> Signed-off-by: Sean Christopherson <seanjc@...gle.com>
> ---
> arch/arm64/include/asm/kvm_host.h | 2 +-
> include/linux/kvm_host.h | 3 ++-
> virt/kvm/kvm_main.c | 32 +++++++++++++++++--------------
> 3 files changed, 21 insertions(+), 16 deletions(-)
>
> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> index a33f5996ca9f..7199cb014806 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -1115,7 +1115,7 @@ int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
> void kvm_arm_halt_guest(struct kvm *kvm);
> void kvm_arm_resume_guest(struct kvm *kvm);
>
> -#define vcpu_has_run_once(vcpu) !!rcu_access_pointer((vcpu)->pid)
> +#define vcpu_has_run_once(vcpu) (!!READ_ONCE((vcpu)->pid))
>
> #ifndef __KVM_NVHE_HYPERVISOR__
> #define kvm_call_hyp_nvhe(f, ...) \
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 689e8be873a7..d6f4e8b2b44c 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -342,7 +342,8 @@ struct kvm_vcpu {
> #ifndef __KVM_HAVE_ARCH_WQP
> struct rcuwait wait;
> #endif
> - struct pid __rcu *pid;
> + struct pid *pid;
> + rwlock_t pid_lock;
> int sigset_active;
> sigset_t sigset;
> unsigned int halt_poll_ns;
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 91048a7ad3be..fabffd85fa34 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -486,6 +486,7 @@ static void kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
> vcpu->kvm = kvm;
> vcpu->vcpu_id = id;
> vcpu->pid = NULL;
> + rwlock_init(&vcpu->pid_lock);
> #ifndef __KVM_HAVE_ARCH_WQP
> rcuwait_init(&vcpu->wait);
> #endif
> @@ -513,7 +514,7 @@ static void kvm_vcpu_destroy(struct kvm_vcpu *vcpu)
> * the vcpu->pid pointer, and at destruction time all file descriptors
> * are already gone.
> */
> - put_pid(rcu_dereference_protected(vcpu->pid, 1));
> + put_pid(vcpu->pid);
>
> free_page((unsigned long)vcpu->run);
> kmem_cache_free(kvm_vcpu_cache, vcpu);
> @@ -3930,15 +3931,17 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_kick);
>
> int kvm_vcpu_yield_to(struct kvm_vcpu *target)
> {
> - struct pid *pid;
> struct task_struct *task = NULL;
> int ret;
>
> - rcu_read_lock();
> - pid = rcu_dereference(target->pid);
> - if (pid)
> - task = get_pid_task(pid, PIDTYPE_PID);
> - rcu_read_unlock();
> + if (!read_trylock(&target->pid_lock))
> + return 0;
> +
> + if (target->pid)
> + task = get_pid_task(target->pid, PIDTYPE_PID);
> +
> + read_unlock(&target->pid_lock);
> +
> if (!task)
> return 0;
> ret = yield_to(task, 1);
> @@ -4178,9 +4181,9 @@ static int vcpu_get_pid(void *data, u64 *val)
> {
> struct kvm_vcpu *vcpu = data;
>
> - rcu_read_lock();
> - *val = pid_nr(rcu_dereference(vcpu->pid));
> - rcu_read_unlock();
> + read_lock(&vcpu->pid_lock);
> + *val = pid_nr(vcpu->pid);
> + read_unlock(&vcpu->pid_lock);
> return 0;
> }
>
> @@ -4466,7 +4469,7 @@ static long kvm_vcpu_ioctl(struct file *filp,
> r = -EINVAL;
> if (arg)
> goto out;
> - oldpid = rcu_access_pointer(vcpu->pid);
> + oldpid = vcpu->pid;
Overall this patch looks correct, but this spot took me a moment, and
I want to confirm. This skips the reader lock since writing only
happens just below, under the vcpu lock, and we've already taken that
lock?
> if (unlikely(oldpid != task_pid(current))) {
> /* The thread running this VCPU changed. */
> struct pid *newpid;
> @@ -4476,9 +4479,10 @@ static long kvm_vcpu_ioctl(struct file *filp,
> break;
>
> newpid = get_task_pid(current, PIDTYPE_PID);
> - rcu_assign_pointer(vcpu->pid, newpid);
> - if (oldpid)
> - synchronize_rcu();
> + write_lock(&vcpu->pid_lock);
> + vcpu->pid = newpid;
> + write_unlock(&vcpu->pid_lock);
> +
> put_pid(oldpid);
> }
> vcpu->wants_to_run = !READ_ONCE(vcpu->run->immediate_exit__unsafe);
> --
> 2.46.0.rc2.264.g509ed76dc8-goog
>
Powered by blists - more mailing lists