[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <95d6d0a68cb8a4277be0066c26b482e7554c7f26.camel@infradead.org>
Date: Mon, 14 Nov 2022 16:16:25 -0800
From: David Woodhouse <dwmw2@...radead.org>
To: "Durrant, Paul" <pdurrant@...zon.co.uk>,
Paolo Bonzini <pbonzini@...hat.com>,
Sean Christopherson <seanjc@...gle.com>
Cc: "linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
"kvm@...r.kernel.org" <kvm@...r.kernel.org>,
"mhal@...x.co" <mhal@...x.co>,
"Kaya, Metin" <metikaya@...zon.co.uk>
Subject: Re: [PATCH 03/16] KVM: x86: set gfn-to-pfn cache length
consistently with VM word size
On Mon, 2022-11-14 at 11:27 +0000, Durrant, Paul wrote:
> > On Sun, 2022-11-13 at 13:32 +0000, David Woodhouse wrote:
> > > For the runstate area, I think we can live with using a gfn_to_hva
> > > cache instead, and writing via the userspace address (with appropriate
> > > atomicity for the RUNSTATE_runnable case as we have at the moment
> > > gating the refresh).
> >
> > Which mostly involves just reverting commit a795cd43c5b5 I think?
> >
> > IIRC the reason for that commit was mostly consistency with other
> > things that really *did* want to be switched to gpc.
>
> A straight reversion would re-introduce the page-spanning check in
> kvm_xen_vcpu_set_attr() but I think we can just leave that out.
That check matches the behaviour of kvm_gfn_to_hva_cache_init() which
deliberately clears ghc->memslot if it crosses a page. (Although I
don't see why; it only really needs to do that if it crosses over onto
a second *memslot*, surely?)
So we'd then hit this in kvm_xen_update_runstate_guest():
/* We made sure it fits in a single page */
BUG_ON(!ghc->memslot);
We'd need a fallback code path for the page-crossing (or memslot-
crossing) case, and the natural way to do that is to just use
kvm_write_guest(). In the RUNSTATE_runnable case where we can't sleep,
can we get away with using pagefault_disable() around a call to
kvm_write_guest()?
The worst case here is kind of awful; it's a compat guest with the
first half of its ->state_entry_time being on the first page, and the
second half of it being on the second page.
I'm playing with using a second GPC for the overrun onto the second
page. Debating if it's already too ugly to live before I even fix up
the actual copying part...
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 81114a376c4e..3fc08f416aa3 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -647,6 +647,7 @@ struct kvm_vcpu_xen {
struct gfn_to_pfn_cache vcpu_info_cache;
struct gfn_to_pfn_cache vcpu_time_info_cache;
struct gfn_to_pfn_cache runstate_cache;
+ struct gfn_to_pfn_cache runstate2_cache;
u64 last_steal;
u64 runstate_entry_time;
u64 runstate_times[4];
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index 4b8e9628fbf5..e297569bbc8d 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -201,35 +201,59 @@ static void kvm_xen_update_runstate(struct kvm_vcpu *v, int state)
void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state)
{
struct kvm_vcpu_xen *vx = &v->arch.xen;
- struct gfn_to_pfn_cache *gpc = &vx->runstate_cache;
+ struct gfn_to_pfn_cache *gpc1 = &vx->runstate_cache;
+ struct gfn_to_pfn_cache *gpc2 = &vx->runstate2_cache;
uint64_t *user_times;
unsigned long flags;
- size_t user_len;
+ size_t user_len1, user_len2 = 0;
int *user_state;
kvm_xen_update_runstate(v, state);
- if (!vx->runstate_cache.active)
+ if (!gpc1->active)
return;
if (IS_ENABLED(CONFIG_64BIT) && v->kvm->arch.xen.long_mode)
- user_len = sizeof(struct vcpu_runstate_info);
+ user_len1 = sizeof(struct vcpu_runstate_info);
else
- user_len = sizeof(struct compat_vcpu_runstate_info);
+ user_len1 = sizeof(struct compat_vcpu_runstate_info);
- read_lock_irqsave(&gpc->lock, flags);
- while (!kvm_gfn_to_pfn_cache_check(v->kvm, gpc, gpc->gpa,
- user_len)) {
- read_unlock_irqrestore(&gpc->lock, flags);
+ if ((gpc1->gpa & ~PAGE_MASK) + user_len1 >= PAGE_SIZE) {
+ user_len2 = user_len1;
+ user_len1 = PAGE_SIZE - (gpc1->gpa & ~PAGE_MASK);
+ user_len2 -= user_len1;
+ }
+
+ retry:
+ read_lock_irqsave(&gpc1->lock, flags);
+ while (!kvm_gfn_to_pfn_cache_check(v->kvm, gpc1, gpc1->gpa,
+ user_len1)) {
+ read_unlock_irqrestore(&gpc1->lock, flags);
/* When invoked from kvm_sched_out() we cannot sleep */
if (state == RUNSTATE_runnable)
return;
- if (kvm_gfn_to_pfn_cache_refresh(v->kvm, gpc, gpc->gpa, user_len))
+ if (kvm_gfn_to_pfn_cache_refresh(v->kvm, gpc1, gpc1->gpa, user_len1))
return;
- read_lock_irqsave(&gpc->lock, flags);
+ read_lock_irqsave(&gpc1->lock, flags);
+ }
+ if (user_len2) {
+ read_lock(&gpc2->lock);
+ if (!kvm_gfn_to_pfn_cache_check(v->kvm, gpc2, gpc2->gpa, user_len2)) {
+ read_unlock(&gpc2->lock);
+ read_unlock_irqrestore(&gpc1->lock, flags);
+
+ if (state == RUNSTATE_runnable)
+ return;
+
+ if (kvm_gfn_to_pfn_cache_refresh(v->kvm, gpc2,
+ gpc2->gpa, user_len2))
+ return;
+
+ goto retry;
+ }
}
/*
@@ -584,23 +608,52 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
break;
- case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR:
+ case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR: {
+ size_t sz;
+
if (!sched_info_on()) {
r = -EOPNOTSUPP;
break;
}
if (data->u.gpa == GPA_INVALID) {
+ r = 0;
+ deactivate_out:
kvm_gpc_deactivate(vcpu->kvm,
&vcpu->arch.xen.runstate_cache);
- r = 0;
+ deactivate2_out:
+ kvm_gpc_deactivate(vcpu->kvm,
+ &vcpu->arch.xen.runstate2_cache);
break;
}
- r = kvm_gpc_activate(vcpu->kvm, &vcpu->arch.xen.runstate_cache,
- NULL, KVM_HOST_USES_PFN, data->u.gpa,
- sizeof(struct vcpu_runstate_info));
+ if (IS_ENABLED(CONFIG_64BIT) && vcpu->kvm->arch.xen.long_mode)
+ sz = sizeof(struct vcpu_runstate_info);
+ else
+ sz = sizeof(struct compat_vcpu_runstate_info);
+
+ /* Handle structures which cross a page boundary by using two GPCs */
+ if ((data->u.gpa & ~PAGE_MASK) + sz <= PAGE_SIZE) {
+ r = kvm_gpc_activate(vcpu->kvm, &vcpu->arch.xen.runstate_cache,
+ NULL, KVM_HOST_USES_PFN, data->u.gpa,
+ sizeof(struct vcpu_runstate_info));
+ goto deactivate2_out;
+ } else {
+ /* Map the end of the first page... */
+ r = kvm_gpc_activate(vcpu->kvm, &vcpu->arch.xen.runstate_cache,
+ NULL, KVM_HOST_USES_PFN, data->u.gpa,
+ PAGE_SIZE - (data->u.gpa & ~PAGE_MASK));
+ if (r)
+ goto deactivate2_out;
+ /* ... and the start of the second. */
+ sz -= PAGE_SIZE - (data->u.gpa & ~PAGE_MASK);
+ r = kvm_gpc_activate(vcpu->kvm, &vcpu->arch.xen.runstate2_cache,
+ NULL, KVM_HOST_USES_PFN,
+ (data->u.gpa + PAGE_SIZE) & PAGE_MASK, sz);
+ if (r)
+ goto deactivate_out;
+ }
break;
-
+ }
case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT:
if (!sched_info_on()) {
r = -EOPNOTSUPP;
@@ -1834,6 +1887,7 @@ void kvm_xen_init_vcpu(struct kvm_vcpu *vcpu)
timer_setup(&vcpu->arch.xen.poll_timer, cancel_evtchn_poll, 0);
kvm_gpc_init(&vcpu->arch.xen.runstate_cache);
+ kvm_gpc_init(&vcpu->arch.xen.runstate2_cache);
kvm_gpc_init(&vcpu->arch.xen.vcpu_info_cache);
kvm_gpc_init(&vcpu->arch.xen.vcpu_time_info_cache);
}
@@ -1844,6 +1898,7 @@ void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu)
kvm_xen_stop_timer(vcpu);
kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.runstate_cache);
+ kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.runstate2_cache);
kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.vcpu_info_cache);
kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.vcpu_time_info_cache);
Download attachment "smime.p7s" of type "application/pkcs7-signature" (5965 bytes)
Powered by blists - more mailing lists