[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <87pmkcuvxt.fsf@redhat.com>
Date: Tue, 17 May 2022 15:31:58 +0200
From: Vitaly Kuznetsov <vkuznets@...hat.com>
To: Sean Christopherson <seanjc@...gle.com>
Cc: kvm@...r.kernel.org, Paolo Bonzini <pbonzini@...hat.com>,
Wanpeng Li <wanpengli@...cent.com>,
Jim Mattson <jmattson@...gle.com>,
Michael Kelley <mikelley@...rosoft.com>,
Siddharth Chandrasekaran <sidcha@...zon.de>,
linux-hyperv@...r.kernel.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH v3 02/34] KVM: x86: hyper-v: Introduce TLB flush ring
Sean Christopherson <seanjc@...gle.com> writes:
> On Thu, Apr 14, 2022, Vitaly Kuznetsov wrote:
>> To allow flushing individual GVAs instead of always flushing the whole
>> VPID, a per-vCPU structure to pass the requests is needed. Introduce a
>> simple ring write-locked structure to hold two types of entries:
>> individual GVA (GFN + up to 4095 following GFNs in the lower 12 bits)
>> and 'flush all'.
>>
>> The queuing rule is: if there's not enough space on the ring to put
>> the request and leave at least 1 entry for 'flush all' - put 'flush
>> all' entry.
>>
>> The size of the ring is arbitrarily set to '16'.
>>
>> Note, kvm_hv_flush_tlb() only queues 'flush all' entries for now so
>> there's a very small functional change but the infrastructure is
>> prepared to handle individual GVA flush requests.
>>
>> Signed-off-by: Vitaly Kuznetsov <vkuznets@...hat.com>
>> ---
>> arch/x86/include/asm/kvm_host.h | 16 +++++++
>> arch/x86/kvm/hyperv.c | 83 +++++++++++++++++++++++++++++++++
>> arch/x86/kvm/hyperv.h | 13 ++++++
>> arch/x86/kvm/x86.c | 5 +-
>> arch/x86/kvm/x86.h | 1 +
>> 5 files changed, 116 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
>> index 1de3ad9308d8..b4dd2ff61658 100644
>> --- a/arch/x86/include/asm/kvm_host.h
>> +++ b/arch/x86/include/asm/kvm_host.h
>> @@ -578,6 +578,20 @@ struct kvm_vcpu_hv_synic {
>> bool dont_zero_synic_pages;
>> };
>>
>> +#define KVM_HV_TLB_FLUSH_RING_SIZE (16)
>> +
>> +struct kvm_vcpu_hv_tlb_flush_entry {
>> + u64 addr;
>
> "addr" misleading, this is overloaded to be both the virtual address and the count.
> I think we make it a moot point, but it led me astray in thinking we could use the
> lower 12 bits for flags... until I realized those bits are already in use.
>
>> + u64 flush_all:1;
>> + u64 pad:63;
>
> This is rather odd, why not just use a bool?
My initial plan was to eventually put more flags here, i.e. there are
two additional flags which we don't currently handle:
HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES (as we don't actually look at
HV_ADDRESS_SPACE_ID)
HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY
> But why even have a "flush_all" field, can't we just use a magic value
> for write_idx to indicate "flush_all"? E.g. either an explicit #define
> or -1.
Sure, a magic value would do too and will allow us to make 'struct
kvm_vcpu_hv_tlb_flush_entry' 8 bytes instead of 16 (for the time being,
since if we are to add HV_ADDRESS_SPACE_ID/additional flags the net win
is going to be zero).
>
> Writers set write_idx to -1 to indicate "flush all", vCPU/reader goes straight
> to "flush all" if write_idx is -1/invalid. That way, future writes can simply do
> nothing until read_idx == write_idx, and the vCPU/reader avoids unnecessary flushes
> if there's a "flush all" pending and other valid entries in the ring.
>
> And it allows deferring the "flush all" until the ring is truly full (unless there's
> an off-by-one / wraparound edge case I'm missing, which is likely...).
Thanks for the patch! I am, however, going to look at Maxim's suggestion
to use 'kfifo' to avoid all these uncertainties, funky locking etc. At
first glance it has everything I need here.
>
> ---
> arch/x86/include/asm/kvm_host.h | 8 +-----
> arch/x86/kvm/hyperv.c | 47 +++++++++++++--------------------
> 2 files changed, 19 insertions(+), 36 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index b6b9a71a4591..bb45cc383ce4 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -605,16 +605,10 @@ enum hv_tlb_flush_rings {
> HV_NR_TLB_FLUSH_RINGS,
> };
>
> -struct kvm_vcpu_hv_tlb_flush_entry {
> - u64 addr;
> - u64 flush_all:1;
> - u64 pad:63;
> -};
> -
> struct kvm_vcpu_hv_tlb_flush_ring {
> int read_idx, write_idx;
> spinlock_t write_lock;
> - struct kvm_vcpu_hv_tlb_flush_entry entries[KVM_HV_TLB_FLUSH_RING_SIZE];
> + u64 entries[KVM_HV_TLB_FLUSH_RING_SIZE];
> };
>
> /* Hyper-V per vcpu emulation context */
> diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
> index 1d6927538bc7..56f06cf85282 100644
> --- a/arch/x86/kvm/hyperv.c
> +++ b/arch/x86/kvm/hyperv.c
> @@ -1837,10 +1837,13 @@ static int kvm_hv_get_tlb_flush_entries(struct kvm *kvm, struct kvm_hv_hcall *hc
> static inline int hv_tlb_flush_ring_free(struct kvm_vcpu_hv *hv_vcpu,
> int read_idx, int write_idx)
> {
> + if (write_idx < 0)
> + return 0;
> +
> if (write_idx >= read_idx)
> - return KVM_HV_TLB_FLUSH_RING_SIZE - (write_idx - read_idx) - 1;
> + return KVM_HV_TLB_FLUSH_RING_SIZE - (write_idx - read_idx);
>
> - return read_idx - write_idx - 1;
> + return read_idx - write_idx;
> }
>
> static void hv_tlb_flush_ring_enqueue(struct kvm_vcpu *vcpu,
> @@ -1869,6 +1872,9 @@ static void hv_tlb_flush_ring_enqueue(struct kvm_vcpu *vcpu,
> */
> write_idx = tlb_flush_ring->write_idx;
>
> + if (write_idx < 0 && read_idx == write_idx)
> + read_idx = write_idx = 0;
> +
> ring_free = hv_tlb_flush_ring_free(hv_vcpu, read_idx, write_idx);
> /* Full ring always contains 'flush all' entry */
> if (!ring_free)
> @@ -1879,21 +1885,13 @@ static void hv_tlb_flush_ring_enqueue(struct kvm_vcpu *vcpu,
> * entry in case another request comes in. In case there's not enough
> * space, just put 'flush all' entry there.
> */
> - if (!count || count >= ring_free - 1 || !entries) {
> - tlb_flush_ring->entries[write_idx].addr = 0;
> - tlb_flush_ring->entries[write_idx].flush_all = 1;
> - /*
> - * Advance write index only after filling in the entry to
> - * synchronize with lockless reader.
> - */
> - smp_wmb();
> - tlb_flush_ring->write_idx = (write_idx + 1) % KVM_HV_TLB_FLUSH_RING_SIZE;
> + if (!count || count > ring_free - 1 || !entries) {
> + tlb_flush_ring->write_idx = -1;
> goto out_unlock;
> }
>
> for (i = 0; i < count; i++) {
> - tlb_flush_ring->entries[write_idx].addr = entries[i];
> - tlb_flush_ring->entries[write_idx].flush_all = 0;
> + tlb_flush_ring->entries[write_idx] = entries[i];
> write_idx = (write_idx + 1) % KVM_HV_TLB_FLUSH_RING_SIZE;
> }
> /*
> @@ -1911,7 +1909,6 @@ void kvm_hv_vcpu_flush_tlb(struct kvm_vcpu *vcpu)
> {
> struct kvm_vcpu_hv_tlb_flush_ring *tlb_flush_ring;
> struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
> - struct kvm_vcpu_hv_tlb_flush_entry *entry;
> int read_idx, write_idx;
> u64 address;
> u32 count;
> @@ -1940,26 +1937,18 @@ void kvm_hv_vcpu_flush_tlb(struct kvm_vcpu *vcpu)
> /* Pairs with smp_wmb() in hv_tlb_flush_ring_enqueue() */
> smp_rmb();
>
> + if (write_idx < 0) {
> + kvm_vcpu_flush_tlb_guest(vcpu);
> + goto out_empty_ring;
> + }
> +
> for (i = read_idx; i != write_idx; i = (i + 1) % KVM_HV_TLB_FLUSH_RING_SIZE) {
> - entry = &tlb_flush_ring->entries[i];
> -
> - if (entry->flush_all)
> - goto out_flush_all;
> -
> - /*
> - * Lower 12 bits of 'address' encode the number of additional
> - * pages to flush.
> - */
> - address = entry->addr & PAGE_MASK;
> - count = (entry->addr & ~PAGE_MASK) + 1;
> + address = tlb_flush_ring->entries[i] & PAGE_MASK;
> + count = (tlb_flush_ring->entries[i] & ~PAGE_MASK) + 1;
> for (j = 0; j < count; j++)
> static_call(kvm_x86_flush_tlb_gva)(vcpu, address + j * PAGE_SIZE);
> }
> ++vcpu->stat.tlb_flush;
> - goto out_empty_ring;
> -
> -out_flush_all:
> - kvm_vcpu_flush_tlb_guest(vcpu);
>
> out_empty_ring:
> tlb_flush_ring->read_idx = write_idx;
>
> base-commit: 62592c7c742ae78eb1f1005a63965ece19e6effe
> --
>
--
Vitaly
Powered by blists - more mailing lists