[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <d599b411-3f1e-c67e-dd39-ad4ef42764e7@redhat.com>
Date: Wed, 22 Jan 2020 16:44:45 +0100
From: Paolo Bonzini <pbonzini@...hat.com>
To: "Suthikulpanit, Suravee" <Suravee.Suthikulpanit@....com>,
"linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
"kvm@...r.kernel.org" <kvm@...r.kernel.org>,
"iommu@...ts.linux-foundation.org" <iommu@...ts.linux-foundation.org>
Cc: "joro@...tes.org" <joro@...tes.org>,
"rkrcmar@...hat.com" <rkrcmar@...hat.com>
Subject: Re: [PATCH] svm/avic: iommu/amd: Flush IOMMU IRT after update all
entries
On 20/03/19 09:14, Suthikulpanit, Suravee wrote:
> When AVIC is enabled and the VM has discrete device assignment,
> the interrupt remapping table (IRT) is used to keep track of which
> destination APIC ID the IOMMU will inject the device interrput to.
>
> This means every time a vcpu is blocked or context-switched (i.e.
> vcpu_blocking/unblocking() and vcpu_load/put()), the information
> in IRT must be updated and the IOMMU IRT flush command must be
> issued.
>
> The current implementation flushes IOMMU IRT every time an entry
> is modified. If the assigned device has large number of interrupts
> (hence large number of entries), this would add large amount of
> overhead to vcpu context-switch. Instead, this can be optmized by
> only flush IRT once per vcpu context-switch per device after all
> IRT entries are modified.
>
> The function amd_iommu_update_ga() is refactored to only update
> IRT entry, while the amd_iommu_sync_ga() is introduced to allow
> IRT flushing to be done separately.
>
> Cc: Joerg Roedel <joro@...tes.org>
> Cc: Radim Krčmář <rkrcmar@...hat.com>
> Cc: Paolo Bonzini <pbonzini@...hat.com>
> Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@....com>
> ---
> arch/x86/kvm/svm.c | 35 ++++++++++++++++++++++++++++++++++-
> drivers/iommu/amd_iommu.c | 20 +++++++++++++++++---
> include/linux/amd-iommu.h | 13 ++++++++++---
> 3 files changed, 61 insertions(+), 7 deletions(-)
I found this patch in my inbox... I'd rather avoid allocating 8k of RAM
per vCPU. Can you make it per-VM?
Paolo
> + /*
> + * Bitmap used to store PCI devid to sync
> + * AMD IOMMU interrupt remapping table
> + */
> + unsigned long *avic_devid_sync_bitmap;
> };
>
> /*
> @@ -1984,6 +1992,7 @@ static inline int
> avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
> {
> int ret = 0;
> + int devid = 0;
> unsigned long flags;
> struct amd_svm_iommu_ir *ir;
> struct vcpu_svm *svm = to_svm(vcpu);
> @@ -2001,9 +2010,21 @@ avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
> goto out;
>
> list_for_each_entry(ir, &svm->ir_list, node) {
> - ret = amd_iommu_update_ga(cpu, r, ir->data);
> + ret = amd_iommu_update_ga(cpu, r, ir->data, &devid);
> if (ret)
> break;
> + set_bit(devid, svm->avic_devid_sync_bitmap);
> + }
> +
> + /* Sync AMD IOMMU interrupt remapping table changes for each device. */
> + devid = find_next_bit(svm->avic_devid_sync_bitmap,
> + AVIC_DEVID_BITMAP_SIZE, 0);
> +
> + while (devid < AVIC_DEVID_BITMAP_SIZE) {
> + clear_bit(devid, svm->avic_devid_sync_bitmap);
> + ret = amd_iommu_sync_ga(devid);
> + devid = find_next_bit(svm->avic_devid_sync_bitmap,
> + AVIC_DEVID_BITMAP_SIZE, devid+1);
> }
> out:
> spin_unlock_irqrestore(&svm->ir_list_lock, flags);
> @@ -2107,6 +2128,13 @@ static int avic_init_vcpu(struct vcpu_svm *svm)
> INIT_LIST_HEAD(&svm->ir_list);
> spin_lock_init(&svm->ir_list_lock);
>
> + svm->avic_devid_sync_bitmap = (void *)__get_free_pages(
> + GFP_KERNEL | __GFP_ZERO,
> + get_order(AVIC_DEVID_BITMAP_SIZE/8));
> + if (svm->avic_devid_sync_bitmap == NULL)
> + ret = -ENOMEM;
> + memset(svm->avic_devid_sync_bitmap, 0, AVIC_DEVID_BITMAP_SIZE/8);
> +
> return ret;
> }
>
> @@ -2221,6 +2249,11 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
> __free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
> __free_page(virt_to_page(svm->nested.hsave));
> __free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER);
> +
> + free_pages((unsigned long)svm->avic_devid_sync_bitmap,
> + get_order(AVIC_DEVID_BITMAP_SIZE/8));
> + svm->avic_devid_sync_bitmap = NULL;
> +
> kvm_vcpu_uninit(vcpu);
> kmem_cache_free(x86_fpu_cache, svm->vcpu.arch.guest_fpu);
> kmem_cache_free(kvm_vcpu_cache, svm);
> diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
> index 2a7b78bb98b4..637bcc9192e5 100644
> --- a/drivers/iommu/amd_iommu.c
> +++ b/drivers/iommu/amd_iommu.c
> @@ -4499,7 +4499,20 @@ int amd_iommu_create_irq_domain(struct amd_iommu *iommu)
> return 0;
> }
>
> -int amd_iommu_update_ga(int cpu, bool is_run, void *data)
> +int amd_iommu_sync_ga(int devid)
> +{
> + struct amd_iommu *iommu = amd_iommu_rlookup_table[devid];
> +
> + if (!iommu)
> + return -ENODEV;
> +
> + iommu_flush_irt(iommu, devid);
> + iommu_completion_wait(iommu);
> + return 0;
> +}
> +EXPORT_SYMBOL(amd_iommu_sync_ga);
> +
> +int amd_iommu_update_ga(int cpu, bool is_run, void *data, int *id)
> {
> unsigned long flags;
> struct amd_iommu *iommu;
> @@ -4521,6 +4534,9 @@ int amd_iommu_update_ga(int cpu, bool is_run, void *data)
> if (!table)
> return -ENODEV;
>
> + if (id)
> + *id = devid;
> +
> raw_spin_lock_irqsave(&table->lock, flags);
>
> if (ref->lo.fields_vapic.guest_mode) {
> @@ -4536,8 +4552,6 @@ int amd_iommu_update_ga(int cpu, bool is_run, void *data)
>
> raw_spin_unlock_irqrestore(&table->lock, flags);
>
> - iommu_flush_irt(iommu, devid);
> - iommu_completion_wait(iommu);
> return 0;
> }
> EXPORT_SYMBOL(amd_iommu_update_ga);
> diff --git a/include/linux/amd-iommu.h b/include/linux/amd-iommu.h
> index 09751d349963..b94d4b33dfd7 100644
> --- a/include/linux/amd-iommu.h
> +++ b/include/linux/amd-iommu.h
> @@ -193,8 +193,9 @@ static inline int amd_iommu_detect(void) { return -ENODEV; }
> /* IOMMU AVIC Function */
> extern int amd_iommu_register_ga_log_notifier(int (*notifier)(u32));
>
> -extern int
> -amd_iommu_update_ga(int cpu, bool is_run, void *data);
> +extern int amd_iommu_update_ga(int cpu, bool is_run, void *data, int *devid);
> +
> +extern int amd_iommu_sync_ga(int devid);
>
> #else /* defined(CONFIG_AMD_IOMMU) && defined(CONFIG_IRQ_REMAP) */
>
> @@ -205,7 +206,13 @@ amd_iommu_register_ga_log_notifier(int (*notifier)(u32))
> }
>
> static inline int
> -amd_iommu_update_ga(int cpu, bool is_run, void *data)
> +amd_iommu_update_ga(int cpu, bool is_run, void *data, int *devid)
> +{
> + return 0;
> +}
> +
> +static inline int
> +amd_iommu_sync_ga(int devid)
> {
> return 0;
> }
>
Powered by blists - more mailing lists