[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <6f76183f-a903-47fd-8c84-0d9892632fca@amd.com>
Date: Fri, 11 Apr 2025 16:27:25 +0530
From: "Arun Kodilkar, Sairaj" <sarunkod@....com>
To: Sean Christopherson <seanjc@...gle.com>, Paolo Bonzini
<pbonzini@...hat.com>, Joerg Roedel <joro@...tes.org>, David Woodhouse
<dwmw2@...radead.org>, Lu Baolu <baolu.lu@...ux.intel.com>
CC: <kvm@...r.kernel.org>, <iommu@...ts.linux.dev>,
<linux-kernel@...r.kernel.org>, Maxim Levitsky <mlevitsk@...hat.com>, "Joao
Martins" <joao.m.martins@...cle.com>, David Matlack <dmatlack@...gle.com>,
Naveen N Rao <naveen.rao@....com>, Vasant Hegde <vasant.hegde@....com>
Subject: Re: [PATCH 08/67] KVM: x86: Pass new routing entries and irqfd when
updating IRTEs
On 4/5/2025 1:08 AM, Sean Christopherson wrote:
> When updating IRTEs in response to a GSI routing or IRQ bypass change,
> pass the new/current routing information along with the associated irqfd.
> This will allow KVM x86 to harden, simplify, and deduplicate its code.
>
> Since adding/removing a bypass producer is now conveniently protected with
> irqfds.lock, i.e. can't run concurrently with kvm_irq_routing_update(),
> use the routing information cached in the irqfd instead of looking up
> the information in the current GSI routing tables.
>
> Opportunistically convert an existing printk() to pr_info() and put its
> string onto a single line (old code that strictly adhered to 80 chars).
>
> Signed-off-by: Sean Christopherson <seanjc@...gle.com>
> ---
> arch/x86/include/asm/kvm_host.h | 6 ++++--
> arch/x86/kvm/svm/avic.c | 18 +++++++----------
> arch/x86/kvm/svm/svm.h | 5 +++--
> arch/x86/kvm/vmx/posted_intr.c | 19 ++++++++---------
> arch/x86/kvm/vmx/posted_intr.h | 8 ++++++--
> arch/x86/kvm/x86.c | 36 ++++++++++++++++++---------------
> include/linux/kvm_host.h | 7 +++++--
> virt/kvm/eventfd.c | 11 +++++-----
> 8 files changed, 58 insertions(+), 52 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 6e8be274c089..54f3cf73329b 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -294,6 +294,7 @@ enum x86_intercept_stage;
> */
> #define KVM_APIC_PV_EOI_PENDING 1
>
> +struct kvm_kernel_irqfd;
> struct kvm_kernel_irq_routing_entry;
>
> /*
> @@ -1828,8 +1829,9 @@ struct kvm_x86_ops {
> void (*vcpu_blocking)(struct kvm_vcpu *vcpu);
> void (*vcpu_unblocking)(struct kvm_vcpu *vcpu);
>
> - int (*pi_update_irte)(struct kvm *kvm, unsigned int host_irq,
> - uint32_t guest_irq, bool set);
> + int (*pi_update_irte)(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
> + unsigned int host_irq, uint32_t guest_irq,
> + struct kvm_kernel_irq_routing_entry *new);
> void (*pi_start_assignment)(struct kvm *kvm);
> void (*apicv_pre_state_restore)(struct kvm_vcpu *vcpu);
> void (*apicv_post_state_restore)(struct kvm_vcpu *vcpu);
> diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
> index 1708ea55125a..04dfd898ea8d 100644
> --- a/arch/x86/kvm/svm/avic.c
> +++ b/arch/x86/kvm/svm/avic.c
> @@ -18,6 +18,7 @@
> #include <linux/hashtable.h>
> #include <linux/amd-iommu.h>
> #include <linux/kvm_host.h>
> +#include <linux/kvm_irqfd.h>
>
> #include <asm/irq_remapping.h>
>
> @@ -885,21 +886,14 @@ get_pi_vcpu_info(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e,
> return 0;
> }
>
> -/*
> - * avic_pi_update_irte - set IRTE for Posted-Interrupts
> - *
> - * @kvm: kvm
> - * @host_irq: host irq of the interrupt
> - * @guest_irq: gsi of the interrupt
> - * @set: set or unset PI
> - * returns 0 on success, < 0 on failure
> - */
> -int avic_pi_update_irte(struct kvm *kvm, unsigned int host_irq,
> - uint32_t guest_irq, bool set)
> +int avic_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
> + unsigned int host_irq, uint32_t guest_irq,
> + struct kvm_kernel_irq_routing_entry *new)
> {
> struct kvm_kernel_irq_routing_entry *e;
> struct kvm_irq_routing_table *irq_rt;
> bool enable_remapped_mode = true;
> + bool set = !!new;
> int idx, ret = 0;
>
> if (!kvm_arch_has_assigned_device(kvm) || !kvm_arch_has_irq_bypass())
> @@ -925,6 +919,8 @@ int avic_pi_update_irte(struct kvm *kvm, unsigned int host_irq,
> if (e->type != KVM_IRQ_ROUTING_MSI)
> continue;
>
> + WARN_ON_ONCE(new && memcmp(e, new, sizeof(*new)));
> +
>
Hi Sean,
In kvm_irq_routing_update() function, its possible that there are
multiple entries in the `kvm_irq_routing_table`, and `irqfd_update()`
ends up setting up the new entry type to 0 instead of copying the entry.
if (n_entries == 1)
irqfd->irq_entry = *e;
else
irqfd->irq_entry.type = 0;
Since irqfd_update() did not copy the entry to irqfd->entries, the "new"
will not match entry "e" obtained from irq_rt, which can trigger a false
WARN_ON.
Let me know if I am missing something here.
Regards
Sairaj Kodilkar
> /**
> * Here, we setup with legacy mode in the following cases:
> * 1. When cannot target interrupt to a specific vcpu.
> diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
> index d4490eaed55d..294d5594c724 100644
> --- a/arch/x86/kvm/svm/svm.h
> +++ b/arch/x86/kvm/svm/svm.h
> @@ -731,8 +731,9 @@ void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
> void avic_vcpu_put(struct kvm_vcpu *vcpu);
> void avic_apicv_post_state_restore(struct kvm_vcpu *vcpu);
> void avic_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu);
> -int avic_pi_update_irte(struct kvm *kvm, unsigned int host_irq,
> - uint32_t guest_irq, bool set);
> +int avic_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
> + unsigned int host_irq, uint32_t guest_irq,
> + struct kvm_kernel_irq_routing_entry *new);
> void avic_vcpu_blocking(struct kvm_vcpu *vcpu);
> void avic_vcpu_unblocking(struct kvm_vcpu *vcpu);
> void avic_ring_doorbell(struct kvm_vcpu *vcpu);
> diff --git a/arch/x86/kvm/vmx/posted_intr.c b/arch/x86/kvm/vmx/posted_intr.c
> index 78ba3d638fe8..1b6b655a2b8a 100644
> --- a/arch/x86/kvm/vmx/posted_intr.c
> +++ b/arch/x86/kvm/vmx/posted_intr.c
> @@ -2,6 +2,7 @@
> #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
>
> #include <linux/kvm_host.h>
> +#include <linux/kvm_irqfd.h>
>
> #include <asm/irq_remapping.h>
> #include <asm/cpu.h>
> @@ -259,17 +260,9 @@ void vmx_pi_start_assignment(struct kvm *kvm)
> kvm_make_all_cpus_request(kvm, KVM_REQ_UNBLOCK);
> }
>
> -/*
> - * vmx_pi_update_irte - set IRTE for Posted-Interrupts
> - *
> - * @kvm: kvm
> - * @host_irq: host irq of the interrupt
> - * @guest_irq: gsi of the interrupt
> - * @set: set or unset PI
> - * returns 0 on success, < 0 on failure
> - */
> -int vmx_pi_update_irte(struct kvm *kvm, unsigned int host_irq,
> - uint32_t guest_irq, bool set)
> +int vmx_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
> + unsigned int host_irq, uint32_t guest_irq,
> + struct kvm_kernel_irq_routing_entry *new)
> {
> struct kvm_kernel_irq_routing_entry *e;
> struct kvm_irq_routing_table *irq_rt;
> @@ -277,6 +270,7 @@ int vmx_pi_update_irte(struct kvm *kvm, unsigned int host_irq,
> struct kvm_lapic_irq irq;
> struct kvm_vcpu *vcpu;
> struct vcpu_data vcpu_info;
> + bool set = !!new;
> int idx, ret = 0;
>
> if (!vmx_can_use_vtd_pi(kvm))
> @@ -294,6 +288,9 @@ int vmx_pi_update_irte(struct kvm *kvm, unsigned int host_irq,
> hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) {
> if (e->type != KVM_IRQ_ROUTING_MSI)
> continue;
> +
> + WARN_ON_ONCE(new && memcmp(e, new, sizeof(*new)));
> +
> /*
> * VT-d PI cannot support posting multicast/broadcast
> * interrupts to a vCPU, we still use interrupt remapping
> diff --git a/arch/x86/kvm/vmx/posted_intr.h b/arch/x86/kvm/vmx/posted_intr.h
> index ad9116a99bcc..a586d6aaf862 100644
> --- a/arch/x86/kvm/vmx/posted_intr.h
> +++ b/arch/x86/kvm/vmx/posted_intr.h
> @@ -3,6 +3,9 @@
> #define __KVM_X86_VMX_POSTED_INTR_H
>
> #include <linux/bitmap.h>
> +#include <linux/find.h>
> +#include <linux/kvm_host.h>
> +
> #include <asm/posted_intr.h>
>
> void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu);
> @@ -10,8 +13,9 @@ void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu);
> void pi_wakeup_handler(void);
> void __init pi_init_cpu(int cpu);
> bool pi_has_pending_interrupt(struct kvm_vcpu *vcpu);
> -int vmx_pi_update_irte(struct kvm *kvm, unsigned int host_irq,
> - uint32_t guest_irq, bool set);
> +int vmx_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
> + unsigned int host_irq, uint32_t guest_irq,
> + struct kvm_kernel_irq_routing_entry *new);
> void vmx_pi_start_assignment(struct kvm *kvm);
>
> static inline int pi_find_highest_vector(struct pi_desc *pi_desc)
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index dcc173852dc5..23376fcd928c 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -13570,31 +13570,31 @@ int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
> struct kvm_kernel_irqfd *irqfd =
> container_of(cons, struct kvm_kernel_irqfd, consumer);
> struct kvm *kvm = irqfd->kvm;
> - int ret;
> + int ret = 0;
>
> kvm_arch_start_assignment(irqfd->kvm);
>
> spin_lock_irq(&kvm->irqfds.lock);
> irqfd->producer = prod;
>
> - ret = kvm_x86_call(pi_update_irte)(irqfd->kvm,
> - prod->irq, irqfd->gsi, 1);
> - if (ret)
> - kvm_arch_end_assignment(irqfd->kvm);
> -
> + if (irqfd->irq_entry.type == KVM_IRQ_ROUTING_MSI) {
> + ret = kvm_x86_call(pi_update_irte)(irqfd, irqfd->kvm, prod->irq,
> + irqfd->gsi, &irqfd->irq_entry);
> + if (ret)
> + kvm_arch_end_assignment(irqfd->kvm);
> + }
> spin_unlock_irq(&kvm->irqfds.lock);
>
> -
> return ret;
> }
>
> void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
> struct irq_bypass_producer *prod)
> {
> - int ret;
> struct kvm_kernel_irqfd *irqfd =
> container_of(cons, struct kvm_kernel_irqfd, consumer);
> struct kvm *kvm = irqfd->kvm;
> + int ret;
>
> WARN_ON(irqfd->producer != prod);
>
> @@ -13607,11 +13607,13 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
> spin_lock_irq(&kvm->irqfds.lock);
> irqfd->producer = NULL;
>
> - ret = kvm_x86_call(pi_update_irte)(irqfd->kvm,
> - prod->irq, irqfd->gsi, 0);
> - if (ret)
> - printk(KERN_INFO "irq bypass consumer (token %p) unregistration"
> - " fails: %d\n", irqfd->consumer.token, ret);
> + if (irqfd->irq_entry.type == KVM_IRQ_ROUTING_MSI) {
> + ret = kvm_x86_call(pi_update_irte)(irqfd, irqfd->kvm, prod->irq,
> + irqfd->gsi, NULL);
> + if (ret)
> + pr_info("irq bypass consumer (token %p) unregistration fails: %d\n",
> + irqfd->consumer.token, ret);
> + }
>
> spin_unlock_irq(&kvm->irqfds.lock);
>
> @@ -13619,10 +13621,12 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
> kvm_arch_end_assignment(irqfd->kvm);
> }
>
> -int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq,
> - uint32_t guest_irq, bool set)
> +int kvm_arch_update_irqfd_routing(struct kvm_kernel_irqfd *irqfd,
> + struct kvm_kernel_irq_routing_entry *old,
> + struct kvm_kernel_irq_routing_entry *new)
> {
> - return kvm_x86_call(pi_update_irte)(kvm, host_irq, guest_irq, set);
> + return kvm_x86_call(pi_update_irte)(irqfd, irqfd->kvm, irqfd->producer->irq,
> + irqfd->gsi, new);
> }
>
> bool kvm_arch_irqfd_route_changed(struct kvm_kernel_irq_routing_entry *old,
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 5438a1b446a6..2d9f3aeb766a 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -2383,6 +2383,8 @@ struct kvm_vcpu *kvm_get_running_vcpu(void);
> struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void);
>
> #ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
> +struct kvm_kernel_irqfd;
> +
> bool kvm_arch_has_irq_bypass(void);
> int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *,
> struct irq_bypass_producer *);
> @@ -2390,8 +2392,9 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *,
> struct irq_bypass_producer *);
> void kvm_arch_irq_bypass_stop(struct irq_bypass_consumer *);
> void kvm_arch_irq_bypass_start(struct irq_bypass_consumer *);
> -int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq,
> - uint32_t guest_irq, bool set);
> +int kvm_arch_update_irqfd_routing(struct kvm_kernel_irqfd *irqfd,
> + struct kvm_kernel_irq_routing_entry *old,
> + struct kvm_kernel_irq_routing_entry *new);
> bool kvm_arch_irqfd_route_changed(struct kvm_kernel_irq_routing_entry *,
> struct kvm_kernel_irq_routing_entry *);
> #endif /* CONFIG_HAVE_KVM_IRQ_BYPASS */
> diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
> index 249ba5b72e9b..ad71e3e4d1c3 100644
> --- a/virt/kvm/eventfd.c
> +++ b/virt/kvm/eventfd.c
> @@ -285,9 +285,9 @@ void __attribute__((weak)) kvm_arch_irq_bypass_start(
> {
> }
>
> -int __attribute__((weak)) kvm_arch_update_irqfd_routing(
> - struct kvm *kvm, unsigned int host_irq,
> - uint32_t guest_irq, bool set)
> +int __weak kvm_arch_update_irqfd_routing(struct kvm_kernel_irqfd *irqfd,
> + struct kvm_kernel_irq_routing_entry *old,
> + struct kvm_kernel_irq_routing_entry *new)
> {
> return 0;
> }
> @@ -619,9 +619,8 @@ void kvm_irq_routing_update(struct kvm *kvm)
> #ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
> if (irqfd->producer &&
> kvm_arch_irqfd_route_changed(&old, &irqfd->irq_entry)) {
> - int ret = kvm_arch_update_irqfd_routing(
> - irqfd->kvm, irqfd->producer->irq,
> - irqfd->gsi, 1);
> + int ret = kvm_arch_update_irqfd_routing(irqfd, &old, &irqfd->irq_entry);
> +
> WARN_ON(ret);
> }
> #endif
Powered by blists - more mailing lists