lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <6f76183f-a903-47fd-8c84-0d9892632fca@amd.com>
Date: Fri, 11 Apr 2025 16:27:25 +0530
From: "Arun Kodilkar, Sairaj" <sarunkod@....com>
To: Sean Christopherson <seanjc@...gle.com>, Paolo Bonzini
	<pbonzini@...hat.com>, Joerg Roedel <joro@...tes.org>, David Woodhouse
	<dwmw2@...radead.org>, Lu Baolu <baolu.lu@...ux.intel.com>
CC: <kvm@...r.kernel.org>, <iommu@...ts.linux.dev>,
	<linux-kernel@...r.kernel.org>, Maxim Levitsky <mlevitsk@...hat.com>, "Joao
 Martins" <joao.m.martins@...cle.com>, David Matlack <dmatlack@...gle.com>,
	Naveen N Rao <naveen.rao@....com>, Vasant Hegde <vasant.hegde@....com>
Subject: Re: [PATCH 08/67] KVM: x86: Pass new routing entries and irqfd when
 updating IRTEs

On 4/5/2025 1:08 AM, Sean Christopherson wrote:
> When updating IRTEs in response to a GSI routing or IRQ bypass change,
> pass the new/current routing information along with the associated irqfd.
> This will allow KVM x86 to harden, simplify, and deduplicate its code.
> 
> Since adding/removing a bypass producer is now conveniently protected with
> irqfds.lock, i.e. can't run concurrently with kvm_irq_routing_update(),
> use the routing information cached in the irqfd instead of looking up
> the information in the current GSI routing tables.
> 
> Opportunistically convert an existing printk() to pr_info() and put its
> string onto a single line (old code that strictly adhered to 80 chars).
> 
> Signed-off-by: Sean Christopherson <seanjc@...gle.com>
> ---
>   arch/x86/include/asm/kvm_host.h |  6 ++++--
>   arch/x86/kvm/svm/avic.c         | 18 +++++++----------
>   arch/x86/kvm/svm/svm.h          |  5 +++--
>   arch/x86/kvm/vmx/posted_intr.c  | 19 ++++++++---------
>   arch/x86/kvm/vmx/posted_intr.h  |  8 ++++++--
>   arch/x86/kvm/x86.c              | 36 ++++++++++++++++++---------------
>   include/linux/kvm_host.h        |  7 +++++--
>   virt/kvm/eventfd.c              | 11 +++++-----
>   8 files changed, 58 insertions(+), 52 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 6e8be274c089..54f3cf73329b 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -294,6 +294,7 @@ enum x86_intercept_stage;
>    */
>   #define KVM_APIC_PV_EOI_PENDING	1
>   
> +struct kvm_kernel_irqfd;
>   struct kvm_kernel_irq_routing_entry;
>   
>   /*
> @@ -1828,8 +1829,9 @@ struct kvm_x86_ops {
>   	void (*vcpu_blocking)(struct kvm_vcpu *vcpu);
>   	void (*vcpu_unblocking)(struct kvm_vcpu *vcpu);
>   
> -	int (*pi_update_irte)(struct kvm *kvm, unsigned int host_irq,
> -			      uint32_t guest_irq, bool set);
> +	int (*pi_update_irte)(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
> +			      unsigned int host_irq, uint32_t guest_irq,
> +			      struct kvm_kernel_irq_routing_entry *new);
>   	void (*pi_start_assignment)(struct kvm *kvm);
>   	void (*apicv_pre_state_restore)(struct kvm_vcpu *vcpu);
>   	void (*apicv_post_state_restore)(struct kvm_vcpu *vcpu);
> diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
> index 1708ea55125a..04dfd898ea8d 100644
> --- a/arch/x86/kvm/svm/avic.c
> +++ b/arch/x86/kvm/svm/avic.c
> @@ -18,6 +18,7 @@
>   #include <linux/hashtable.h>
>   #include <linux/amd-iommu.h>
>   #include <linux/kvm_host.h>
> +#include <linux/kvm_irqfd.h>
>   
>   #include <asm/irq_remapping.h>
>   
> @@ -885,21 +886,14 @@ get_pi_vcpu_info(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e,
>   	return 0;
>   }
>   
> -/*
> - * avic_pi_update_irte - set IRTE for Posted-Interrupts
> - *
> - * @kvm: kvm
> - * @host_irq: host irq of the interrupt
> - * @guest_irq: gsi of the interrupt
> - * @set: set or unset PI
> - * returns 0 on success, < 0 on failure
> - */
> -int avic_pi_update_irte(struct kvm *kvm, unsigned int host_irq,
> -			uint32_t guest_irq, bool set)
> +int avic_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
> +			unsigned int host_irq, uint32_t guest_irq,
> +			struct kvm_kernel_irq_routing_entry *new)
>   {
>   	struct kvm_kernel_irq_routing_entry *e;
>   	struct kvm_irq_routing_table *irq_rt;
>   	bool enable_remapped_mode = true;
> +	bool set = !!new;
>   	int idx, ret = 0;
>   
>   	if (!kvm_arch_has_assigned_device(kvm) || !kvm_arch_has_irq_bypass())
> @@ -925,6 +919,8 @@ int avic_pi_update_irte(struct kvm *kvm, unsigned int host_irq,
>   		if (e->type != KVM_IRQ_ROUTING_MSI)
>   			continue;
>   
> +		WARN_ON_ONCE(new && memcmp(e, new, sizeof(*new)));
> +
> 

Hi Sean,

In kvm_irq_routing_update() function, its possible that there are
multiple entries in the `kvm_irq_routing_table`, and `irqfd_update()`
ends up setting up the new entry type to 0 instead of copying the entry.

if (n_entries == 1)
     irqfd->irq_entry = *e;
else
     irqfd->irq_entry.type = 0;

Since irqfd_update() did not copy the entry to irqfd->entries, the "new" 
will not match entry "e" obtained from irq_rt, which can trigger a false 
WARN_ON.

Let me know if I am missing something here.

Regards
Sairaj Kodilkar

>  		/**
>   		 * Here, we setup with legacy mode in the following cases:
>   		 * 1. When cannot target interrupt to a specific vcpu.
> diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
> index d4490eaed55d..294d5594c724 100644
> --- a/arch/x86/kvm/svm/svm.h
> +++ b/arch/x86/kvm/svm/svm.h
> @@ -731,8 +731,9 @@ void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
>   void avic_vcpu_put(struct kvm_vcpu *vcpu);
>   void avic_apicv_post_state_restore(struct kvm_vcpu *vcpu);
>   void avic_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu);
> -int avic_pi_update_irte(struct kvm *kvm, unsigned int host_irq,
> -			uint32_t guest_irq, bool set);
> +int avic_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
> +			unsigned int host_irq, uint32_t guest_irq,
> +			struct kvm_kernel_irq_routing_entry *new);
>   void avic_vcpu_blocking(struct kvm_vcpu *vcpu);
>   void avic_vcpu_unblocking(struct kvm_vcpu *vcpu);
>   void avic_ring_doorbell(struct kvm_vcpu *vcpu);
> diff --git a/arch/x86/kvm/vmx/posted_intr.c b/arch/x86/kvm/vmx/posted_intr.c
> index 78ba3d638fe8..1b6b655a2b8a 100644
> --- a/arch/x86/kvm/vmx/posted_intr.c
> +++ b/arch/x86/kvm/vmx/posted_intr.c
> @@ -2,6 +2,7 @@
>   #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
>   
>   #include <linux/kvm_host.h>
> +#include <linux/kvm_irqfd.h>
>   
>   #include <asm/irq_remapping.h>
>   #include <asm/cpu.h>
> @@ -259,17 +260,9 @@ void vmx_pi_start_assignment(struct kvm *kvm)
>   	kvm_make_all_cpus_request(kvm, KVM_REQ_UNBLOCK);
>   }
>   
> -/*
> - * vmx_pi_update_irte - set IRTE for Posted-Interrupts
> - *
> - * @kvm: kvm
> - * @host_irq: host irq of the interrupt
> - * @guest_irq: gsi of the interrupt
> - * @set: set or unset PI
> - * returns 0 on success, < 0 on failure
> - */
> -int vmx_pi_update_irte(struct kvm *kvm, unsigned int host_irq,
> -		       uint32_t guest_irq, bool set)
> +int vmx_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
> +		       unsigned int host_irq, uint32_t guest_irq,
> +		       struct kvm_kernel_irq_routing_entry *new)
>   {
>   	struct kvm_kernel_irq_routing_entry *e;
>   	struct kvm_irq_routing_table *irq_rt;
> @@ -277,6 +270,7 @@ int vmx_pi_update_irte(struct kvm *kvm, unsigned int host_irq,
>   	struct kvm_lapic_irq irq;
>   	struct kvm_vcpu *vcpu;
>   	struct vcpu_data vcpu_info;
> +	bool set = !!new;
>   	int idx, ret = 0;
>   
>   	if (!vmx_can_use_vtd_pi(kvm))
> @@ -294,6 +288,9 @@ int vmx_pi_update_irte(struct kvm *kvm, unsigned int host_irq,
>   	hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) {
>   		if (e->type != KVM_IRQ_ROUTING_MSI)
>   			continue;
> +
> +		WARN_ON_ONCE(new && memcmp(e, new, sizeof(*new)));
> +
>   		/*
>   		 * VT-d PI cannot support posting multicast/broadcast
>   		 * interrupts to a vCPU, we still use interrupt remapping
> diff --git a/arch/x86/kvm/vmx/posted_intr.h b/arch/x86/kvm/vmx/posted_intr.h
> index ad9116a99bcc..a586d6aaf862 100644
> --- a/arch/x86/kvm/vmx/posted_intr.h
> +++ b/arch/x86/kvm/vmx/posted_intr.h
> @@ -3,6 +3,9 @@
>   #define __KVM_X86_VMX_POSTED_INTR_H
>   
>   #include <linux/bitmap.h>
> +#include <linux/find.h>
> +#include <linux/kvm_host.h>
> +
>   #include <asm/posted_intr.h>
>   
>   void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu);
> @@ -10,8 +13,9 @@ void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu);
>   void pi_wakeup_handler(void);
>   void __init pi_init_cpu(int cpu);
>   bool pi_has_pending_interrupt(struct kvm_vcpu *vcpu);
> -int vmx_pi_update_irte(struct kvm *kvm, unsigned int host_irq,
> -		       uint32_t guest_irq, bool set);
> +int vmx_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
> +		       unsigned int host_irq, uint32_t guest_irq,
> +		       struct kvm_kernel_irq_routing_entry *new);
>   void vmx_pi_start_assignment(struct kvm *kvm);
>   
>   static inline int pi_find_highest_vector(struct pi_desc *pi_desc)
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index dcc173852dc5..23376fcd928c 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -13570,31 +13570,31 @@ int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
>   	struct kvm_kernel_irqfd *irqfd =
>   		container_of(cons, struct kvm_kernel_irqfd, consumer);
>   	struct kvm *kvm = irqfd->kvm;
> -	int ret;
> +	int ret = 0;
>   
>   	kvm_arch_start_assignment(irqfd->kvm);
>   
>   	spin_lock_irq(&kvm->irqfds.lock);
>   	irqfd->producer = prod;
>   
> -	ret = kvm_x86_call(pi_update_irte)(irqfd->kvm,
> -					   prod->irq, irqfd->gsi, 1);
> -	if (ret)
> -		kvm_arch_end_assignment(irqfd->kvm);
> -
> +	if (irqfd->irq_entry.type == KVM_IRQ_ROUTING_MSI) {
> +		ret = kvm_x86_call(pi_update_irte)(irqfd, irqfd->kvm, prod->irq,
> +						   irqfd->gsi, &irqfd->irq_entry);
> +		if (ret)
> +			kvm_arch_end_assignment(irqfd->kvm);
> +	}
>   	spin_unlock_irq(&kvm->irqfds.lock);
>   
> -
>   	return ret;
>   }
>   
>   void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
>   				      struct irq_bypass_producer *prod)
>   {
> -	int ret;
>   	struct kvm_kernel_irqfd *irqfd =
>   		container_of(cons, struct kvm_kernel_irqfd, consumer);
>   	struct kvm *kvm = irqfd->kvm;
> +	int ret;
>   
>   	WARN_ON(irqfd->producer != prod);
>   
> @@ -13607,11 +13607,13 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
>   	spin_lock_irq(&kvm->irqfds.lock);
>   	irqfd->producer = NULL;
>   
> -	ret = kvm_x86_call(pi_update_irte)(irqfd->kvm,
> -					   prod->irq, irqfd->gsi, 0);
> -	if (ret)
> -		printk(KERN_INFO "irq bypass consumer (token %p) unregistration"
> -		       " fails: %d\n", irqfd->consumer.token, ret);
> +	if (irqfd->irq_entry.type == KVM_IRQ_ROUTING_MSI) {
> +		ret = kvm_x86_call(pi_update_irte)(irqfd, irqfd->kvm, prod->irq,
> +						   irqfd->gsi, NULL);
> +		if (ret)
> +			pr_info("irq bypass consumer (token %p) unregistration fails: %d\n",
> +				irqfd->consumer.token, ret);
> +	}
>   
>   	spin_unlock_irq(&kvm->irqfds.lock);
>   
> @@ -13619,10 +13621,12 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
>   	kvm_arch_end_assignment(irqfd->kvm);
>   }
>   
> -int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq,
> -				   uint32_t guest_irq, bool set)
> +int kvm_arch_update_irqfd_routing(struct kvm_kernel_irqfd *irqfd,
> +				  struct kvm_kernel_irq_routing_entry *old,
> +				  struct kvm_kernel_irq_routing_entry *new)
>   {
> -	return kvm_x86_call(pi_update_irte)(kvm, host_irq, guest_irq, set);
> +	return kvm_x86_call(pi_update_irte)(irqfd, irqfd->kvm, irqfd->producer->irq,
> +					    irqfd->gsi, new);
>   }
>   
>   bool kvm_arch_irqfd_route_changed(struct kvm_kernel_irq_routing_entry *old,
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 5438a1b446a6..2d9f3aeb766a 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -2383,6 +2383,8 @@ struct kvm_vcpu *kvm_get_running_vcpu(void);
>   struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void);
>   
>   #ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
> +struct kvm_kernel_irqfd;
> +
>   bool kvm_arch_has_irq_bypass(void);
>   int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *,
>   			   struct irq_bypass_producer *);
> @@ -2390,8 +2392,9 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *,
>   			   struct irq_bypass_producer *);
>   void kvm_arch_irq_bypass_stop(struct irq_bypass_consumer *);
>   void kvm_arch_irq_bypass_start(struct irq_bypass_consumer *);
> -int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq,
> -				  uint32_t guest_irq, bool set);
> +int kvm_arch_update_irqfd_routing(struct kvm_kernel_irqfd *irqfd,
> +				  struct kvm_kernel_irq_routing_entry *old,
> +				  struct kvm_kernel_irq_routing_entry *new);
>   bool kvm_arch_irqfd_route_changed(struct kvm_kernel_irq_routing_entry *,
>   				  struct kvm_kernel_irq_routing_entry *);
>   #endif /* CONFIG_HAVE_KVM_IRQ_BYPASS */
> diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
> index 249ba5b72e9b..ad71e3e4d1c3 100644
> --- a/virt/kvm/eventfd.c
> +++ b/virt/kvm/eventfd.c
> @@ -285,9 +285,9 @@ void __attribute__((weak)) kvm_arch_irq_bypass_start(
>   {
>   }
>   
> -int  __attribute__((weak)) kvm_arch_update_irqfd_routing(
> -				struct kvm *kvm, unsigned int host_irq,
> -				uint32_t guest_irq, bool set)
> +int __weak kvm_arch_update_irqfd_routing(struct kvm_kernel_irqfd *irqfd,
> +					 struct kvm_kernel_irq_routing_entry *old,
> +					 struct kvm_kernel_irq_routing_entry *new)
>   {
>   	return 0;
>   }
> @@ -619,9 +619,8 @@ void kvm_irq_routing_update(struct kvm *kvm)
>   #ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
>   		if (irqfd->producer &&
>   		    kvm_arch_irqfd_route_changed(&old, &irqfd->irq_entry)) {
> -			int ret = kvm_arch_update_irqfd_routing(
> -					irqfd->kvm, irqfd->producer->irq,
> -					irqfd->gsi, 1);
> +			int ret = kvm_arch_update_irqfd_routing(irqfd, &old, &irqfd->irq_entry);
> +
>   			WARN_ON(ret);
>   		}
>   #endif


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ