lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20120718102029.GA4650@redhat.com>
Date:	Wed, 18 Jul 2012 13:20:29 +0300
From:	"Michael S. Tsirkin" <mst@...hat.com>
To:	Gleb Natapov <gleb@...hat.com>
Cc:	Alex Williamson <alex.williamson@...hat.com>, avi@...hat.com,
	kvm@...r.kernel.org, linux-kernel@...r.kernel.org,
	jan.kiszka@...mens.com
Subject: Re: [PATCH v5 3/4] kvm: Create kvm_clear_irq()

On Wed, Jul 18, 2012 at 09:27:42AM +0300, Gleb Natapov wrote:
> On Tue, Jul 17, 2012 at 07:14:52PM +0300, Michael S. Tsirkin wrote:
> > > _Seems_ racy, or _is_ racy?  Please identify the race.
> > 
> > Look at this:
> > 
> > static inline int kvm_irq_line_state(unsigned long *irq_state,
> >                                      int irq_source_id, int level)
> > {
> >         /* Logical OR for level trig interrupt */
> >         if (level)
> >                 set_bit(irq_source_id, irq_state);
> >         else
> >                 clear_bit(irq_source_id, irq_state);
> > 
> >         return !!(*irq_state);
> > }
> > 
> > 
> > Now:
> > If other CPU changes some other bit after the atomic change,
> > it looks like !!(*irq_state) might return a stale value.
> > 
> > CPU 0 clears bit 0. CPU 1 sets bit 1. CPU 1 sets level to 1.
> > If CPU 0 sees a stale value now it will return 0 here
> > and interrupt will get cleared.
> > 
> This will hardly happen on x86 especially since bit is set with
> serialized instruction.

Probably. But it does make me a bit uneasy.  Why don't we pass
irq_source_id to kvm_pic_set_irq/kvm_ioapic_set_irq, and move
kvm_irq_line_state to under pic_lock/ioapic_lock?  We can then use
__set_bit/__clear_bit in kvm_irq_line_state, making the ordering simpler
and saving an atomic op in the process.

> But there is actually a race here.
> CPU 0 clears bit 0. CPU 0 read irq_state as 0. CPU 1 sets level to 1.
> CPU 1 calls kvm_ioapic_set_irq(1). CPU 0 calls kvm_ioapic_set_irq(0).
> No ioapic thinks the level is 0 but irq_state is not 0.
> 
> This untested and un-compiled patch should fix it.
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index ef91d79..e22c78b 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -825,7 +825,7 @@ int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
>  void kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault);
>  bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl);
>  
> -int kvm_pic_set_irq(void *opaque, int irq, int level);
> +int kvm_pic_set_irq(void *opaque, int irq);
>  
>  void kvm_inject_nmi(struct kvm_vcpu *vcpu);
>  
> diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
> index 81cf4fa..0d6988f 100644
> --- a/arch/x86/kvm/i8259.c
> +++ b/arch/x86/kvm/i8259.c
> @@ -188,12 +188,13 @@ void kvm_pic_update_irq(struct kvm_pic *s)
>  	pic_unlock(s);
>  }
>  
> -int kvm_pic_set_irq(void *opaque, int irq, int level)
> +int kvm_pic_set_irq(void *opaque, int irq)
>  {
>  	struct kvm_pic *s = opaque;
> -	int ret = -1;
> +	int ret = -1, level;
>  
>  	pic_lock(s);
> +	level = !!s->irq_states[irq];
>  	if (irq >= 0 && irq < PIC_NUM_PINS) {
>  		ret = pic_set_irq1(&s->pics[irq >> 3], irq & 7, level);
>  		pic_update_irq(s);
> diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
> index 26fd54d..6ad6a6b 100644
> --- a/virt/kvm/ioapic.c
> +++ b/virt/kvm/ioapic.c
> @@ -191,14 +191,15 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
>  	return kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe);
>  }
>  
> -int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
> +int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq)
>  {
>  	u32 old_irr;
>  	u32 mask = 1 << irq;
>  	union kvm_ioapic_redirect_entry entry;
> -	int ret = 1;
> +	int ret = 1, level;
>  
>  	spin_lock(&ioapic->lock);
> +	level = !!ioapic->irq_states[irq];
>  	old_irr = ioapic->irr;
>  	if (irq >= 0 && irq < IOAPIC_NUM_PINS) {
>  		entry = ioapic->redirtbl[irq];
> diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h
> index 32872a0..65894dd 100644
> --- a/virt/kvm/ioapic.h
> +++ b/virt/kvm/ioapic.h
> @@ -74,7 +74,7 @@ void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode);
>  bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector);
>  int kvm_ioapic_init(struct kvm *kvm);
>  void kvm_ioapic_destroy(struct kvm *kvm);
> -int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level);
> +int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq);
>  void kvm_ioapic_reset(struct kvm_ioapic *ioapic);
>  int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
>  		struct kvm_lapic_irq *irq);
> diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
> index a6a0365..db0ccef 100644
> --- a/virt/kvm/irq_comm.c
> +++ b/virt/kvm/irq_comm.c
> @@ -33,7 +33,7 @@
>  
>  #include "ioapic.h"
>  
> -static inline int kvm_irq_line_state(unsigned long *irq_state,
> +static inline void kvm_irq_line_state(unsigned long *irq_state,
>  				     int irq_source_id, int level)
>  {
>  	/* Logical OR for level trig interrupt */
> @@ -41,8 +41,6 @@ static inline int kvm_irq_line_state(unsigned long *irq_state,
>  		set_bit(irq_source_id, irq_state);
>  	else
>  		clear_bit(irq_source_id, irq_state);
> -
> -	return !!(*irq_state);
>  }
>  
>  static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e,
> @@ -50,9 +48,9 @@ static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e,
>  {
>  #ifdef CONFIG_X86
>  	struct kvm_pic *pic = pic_irqchip(kvm);
> -	level = kvm_irq_line_state(&pic->irq_states[e->irqchip.pin],
> +	kvm_irq_line_state(&pic->irq_states[e->irqchip.pin],
>  				   irq_source_id, level);
> -	return kvm_pic_set_irq(pic, e->irqchip.pin, level);
> +	return kvm_pic_set_irq(pic, e->irqchip.pin);
>  #else
>  	return -1;
>  #endif
> @@ -62,10 +60,10 @@ static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e,
>  			      struct kvm *kvm, int irq_source_id, int level)
>  {
>  	struct kvm_ioapic *ioapic = kvm->arch.vioapic;
> -	level = kvm_irq_line_state(&ioapic->irq_states[e->irqchip.pin],
> +	kvm_irq_line_state(&ioapic->irq_states[e->irqchip.pin],
>  				   irq_source_id, level);
>  
> -	return kvm_ioapic_set_irq(ioapic, e->irqchip.pin, level);
> +	return kvm_ioapic_set_irq(ioapic, e->irqchip.pin);
>  }
>  
>  inline static bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq)
> --
> 			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ