lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20090629140632.GC14059@amt.cnet>
Date:	Mon, 29 Jun 2009 11:06:32 -0300
From:	Marcelo Tosatti <mtosatti@...hat.com>
To:	"Michael S. Tsirkin" <mst@...hat.com>
Cc:	Gregory Haskins <ghaskins@...ell.com>, avi@...hat.com,
	kvm@...r.kernel.org, linux-kernel@...r.kernel.org,
	paulmck@...ux.vnet.ibm.com, markmc@...hat.com
Subject: Re: [PATCHv2] kvm: remove in_range and switch to rwsem for iobus

On Sun, Jun 28, 2009 at 10:34:25PM +0300, Michael S. Tsirkin wrote:
> This changes bus accesses to use high-level kvm_io_bus_read/kvm_io_bus_write
> functions, which utilize read/write semaphore intead of mutex.  in_range now
> becomes unused so it is removed from device ops in favor of read/write
> callbacks performing range checks internally.
> 
> This allows aliasing (mostly for in-kernel virtio), as well as better error
> handling by making it possible to pass errors up to userspace. And it's enough
> to look at the diffstat to see that it's a better API anyway.

Can you please expand a little on this? Still don't get why making
->in_range part of ->read / ->write is a good thing. Aliasing?

Agree that proliferation of locks is a bad thing.

> While we are at it, document locking rules for kvm_io_device_ops.
> 
> Note: since the use of the new bus_lock is localized to a small number of
> places, it will be easy to switch to srcu in the future if we so desire.
> 
> Signed-off-by: Michael S. Tsirkin <mst@...hat.com>
> ---
> 
> OK, here's an updated version of the patch. I punted on rcu and went
> with rw semaphore for now, but it will be easy to change now that the
> use of the semaphore is fairly isolated. Works for me, but please
> review carefully.
> 
>  arch/ia64/kvm/kvm-ia64.c  |   28 +++--------
>  arch/x86/kvm/i8254.c      |   53 +++++++++++----------
>  arch/x86/kvm/i8259.c      |   22 +++++----
>  arch/x86/kvm/lapic.c      |   44 ++++++++----------
>  arch/x86/kvm/x86.c        |  112 ++++++++++++++-------------------------------
>  include/linux/kvm_host.h  |   10 +++-
>  virt/kvm/coalesced_mmio.c |   28 ++++++------
>  virt/kvm/ioapic.c         |   24 +++++----
>  virt/kvm/iodev.h          |   42 +++++++----------
>  virt/kvm/kvm_main.c       |   41 ++++++++++++-----
>  10 files changed, 184 insertions(+), 220 deletions(-)
> 
> diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
> index c1c5cb6..5bf5100 100644
> --- a/arch/ia64/kvm/kvm-ia64.c
> +++ b/arch/ia64/kvm/kvm-ia64.c
> @@ -210,16 +210,6 @@ int kvm_dev_ioctl_check_extension(long ext)
>  
>  }
>  
> -static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu,
> -					gpa_t addr, int len, int is_write)
> -{
> -	struct kvm_io_device *dev;
> -
> -	dev = kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr, len, is_write);
> -
> -	return dev;
> -}
> -
>  static int handle_vm_error(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
>  {
>  	kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
> @@ -231,6 +221,7 @@ static int handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
>  {
>  	struct kvm_mmio_req *p;
>  	struct kvm_io_device *mmio_dev;
> +	int r;
>  
>  	p = kvm_get_vcpu_ioreq(vcpu);
>  
> @@ -247,16 +238,13 @@ static int handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
>  	kvm_run->exit_reason = KVM_EXIT_MMIO;
>  	return 0;
>  mmio:
> -	mmio_dev = vcpu_find_mmio_dev(vcpu, p->addr, p->size, !p->dir);
> -	if (mmio_dev) {
> -		if (!p->dir)
> -			kvm_iodevice_write(mmio_dev, p->addr, p->size,
> -						&p->data);
> -		else
> -			kvm_iodevice_read(mmio_dev, p->addr, p->size,
> -						&p->data);
> -
> -	} else
> +	if (p->dir)
> +		r = kvm_io_bus_read(vcpu->kvm, &vcpu->kvm->mmio_bus, p->addr,
> +				    p->size, &p->data);
> +	else
> +		r = kvm_io_bus_write(vcpu->kvm, &vcpu->kvm->mmio_bus, p->addr,
> +				     p->size, &p->data);
> +	if (r)
>  		printk(KERN_ERR"kvm: No iodevice found! addr:%lx\n", p->addr);
>  	p->state = STATE_IORESP_READY;
>  
> diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
> index 331705f..3155ffa 100644
> --- a/arch/x86/kvm/i8254.c
> +++ b/arch/x86/kvm/i8254.c
> @@ -357,8 +357,14 @@ static inline struct kvm_pit *speaker_to_pit(struct kvm_io_device *dev)
>  	return container_of(dev, struct kvm_pit, speaker_dev);
>  }
>  
> -static void pit_ioport_write(struct kvm_io_device *this,
> -			     gpa_t addr, int len, const void *data)
> +static inline int pit_in_range(gpa_t addr)
> +{
> +	return ((addr >= KVM_PIT_BASE_ADDRESS) &&
> +		(addr < KVM_PIT_BASE_ADDRESS + KVM_PIT_MEM_LENGTH));
> +}
> +
> +static int pit_ioport_write(struct kvm_io_device *this,
> +			    gpa_t addr, int len, const void *data)
>  {
>  	struct kvm_pit *pit = dev_to_pit(this);
>  	struct kvm_kpit_state *pit_state = &pit->pit_state;
> @@ -366,6 +372,8 @@ static void pit_ioport_write(struct kvm_io_device *this,
>  	int channel, access;
>  	struct kvm_kpit_channel_state *s;
>  	u32 val = *(u32 *) data;
> +	if (!pit_in_range(addr))
> +		return -EOPNOTSUPP;
>  
>  	val  &= 0xff;
>  	addr &= KVM_PIT_CHANNEL_MASK;
> @@ -428,16 +436,19 @@ static void pit_ioport_write(struct kvm_io_device *this,
>  	}
>  
>  	mutex_unlock(&pit_state->lock);
> +	return 0;
>  }
>  
> -static void pit_ioport_read(struct kvm_io_device *this,
> -			    gpa_t addr, int len, void *data)
> +static int pit_ioport_read(struct kvm_io_device *this,
> +			   gpa_t addr, int len, void *data)
>  {
>  	struct kvm_pit *pit = dev_to_pit(this);
>  	struct kvm_kpit_state *pit_state = &pit->pit_state;
>  	struct kvm *kvm = pit->kvm;
>  	int ret, count;
>  	struct kvm_kpit_channel_state *s;
> +	if (!pit_in_range(addr))
> +		return -EOPNOTSUPP;
>  
>  	addr &= KVM_PIT_CHANNEL_MASK;
>  	s = &pit_state->channels[addr];
> @@ -492,37 +503,36 @@ static void pit_ioport_read(struct kvm_io_device *this,
>  	memcpy(data, (char *)&ret, len);
>  
>  	mutex_unlock(&pit_state->lock);
> +	return 0;
>  }
>  
> -static int pit_in_range(struct kvm_io_device *this, gpa_t addr,
> -			int len, int is_write)
> -{
> -	return ((addr >= KVM_PIT_BASE_ADDRESS) &&
> -		(addr < KVM_PIT_BASE_ADDRESS + KVM_PIT_MEM_LENGTH));
> -}
> -
> -static void speaker_ioport_write(struct kvm_io_device *this,
> -				 gpa_t addr, int len, const void *data)
> +static int speaker_ioport_write(struct kvm_io_device *this,
> +				gpa_t addr, int len, const void *data)
>  {
>  	struct kvm_pit *pit = speaker_to_pit(this);
>  	struct kvm_kpit_state *pit_state = &pit->pit_state;
>  	struct kvm *kvm = pit->kvm;
>  	u32 val = *(u32 *) data;
> +	if (addr != KVM_SPEAKER_BASE_ADDRESS)
> +		return -EOPNOTSUPP;
>  
>  	mutex_lock(&pit_state->lock);
>  	pit_state->speaker_data_on = (val >> 1) & 1;
>  	pit_set_gate(kvm, 2, val & 1);
>  	mutex_unlock(&pit_state->lock);
> +	return 0;
>  }
>  
> -static void speaker_ioport_read(struct kvm_io_device *this,
> -				gpa_t addr, int len, void *data)
> +static int speaker_ioport_read(struct kvm_io_device *this,
> +			       gpa_t addr, int len, void *data)
>  {
>  	struct kvm_pit *pit = speaker_to_pit(this);
>  	struct kvm_kpit_state *pit_state = &pit->pit_state;
>  	struct kvm *kvm = pit->kvm;
>  	unsigned int refresh_clock;
>  	int ret;
> +	if (addr != KVM_SPEAKER_BASE_ADDRESS)
> +		return -EOPNOTSUPP;
>  
>  	/* Refresh clock toggles at about 15us. We approximate as 2^14ns. */
>  	refresh_clock = ((unsigned int)ktime_to_ns(ktime_get()) >> 14) & 1;
> @@ -534,12 +544,7 @@ static void speaker_ioport_read(struct kvm_io_device *this,
>  		len = sizeof(ret);
>  	memcpy(data, (char *)&ret, len);
>  	mutex_unlock(&pit_state->lock);
> -}
> -
> -static int speaker_in_range(struct kvm_io_device *this, gpa_t addr,
> -			    int len, int is_write)
> -{
> -	return (addr == KVM_SPEAKER_BASE_ADDRESS);
> +	return 0;
>  }
>  
>  void kvm_pit_reset(struct kvm_pit *pit)
> @@ -573,13 +578,11 @@ static void pit_mask_notifer(struct kvm_irq_mask_notifier *kimn, bool mask)
>  static const struct kvm_io_device_ops pit_dev_ops = {
>  	.read     = pit_ioport_read,
>  	.write    = pit_ioport_write,
> -	.in_range = pit_in_range,
>  };
>  
>  static const struct kvm_io_device_ops speaker_dev_ops = {
>  	.read     = speaker_ioport_read,
>  	.write    = speaker_ioport_write,
> -	.in_range = speaker_in_range,
>  };
>  
>  struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
> @@ -620,11 +623,11 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
>  	kvm_register_irq_mask_notifier(kvm, 0, &pit->mask_notifier);
>  
>  	kvm_iodevice_init(&pit->dev, &pit_dev_ops);
> -	kvm_io_bus_register_dev(&kvm->pio_bus, &pit->dev);
> +	kvm_io_bus_register_dev(kvm, &kvm->pio_bus, &pit->dev);
>  
>  	if (flags & KVM_PIT_SPEAKER_DUMMY) {
>  		kvm_iodevice_init(&pit->speaker_dev, &speaker_dev_ops);
> -		kvm_io_bus_register_dev(&kvm->pio_bus, &pit->speaker_dev);
> +		kvm_io_bus_register_dev(kvm, &kvm->pio_bus, &pit->speaker_dev);
>  	}
>  
>  	return pit;
> diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
> index 148c52a..1d1bb75 100644
> --- a/arch/x86/kvm/i8259.c
> +++ b/arch/x86/kvm/i8259.c
> @@ -430,8 +430,7 @@ static u32 elcr_ioport_read(void *opaque, u32 addr1)
>  	return s->elcr;
>  }
>  
> -static int picdev_in_range(struct kvm_io_device *this, gpa_t addr,
> -			   int len, int is_write)
> +static int picdev_in_range(gpa_t addr)
>  {
>  	switch (addr) {
>  	case 0x20:
> @@ -451,16 +450,18 @@ static inline struct kvm_pic *to_pic(struct kvm_io_device *dev)
>  	return container_of(dev, struct kvm_pic, dev);
>  }
>  
> -static void picdev_write(struct kvm_io_device *this,
> +static int picdev_write(struct kvm_io_device *this,
>  			 gpa_t addr, int len, const void *val)
>  {
>  	struct kvm_pic *s = to_pic(this);
>  	unsigned char data = *(unsigned char *)val;
> +	if (!picdev_in_range(addr))
> +		return -EOPNOTSUPP;
>  
>  	if (len != 1) {
>  		if (printk_ratelimit())
>  			printk(KERN_ERR "PIC: non byte write\n");
> -		return;
> +		return 0;
>  	}
>  	pic_lock(s);
>  	switch (addr) {
> @@ -476,18 +477,21 @@ static void picdev_write(struct kvm_io_device *this,
>  		break;
>  	}
>  	pic_unlock(s);
> +	return 0;
>  }
>  
> -static void picdev_read(struct kvm_io_device *this,
> -			gpa_t addr, int len, void *val)
> +static int picdev_read(struct kvm_io_device *this,
> +		       gpa_t addr, int len, void *val)
>  {
>  	struct kvm_pic *s = to_pic(this);
>  	unsigned char data = 0;
> +	if (!picdev_in_range(addr))
> +		return -EOPNOTSUPP;
>  
>  	if (len != 1) {
>  		if (printk_ratelimit())
>  			printk(KERN_ERR "PIC: non byte read\n");
> -		return;
> +		return 0;
>  	}
>  	pic_lock(s);
>  	switch (addr) {
> @@ -504,6 +508,7 @@ static void picdev_read(struct kvm_io_device *this,
>  	}
>  	*(unsigned char *)val = data;
>  	pic_unlock(s);
> +	return 0;
>  }
>  
>  /*
> @@ -526,7 +531,6 @@ static void pic_irq_request(void *opaque, int level)
>  static const struct kvm_io_device_ops picdev_ops = {
>  	.read     = picdev_read,
>  	.write    = picdev_write,
> -	.in_range = picdev_in_range,
>  };
>  
>  struct kvm_pic *kvm_create_pic(struct kvm *kvm)
> @@ -548,6 +552,6 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm)
>  	 * Initialize PIO device
>  	 */
>  	kvm_iodevice_init(&s->dev, &picdev_ops);
> -	kvm_io_bus_register_dev(&kvm->pio_bus, &s->dev);
> +	kvm_io_bus_register_dev(kvm, &kvm->pio_bus, &s->dev);
>  	return s;
>  }
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index 2e02865..265a765 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -546,18 +546,27 @@ static inline struct kvm_lapic *to_lapic(struct kvm_io_device *dev)
>  	return container_of(dev, struct kvm_lapic, dev);
>  }
>  
> -static void apic_mmio_read(struct kvm_io_device *this,
> -			   gpa_t address, int len, void *data)
> +static int apic_mmio_in_range(struct kvm_lapic *apic, gpa_t addr)
> +{
> +	return apic_hw_enabled(apic) &&
> +	    addr >= apic->base_address &&
> +	    addr < apic->base_address + LAPIC_MMIO_LENGTH;
> +}
> +
> +static int apic_mmio_read(struct kvm_io_device *this,
> +			  gpa_t address, int len, void *data)
>  {
>  	struct kvm_lapic *apic = to_lapic(this);
>  	unsigned int offset = address - apic->base_address;
>  	unsigned char alignment = offset & 0xf;
>  	u32 result;
> +	if (!apic_mmio_in_range(apic, address))
> +		return -EOPNOTSUPP;
>  
>  	if ((alignment + len) > 4) {
>  		printk(KERN_ERR "KVM_APIC_READ: alignment error %lx %d",
>  		       (unsigned long)address, len);
> -		return;
> +		return 0;
>  	}
>  	result = __apic_read(apic, offset & ~0xf);
>  
> @@ -574,6 +583,7 @@ static void apic_mmio_read(struct kvm_io_device *this,
>  		       "should be 1,2, or 4 instead\n", len);
>  		break;
>  	}
> +	return 0;
>  }
>  
>  static void update_divide_count(struct kvm_lapic *apic)
> @@ -629,13 +639,15 @@ static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
>  		apic->vcpu->kvm->arch.vapics_in_nmi_mode--;
>  }
>  
> -static void apic_mmio_write(struct kvm_io_device *this,
> -			    gpa_t address, int len, const void *data)
> +static int apic_mmio_write(struct kvm_io_device *this,
> +			   gpa_t address, int len, const void *data)
>  {
>  	struct kvm_lapic *apic = to_lapic(this);
>  	unsigned int offset = address - apic->base_address;
>  	unsigned char alignment = offset & 0xf;
>  	u32 val;
> +	if (!apic_mmio_in_range(apic, address))
> +		return -EOPNOTSUPP;
>  
>  	/*
>  	 * APIC register must be aligned on 128-bits boundary.
> @@ -646,7 +658,7 @@ static void apic_mmio_write(struct kvm_io_device *this,
>  		/* Don't shout loud, $infamous_os would cause only noise. */
>  		apic_debug("apic write: bad size=%d %lx\n",
>  			   len, (long)address);
> -		return;
> +		return 0;
>  	}
>  
>  	val = *(u32 *) data;
> @@ -729,7 +741,7 @@ static void apic_mmio_write(struct kvm_io_device *this,
>  		hrtimer_cancel(&apic->lapic_timer.timer);
>  		apic_set_reg(apic, APIC_TMICT, val);
>  		start_apic_timer(apic);
> -		return;
> +		return 0;
>  
>  	case APIC_TDCR:
>  		if (val & 4)
> @@ -743,22 +755,7 @@ static void apic_mmio_write(struct kvm_io_device *this,
>  			   offset);
>  		break;
>  	}
> -
> -}
> -
> -static int apic_mmio_range(struct kvm_io_device *this, gpa_t addr,
> -			   int len, int size)
> -{
> -	struct kvm_lapic *apic = to_lapic(this);
> -	int ret = 0;
> -
> -
> -	if (apic_hw_enabled(apic) &&
> -	    (addr >= apic->base_address) &&
> -	    (addr < (apic->base_address + LAPIC_MMIO_LENGTH)))
> -		ret = 1;
> -
> -	return ret;
> +	return 0;
>  }
>  
>  void kvm_free_lapic(struct kvm_vcpu *vcpu)
> @@ -938,7 +935,6 @@ static struct kvm_timer_ops lapic_timer_ops = {
>  static const struct kvm_io_device_ops apic_mmio_ops = {
>  	.read     = apic_mmio_read,
>  	.write    = apic_mmio_write,
> -	.in_range = apic_mmio_range,
>  };
>  
>  int kvm_create_lapic(struct kvm_vcpu *vcpu)
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 5a66bb9..badc23f 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -2260,35 +2260,23 @@ static void kvm_init_msr_list(void)
>  	num_msrs_to_save = j;
>  }
>  
> -/*
> - * Only apic need an MMIO device hook, so shortcut now..
> - */
> -static struct kvm_io_device *vcpu_find_pervcpu_dev(struct kvm_vcpu *vcpu,
> -						gpa_t addr, int len,
> -						int is_write)
> +static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
> +			   const void *v)
>  {
> -	struct kvm_io_device *dev;
> +	if (vcpu->arch.apic &&
> +	    !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, len, v))
> +		return 0;
>  
> -	if (vcpu->arch.apic) {
> -		dev = &vcpu->arch.apic->dev;
> -		if (kvm_iodevice_in_range(dev, addr, len, is_write))
> -			return dev;
> -	}
> -	return NULL;
> +	return kvm_io_bus_write(vcpu->kvm, &vcpu->kvm->mmio_bus, addr, len, v);
>  }
>  
> -
> -static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu,
> -						gpa_t addr, int len,
> -						int is_write)
> +static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
>  {
> -	struct kvm_io_device *dev;
> +	if (vcpu->arch.apic &&
> +	    !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, len, v))
> +		return 0;
>  
> -	dev = vcpu_find_pervcpu_dev(vcpu, addr, len, is_write);
> -	if (dev == NULL)
> -		dev = kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr, len,
> -					  is_write);
> -	return dev;
> +	return kvm_io_bus_read(vcpu->kvm, &vcpu->kvm->mmio_bus, addr, len, v);
>  }
>  
>  static int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes,
> @@ -2357,7 +2345,6 @@ static int emulator_read_emulated(unsigned long addr,
>  				  unsigned int bytes,
>  				  struct kvm_vcpu *vcpu)
>  {
> -	struct kvm_io_device *mmio_dev;
>  	gpa_t                 gpa;
>  
>  	if (vcpu->mmio_read_completed) {
> @@ -2382,13 +2369,8 @@ mmio:
>  	/*
>  	 * Is this MMIO handled locally?
>  	 */
> -	mutex_lock(&vcpu->kvm->lock);
> -	mmio_dev = vcpu_find_mmio_dev(vcpu, gpa, bytes, 0);
> -	mutex_unlock(&vcpu->kvm->lock);
> -	if (mmio_dev) {
> -		kvm_iodevice_read(mmio_dev, gpa, bytes, val);
> +	if (!vcpu_mmio_read(vcpu, gpa, bytes, val))
>  		return X86EMUL_CONTINUE;
> -	}
>  
>  	vcpu->mmio_needed = 1;
>  	vcpu->mmio_phys_addr = gpa;
> @@ -2415,7 +2397,6 @@ static int emulator_write_emulated_onepage(unsigned long addr,
>  					   unsigned int bytes,
>  					   struct kvm_vcpu *vcpu)
>  {
> -	struct kvm_io_device *mmio_dev;
>  	gpa_t                 gpa;
>  
>  	gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
> @@ -2436,13 +2417,8 @@ mmio:
>  	/*
>  	 * Is this MMIO handled locally?
>  	 */
> -	mutex_lock(&vcpu->kvm->lock);
> -	mmio_dev = vcpu_find_mmio_dev(vcpu, gpa, bytes, 1);
> -	mutex_unlock(&vcpu->kvm->lock);
> -	if (mmio_dev) {
> -		kvm_iodevice_write(mmio_dev, gpa, bytes, val);
> +	if (!vcpu_mmio_write(vcpu, gpa, bytes, val))
>  		return X86EMUL_CONTINUE;
> -	}
>  
>  	vcpu->mmio_needed = 1;
>  	vcpu->mmio_phys_addr = gpa;
> @@ -2758,48 +2734,42 @@ int complete_pio(struct kvm_vcpu *vcpu)
>  	return 0;
>  }
>  
> -static void kernel_pio(struct kvm_io_device *pio_dev,
> -		       struct kvm_vcpu *vcpu,
> -		       void *pd)
> +static int kernel_pio(struct kvm_vcpu *vcpu, void *pd)
>  {
>  	/* TODO: String I/O for in kernel device */
> +	int r;
>  
>  	if (vcpu->arch.pio.in)
> -		kvm_iodevice_read(pio_dev, vcpu->arch.pio.port,
> -				  vcpu->arch.pio.size,
> -				  pd);
> +		r = kvm_io_bus_read(vcpu->kvm, &vcpu->kvm->pio_bus,
> +				    vcpu->arch.pio.port,
> +				    vcpu->arch.pio.size, pd);
>  	else
> -		kvm_iodevice_write(pio_dev, vcpu->arch.pio.port,
> -				   vcpu->arch.pio.size,
> -				   pd);
> +		r = kvm_io_bus_write(vcpu->kvm, &vcpu->kvm->pio_bus,
> +				     vcpu->arch.pio.port,
> +				     vcpu->arch.pio.size, pd);
> +	return r;
>  }
>  
> -static void pio_string_write(struct kvm_io_device *pio_dev,
> -			     struct kvm_vcpu *vcpu)
> +static int pio_string_write(struct kvm_vcpu *vcpu)
>  {
>  	struct kvm_pio_request *io = &vcpu->arch.pio;
>  	void *pd = vcpu->arch.pio_data;
> -	int i;
> +	int i, r = 0;
>  
>  	for (i = 0; i < io->cur_count; i++) {
> -		kvm_iodevice_write(pio_dev, io->port,
> -				   io->size,
> -				   pd);
> +		if (kvm_io_bus_write(vcpu->kvm, &vcpu->kvm->pio_bus,
> +				     io->port, io->size, pd)) {
> +			r = -EOPNOTSUPP;
> +			break;
> +		}
>  		pd += io->size;
>  	}
> -}
> -
> -static struct kvm_io_device *vcpu_find_pio_dev(struct kvm_vcpu *vcpu,
> -					       gpa_t addr, int len,
> -					       int is_write)
> -{
> -	return kvm_io_bus_find_dev(&vcpu->kvm->pio_bus, addr, len, is_write);
> +	return r;
>  }
>  
>  int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
>  		  int size, unsigned port)
>  {
> -	struct kvm_io_device *pio_dev;
>  	unsigned long val;
>  
>  	vcpu->run->exit_reason = KVM_EXIT_IO;
> @@ -2819,11 +2789,7 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
>  	val = kvm_register_read(vcpu, VCPU_REGS_RAX);
>  	memcpy(vcpu->arch.pio_data, &val, 4);
>  
> -	mutex_lock(&vcpu->kvm->lock);
> -	pio_dev = vcpu_find_pio_dev(vcpu, port, size, !in);
> -	mutex_unlock(&vcpu->kvm->lock);
> -	if (pio_dev) {
> -		kernel_pio(pio_dev, vcpu, vcpu->arch.pio_data);
> +	if (!kernel_pio(vcpu, vcpu->arch.pio_data)) {
>  		complete_pio(vcpu);
>  		return 1;
>  	}
> @@ -2837,7 +2803,6 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
>  {
>  	unsigned now, in_page;
>  	int ret = 0;
> -	struct kvm_io_device *pio_dev;
>  
>  	vcpu->run->exit_reason = KVM_EXIT_IO;
>  	vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
> @@ -2881,12 +2846,6 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
>  
>  	vcpu->arch.pio.guest_gva = address;
>  
> -	mutex_lock(&vcpu->kvm->lock);
> -	pio_dev = vcpu_find_pio_dev(vcpu, port,
> -				    vcpu->arch.pio.cur_count,
> -				    !vcpu->arch.pio.in);
> -	mutex_unlock(&vcpu->kvm->lock);
> -
>  	if (!vcpu->arch.pio.in) {
>  		/* string PIO write */
>  		ret = pio_copy_data(vcpu);
> @@ -2894,16 +2853,13 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
>  			kvm_inject_gp(vcpu, 0);
>  			return 1;
>  		}
> -		if (ret == 0 && pio_dev) {
> -			pio_string_write(pio_dev, vcpu);
> +		if (ret == 0 && !pio_string_write(vcpu)) {
>  			complete_pio(vcpu);
>  			if (vcpu->arch.pio.count == 0)
>  				ret = 1;
>  		}
> -	} else if (pio_dev)
> -		pr_unimpl(vcpu, "no string pio read support yet, "
> -		       "port %x size %d count %ld\n",
> -			port, size, count);
> +	}
> +	/* no string PIO read support yet */
>  
>  	return ret;
>  }
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 2451f48..8337f8e 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -42,6 +42,7 @@
>  
>  #define KVM_USERSPACE_IRQ_SOURCE_ID	0
>  
> +struct kvm;
>  struct kvm_vcpu;
>  extern struct kmem_cache *kvm_vcpu_cache;
>  
> @@ -59,9 +60,11 @@ struct kvm_io_bus {
>  
>  void kvm_io_bus_init(struct kvm_io_bus *bus);
>  void kvm_io_bus_destroy(struct kvm_io_bus *bus);
> -struct kvm_io_device *kvm_io_bus_find_dev(struct kvm_io_bus *bus,
> -					  gpa_t addr, int len, int is_write);
> -void kvm_io_bus_register_dev(struct kvm_io_bus *bus,
> +int kvm_io_bus_write(struct kvm *, struct kvm_io_bus *bus, gpa_t addr, int len,
> +		     const void *val);
> +int kvm_io_bus_read(struct kvm *, struct kvm_io_bus *bus, gpa_t addr, int len,
> +		    void *val);
> +void kvm_io_bus_register_dev(struct kvm *, struct kvm_io_bus *bus,
>  			     struct kvm_io_device *dev);
>  
>  struct kvm_vcpu {
> @@ -138,6 +141,7 @@ struct kvm {
>  	atomic_t online_vcpus;
>  	struct list_head vm_list;
>  	struct mutex lock;
> +	struct rw_semaphore bus_lock;
>  	struct kvm_io_bus mmio_bus;
>  	struct kvm_io_bus pio_bus;
>  #ifdef CONFIG_HAVE_KVM_EVENTFD
> diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c
> index 397f419..0140218 100644
> --- a/virt/kvm/coalesced_mmio.c
> +++ b/virt/kvm/coalesced_mmio.c
> @@ -19,17 +19,15 @@ static inline struct kvm_coalesced_mmio_dev *to_mmio(struct kvm_io_device *dev)
>  	return container_of(dev, struct kvm_coalesced_mmio_dev, dev);
>  }
>  
> -static int coalesced_mmio_in_range(struct kvm_io_device *this,
> -				   gpa_t addr, int len, int is_write)
> +static int coalesced_mmio_in_range(struct kvm_coalesced_mmio_dev *dev,
> +				   gpa_t addr, int len)
>  {
> -	struct kvm_coalesced_mmio_dev *dev = to_mmio(this);
>  	struct kvm_coalesced_mmio_zone *zone;
>  	struct kvm_coalesced_mmio_ring *ring;
>  	unsigned avail;
>  	int i;
>  
> -	if (!is_write)
> -		return 0;
> +	/* kvm->bus_lock is taken by the caller */
>  
>  	/* Are we able to batch it ? */
>  
> @@ -60,11 +58,13 @@ static int coalesced_mmio_in_range(struct kvm_io_device *this,
>  	return 0;
>  }
>  
> -static void coalesced_mmio_write(struct kvm_io_device *this,
> -				 gpa_t addr, int len, const void *val)
> +static int coalesced_mmio_write(struct kvm_io_device *this,
> +				gpa_t addr, int len, const void *val)
>  {
>  	struct kvm_coalesced_mmio_dev *dev = to_mmio(this);
>  	struct kvm_coalesced_mmio_ring *ring = dev->kvm->coalesced_mmio_ring;
> +	if (!coalesced_mmio_in_range(dev, addr, len))
> +		return -EOPNOTSUPP;
>  
>  	spin_lock(&dev->lock);
>  
> @@ -76,6 +76,7 @@ static void coalesced_mmio_write(struct kvm_io_device *this,
>  	smp_wmb();
>  	ring->last = (ring->last + 1) % KVM_COALESCED_MMIO_MAX;
>  	spin_unlock(&dev->lock);
> +	return 0;
>  }
>  
>  static void coalesced_mmio_destructor(struct kvm_io_device *this)
> @@ -87,7 +88,6 @@ static void coalesced_mmio_destructor(struct kvm_io_device *this)
>  
>  static const struct kvm_io_device_ops coalesced_mmio_ops = {
>  	.write      = coalesced_mmio_write,
> -	.in_range   = coalesced_mmio_in_range,
>  	.destructor = coalesced_mmio_destructor,
>  };
>  
> @@ -102,7 +102,7 @@ int kvm_coalesced_mmio_init(struct kvm *kvm)
>  	kvm_iodevice_init(&dev->dev, &coalesced_mmio_ops);
>  	dev->kvm = kvm;
>  	kvm->coalesced_mmio_dev = dev;
> -	kvm_io_bus_register_dev(&kvm->mmio_bus, &dev->dev);
> +	kvm_io_bus_register_dev(kvm, &kvm->mmio_bus, &dev->dev);
>  
>  	return 0;
>  }
> @@ -115,16 +115,16 @@ int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm,
>  	if (dev == NULL)
>  		return -EINVAL;
>  
> -	mutex_lock(&kvm->lock);
> +	down_write(&kvm->bus_lock);
>  	if (dev->nb_zones >= KVM_COALESCED_MMIO_ZONE_MAX) {
> -		mutex_unlock(&kvm->lock);
> +		up_write(&kvm->bus_lock);
>  		return -ENOBUFS;
>  	}
>  
>  	dev->zone[dev->nb_zones] = *zone;
>  	dev->nb_zones++;
>  
> -	mutex_unlock(&kvm->lock);
> +	up_write(&kvm->bus_lock);
>  	return 0;
>  }
>  
> @@ -138,7 +138,7 @@ int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm,
>  	if (dev == NULL)
>  		return -EINVAL;
>  
> -	mutex_lock(&kvm->lock);
> +	down_write(&kvm->bus_lock);
>  
>  	i = dev->nb_zones;
>  	while(i) {
> @@ -156,7 +156,7 @@ int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm,
>  		i--;
>  	}
>  
> -	mutex_unlock(&kvm->lock);
> +	up_write(&kvm->bus_lock);
>  
>  	return 0;
>  }
> diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
> index d8b2eca..2d352f7 100644
> --- a/virt/kvm/ioapic.c
> +++ b/virt/kvm/ioapic.c
> @@ -227,20 +227,19 @@ static inline struct kvm_ioapic *to_ioapic(struct kvm_io_device *dev)
>  	return container_of(dev, struct kvm_ioapic, dev);
>  }
>  
> -static int ioapic_in_range(struct kvm_io_device *this, gpa_t addr,
> -			   int len, int is_write)
> +static inline int ioapic_in_range(struct kvm_ioapic *ioapic, gpa_t addr)
>  {
> -	struct kvm_ioapic *ioapic = to_ioapic(this);
> -
>  	return ((addr >= ioapic->base_address &&
>  		 (addr < ioapic->base_address + IOAPIC_MEM_LENGTH)));
>  }
>  
> -static void ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
> -			     void *val)
> +static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
> +			    void *val)
>  {
>  	struct kvm_ioapic *ioapic = to_ioapic(this);
>  	u32 result;
> +	if (!ioapic_in_range(ioapic, addr))
> +		return -ENOTSUPP;
>  
>  	ioapic_debug("addr %lx\n", (unsigned long)addr);
>  	ASSERT(!(addr & 0xf));	/* check alignment */
> @@ -273,13 +272,16 @@ static void ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
>  		printk(KERN_WARNING "ioapic: wrong length %d\n", len);
>  	}
>  	mutex_unlock(&ioapic->kvm->irq_lock);
> +	return 0;
>  }
>  
> -static void ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
> -			      const void *val)
> +static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
> +			     const void *val)
>  {
>  	struct kvm_ioapic *ioapic = to_ioapic(this);
>  	u32 data;
> +	if (!ioapic_in_range(ioapic, addr))
> +		return -ENOTSUPP;
>  
>  	ioapic_debug("ioapic_mmio_write addr=%p len=%d val=%p\n",
>  		     (void*)addr, len, val);
> @@ -290,7 +292,7 @@ static void ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
>  		data = *(u32 *) val;
>  	else {
>  		printk(KERN_WARNING "ioapic: Unsupported size %d\n", len);
> -		return;
> +		return 0;
>  	}
>  
>  	addr &= 0xff;
> @@ -312,6 +314,7 @@ static void ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
>  		break;
>  	}
>  	mutex_unlock(&ioapic->kvm->irq_lock);
> +	return 0;
>  }
>  
>  void kvm_ioapic_reset(struct kvm_ioapic *ioapic)
> @@ -329,7 +332,6 @@ void kvm_ioapic_reset(struct kvm_ioapic *ioapic)
>  static const struct kvm_io_device_ops ioapic_mmio_ops = {
>  	.read     = ioapic_mmio_read,
>  	.write    = ioapic_mmio_write,
> -	.in_range = ioapic_in_range,
>  };
>  
>  int kvm_ioapic_init(struct kvm *kvm)
> @@ -343,7 +345,7 @@ int kvm_ioapic_init(struct kvm *kvm)
>  	kvm_ioapic_reset(ioapic);
>  	kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops);
>  	ioapic->kvm = kvm;
> -	kvm_io_bus_register_dev(&kvm->mmio_bus, &ioapic->dev);
> +	kvm_io_bus_register_dev(kvm, &kvm->mmio_bus, &ioapic->dev);
>  	return 0;
>  }
>  
> diff --git a/virt/kvm/iodev.h b/virt/kvm/iodev.h
> index 2c67f5a..c4b07eb 100644
> --- a/virt/kvm/iodev.h
> +++ b/virt/kvm/iodev.h
> @@ -17,20 +17,24 @@
>  #define __KVM_IODEV_H__
>  
>  #include <linux/kvm_types.h>
> +#include <asm/errno.h>
>  
>  struct kvm_io_device;
>  
> +/**
> + * read and write handlers are called under kvm bus_lock.
> + * They return 0 if the transaction has been handled,
> + * or non-zero to have it passed to the next device.
> + **/
>  struct kvm_io_device_ops {
> -	void (*read)(struct kvm_io_device *this,
> +	int (*read)(struct kvm_io_device *this,
> +		    gpa_t addr,
> +		    int len,
> +		    void *val);
> +	int (*write)(struct kvm_io_device *this,
>  		     gpa_t addr,
>  		     int len,
> -		     void *val);
> -	void (*write)(struct kvm_io_device *this,
> -		      gpa_t addr,
> -		      int len,
> -		      const void *val);
> -	int (*in_range)(struct kvm_io_device *this, gpa_t addr, int len,
> -			int is_write);
> +		     const void *val);
>  	void (*destructor)(struct kvm_io_device *this);
>  };
>  
> @@ -45,26 +49,16 @@ static inline void kvm_iodevice_init(struct kvm_io_device *dev,
>  	dev->ops = ops;
>  }
>  
> -static inline void kvm_iodevice_read(struct kvm_io_device *dev,
> -				     gpa_t addr,
> -				     int len,
> -				     void *val)
> +static inline int kvm_iodevice_read(struct kvm_io_device *dev,
> +				    gpa_t addr, int l, void *v)
>  {
> -	dev->ops->read(dev, addr, len, val);
> +	return dev->ops->read ? dev->ops->read(dev, addr, l, v) : -EOPNOTSUPP;
>  }
>  
> -static inline void kvm_iodevice_write(struct kvm_io_device *dev,
> -				      gpa_t addr,
> -				      int len,
> -				      const void *val)
> +static inline int kvm_iodevice_write(struct kvm_io_device *dev,
> +				     gpa_t addr, int l, const void *v)
>  {
> -	dev->ops->write(dev, addr, len, val);
> -}
> -
> -static inline int kvm_iodevice_in_range(struct kvm_io_device *dev,
> -					gpa_t addr, int len, int is_write)
> -{
> -	return dev->ops->in_range(dev, addr, len, is_write);
> +	return dev->ops->write ? dev->ops->write(dev, addr, l, v) : -EOPNOTSUPP;
>  }
>  
>  static inline void kvm_iodevice_destructor(struct kvm_io_device *dev)
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 58d6bc6..3b19839 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -981,6 +981,7 @@ static struct kvm *kvm_create_vm(void)
>  	kvm_io_bus_init(&kvm->pio_bus);
>  	kvm_irqfd_init(kvm);
>  	mutex_init(&kvm->lock);
> +	init_rwsem(&kvm->bus_lock);
>  	mutex_init(&kvm->irq_lock);
>  	kvm_io_bus_init(&kvm->mmio_bus);
>  	init_rwsem(&kvm->slots_lock);
> @@ -2484,26 +2485,42 @@ void kvm_io_bus_destroy(struct kvm_io_bus *bus)
>  	}
>  }
>  
> -struct kvm_io_device *kvm_io_bus_find_dev(struct kvm_io_bus *bus,
> -					  gpa_t addr, int len, int is_write)
> +int kvm_io_bus_write(struct kvm *kvm, struct kvm_io_bus *bus, gpa_t addr,
> +		     int len, const void *val)
>  {
> -	int i;
> -
> -	for (i = 0; i < bus->dev_count; i++) {
> -		struct kvm_io_device *pos = bus->devs[i];
> -
> -		if (kvm_iodevice_in_range(pos, addr, len, is_write))
> -			return pos;
> -	}
> +	int i, r = -EOPNOTSUPP;
> +	down_read(&kvm->bus_lock);
> +	for (i = 0; i < bus->dev_count; i++)
> +		if (!kvm_iodevice_write(bus->devs[i], addr, len, val)) {
> +			r = 0;
> +			break;
> +		}
> +	up_read(&kvm->bus_lock);
> +	return r;
> +}
>  
> -	return NULL;
> +int kvm_io_bus_read(struct kvm *kvm, struct kvm_io_bus *bus, gpa_t addr,
> +		    int len, void *val)
> +{
> +	int i, r = -EOPNOTSUPP;
> +	down_read(&kvm->bus_lock);
> +	for (i = 0; i < bus->dev_count; i++)
> +		if (!kvm_iodevice_read(bus->devs[i], addr, len, val)) {
> +			r = 0;
> +			break;
> +		}
> +	up_read(&kvm->bus_lock);
> +	return r;
>  }
>  
> -void kvm_io_bus_register_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev)
> +void kvm_io_bus_register_dev(struct kvm *kvm, struct kvm_io_bus *bus,
> +			     struct kvm_io_device *dev)
>  {
> +	down_write(&kvm->bus_lock);
>  	BUG_ON(bus->dev_count > (NR_IOBUS_DEVS-1));
>  
>  	bus->devs[bus->dev_count++] = dev;
> +	up_write(&kvm->bus_lock);
>  }
>  
>  static struct notifier_block kvm_cpu_notifier = {
> -- 
> 1.6.2.2
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ