lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Date:   Thu, 28 Jan 2021 18:03:33 +0800
From:   Shuo A Liu <shuo.a.liu@...el.com>
To:     Greg Kroah-Hartman <gregkh@...uxfoundation.org>
Cc:     linux-kernel@...r.kernel.org, x86@...nel.org,
        "H . Peter Anvin" <hpa@...or.com>,
        Thomas Gleixner <tglx@...utronix.de>,
        Ingo Molnar <mingo@...hat.com>, Borislav Petkov <bp@...en8.de>,
        Yu Wang <yu1.wang@...el.com>,
        Reinette Chatre <reinette.chatre@...el.com>,
        Zhi Wang <zhi.a.wang@...el.com>,
        Zhenyu Wang <zhenyuw@...ux.intel.com>
Subject: Re: [PATCH v8 15/18] virt: acrn: Introduce ioeventfd

On Wed 27.Jan'21 at 15:20:34 +0100, Greg Kroah-Hartman wrote:
>On Fri, Jan 22, 2021 at 07:01:19PM +0800, shuo.a.liu@...el.com wrote:
>> From: Shuo Liu <shuo.a.liu@...el.com>
>>
>> ioeventfd is a mechanism to register PIO/MMIO regions to trigger an
>> eventfd signal when written to by a User VM. ACRN userspace can register
>> any arbitrary I/O address with a corresponding eventfd and then pass the
>> eventfd to a specific end-point of interest for handling.
>>
>> Vhost is a kernel-level virtio server which uses eventfd for signalling.
>> To support vhost on ACRN, ioeventfd is introduced in HSM.
>>
>> A new I/O client dedicated to ioeventfd is associated with a User VM
>> during VM creation. HSM provides ioctls to associate an I/O region with
>> a eventfd. The I/O client signals a eventfd once its corresponding I/O
>> region is matched with an I/O request.
>>
>> Signed-off-by: Shuo Liu <shuo.a.liu@...el.com>
>> Reviewed-by: Zhi Wang <zhi.a.wang@...el.com>
>> Reviewed-by: Reinette Chatre <reinette.chatre@...el.com>
>> Cc: Zhi Wang <zhi.a.wang@...el.com>
>> Cc: Zhenyu Wang <zhenyuw@...ux.intel.com>
>> Cc: Yu Wang <yu1.wang@...el.com>
>> Cc: Reinette Chatre <reinette.chatre@...el.com>
>> Cc: Greg Kroah-Hartman <gregkh@...uxfoundation.org>
>> ---
>>  drivers/virt/acrn/Kconfig     |   1 +
>>  drivers/virt/acrn/Makefile    |   2 +-
>>  drivers/virt/acrn/acrn_drv.h  |  10 ++
>>  drivers/virt/acrn/hsm.c       |   8 +
>>  drivers/virt/acrn/ioeventfd.c | 273 ++++++++++++++++++++++++++++++++++
>>  drivers/virt/acrn/vm.c        |   2 +
>>  include/uapi/linux/acrn.h     |  29 ++++
>>  7 files changed, 324 insertions(+), 1 deletion(-)
>>  create mode 100644 drivers/virt/acrn/ioeventfd.c
>>
>> diff --git a/drivers/virt/acrn/Kconfig b/drivers/virt/acrn/Kconfig
>> index 36c80378c30c..3e1a61c9d8d8 100644
>> --- a/drivers/virt/acrn/Kconfig
>> +++ b/drivers/virt/acrn/Kconfig
>> @@ -2,6 +2,7 @@
>>  config ACRN_HSM
>>  	tristate "ACRN Hypervisor Service Module"
>>  	depends on ACRN_GUEST
>> +	select EVENTFD
>>  	help
>>  	  ACRN Hypervisor Service Module (HSM) is a kernel module which
>>  	  communicates with ACRN userspace through ioctls and talks to
>> diff --git a/drivers/virt/acrn/Makefile b/drivers/virt/acrn/Makefile
>> index 21721cbf6a80..755b583b32ca 100644
>> --- a/drivers/virt/acrn/Makefile
>> +++ b/drivers/virt/acrn/Makefile
>> @@ -1,3 +1,3 @@
>>  # SPDX-License-Identifier: GPL-2.0
>>  obj-$(CONFIG_ACRN_HSM)	:= acrn.o
>> -acrn-y := hsm.o vm.o mm.o ioreq.o
>> +acrn-y := hsm.o vm.o mm.o ioreq.o ioeventfd.o
>> diff --git a/drivers/virt/acrn/acrn_drv.h b/drivers/virt/acrn/acrn_drv.h
>> index 8a7d7721f505..e3f8190bd972 100644
>> --- a/drivers/virt/acrn/acrn_drv.h
>> +++ b/drivers/virt/acrn/acrn_drv.h
>> @@ -156,6 +156,9 @@ extern rwlock_t acrn_vm_list_lock;
>>   * @ioreq_page:			The page of the I/O request shared buffer
>>   * @pci_conf_addr:		Address of a PCI configuration access emulation
>>   * @monitor_page:		Page of interrupt statistics of User VM
>> + * @ioeventfds_lock:		Lock to protect ioeventfds list
>> + * @ioeventfds:			List to link all hsm_ioeventfd
>> + * @ioeventfd_client:		I/O client for ioeventfds of the VM
>>   */
>>  struct acrn_vm {
>>  	struct list_head		list;
>> @@ -172,6 +175,9 @@ struct acrn_vm {
>>  	struct page			*ioreq_page;
>>  	u32				pci_conf_addr;
>>  	struct page			*monitor_page;
>> +	struct mutex			ioeventfds_lock;
>> +	struct list_head		ioeventfds;
>> +	struct acrn_ioreq_client	*ioeventfd_client;
>>  };
>>
>>  struct acrn_vm *acrn_vm_create(struct acrn_vm *vm,
>> @@ -204,4 +210,8 @@ void acrn_ioreq_range_del(struct acrn_ioreq_client *client,
>>
>>  int acrn_msi_inject(struct acrn_vm *vm, u64 msi_addr, u64 msi_data);
>>
>> +int acrn_ioeventfd_init(struct acrn_vm *vm);
>> +int acrn_ioeventfd_config(struct acrn_vm *vm, struct acrn_ioeventfd *args);
>> +void acrn_ioeventfd_deinit(struct acrn_vm *vm);
>> +
>>  #endif /* __ACRN_HSM_DRV_H */
>> diff --git a/drivers/virt/acrn/hsm.c b/drivers/virt/acrn/hsm.c
>> index 449a28acb219..2cb5e5fe1b7a 100644
>> --- a/drivers/virt/acrn/hsm.c
>> +++ b/drivers/virt/acrn/hsm.c
>> @@ -111,6 +111,7 @@ static long acrn_dev_ioctl(struct file *filp, unsigned int cmd,
>>  	struct acrn_vcpu_regs *cpu_regs;
>>  	struct acrn_ioreq_notify notify;
>>  	struct acrn_ptdev_irq *irq_info;
>> +	struct acrn_ioeventfd ioeventfd;
>>  	struct acrn_vm_memmap memmap;
>>  	struct acrn_msi_entry *msi;
>>  	struct acrn_pcidev *pcidev;
>> @@ -332,6 +333,13 @@ static long acrn_dev_ioctl(struct file *filp, unsigned int cmd,
>>
>>  		ret = pmcmd_ioctl(cstate_cmd, (void __user *)ioctl_param);
>>  		break;
>> +	case ACRN_IOCTL_IOEVENTFD:
>> +		if (copy_from_user(&ioeventfd, (void __user *)ioctl_param,
>> +				   sizeof(ioeventfd)))
>> +			return -EFAULT;
>> +
>> +		ret = acrn_ioeventfd_config(vm, &ioeventfd);
>> +		break;
>>  	default:
>>  		dev_dbg(acrn_dev.this_device, "Unknown IOCTL 0x%x!\n", cmd);
>>  		ret = -ENOTTY;
>> diff --git a/drivers/virt/acrn/ioeventfd.c b/drivers/virt/acrn/ioeventfd.c
>> new file mode 100644
>> index 000000000000..ac4037e9f947
>> --- /dev/null
>> +++ b/drivers/virt/acrn/ioeventfd.c
>> @@ -0,0 +1,273 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +/*
>> + * ACRN HSM eventfd - use eventfd objects to signal expected I/O requests
>> + *
>> + * Copyright (C) 2020 Intel Corporation. All rights reserved.
>> + *
>> + * Authors:
>> + *	Shuo Liu <shuo.a.liu@...el.com>
>> + *	Yakui Zhao <yakui.zhao@...el.com>
>> + */
>> +
>> +#include <linux/eventfd.h>
>> +#include <linux/slab.h>
>> +
>> +#include "acrn_drv.h"
>> +
>> +/**
>> + * struct hsm_ioeventfd - Properties of HSM ioeventfd
>> + * @list:	Entry within &acrn_vm.ioeventfds of ioeventfds of a VM
>> + * @eventfd:	Eventfd of the HSM ioeventfd
>> + * @addr:	Address of I/O range
>> + * @data:	Data for matching
>> + * @length:	Length of I/O range
>> + * @type:	Type of I/O range (ACRN_IOREQ_TYPE_MMIO/ACRN_IOREQ_TYPE_PORTIO)
>> + * @wildcard:	Data matching or not
>> + */
>> +struct hsm_ioeventfd {
>> +	struct list_head	list;
>> +	struct eventfd_ctx	*eventfd;
>> +	u64			addr;
>> +	u64			data;
>> +	int			length;
>> +	int			type;
>> +	bool			wildcard;
>> +};
>> +
>> +static inline int ioreq_type_from_flags(int flags)
>> +{
>> +	return flags & ACRN_IOEVENTFD_FLAG_PIO ?
>> +		       ACRN_IOREQ_TYPE_PORTIO : ACRN_IOREQ_TYPE_MMIO;
>> +}
>> +
>> +static void acrn_ioeventfd_shutdown(struct acrn_vm *vm, struct hsm_ioeventfd *p)
>> +{
>> +	lockdep_assert_held(&vm->ioeventfds_lock);
>> +
>> +	eventfd_ctx_put(p->eventfd);
>> +	list_del(&p->list);
>> +	kfree(p);
>> +}
>> +
>> +static bool hsm_ioeventfd_is_conflict(struct acrn_vm *vm,
>> +				      struct hsm_ioeventfd *ioeventfd)
>> +{
>> +	struct hsm_ioeventfd *p;
>> +
>> +	lockdep_assert_held(&vm->ioeventfds_lock);
>> +
>> +	/* Either one is wildcard, the data matching will be skipped. */
>> +	list_for_each_entry(p, &vm->ioeventfds, list)
>> +		if (p->eventfd == ioeventfd->eventfd &&
>> +		    p->addr == ioeventfd->addr &&
>> +		    p->type == ioeventfd->type &&
>> +		    (p->wildcard || ioeventfd->wildcard ||
>> +			p->data == ioeventfd->data))
>> +			return true;
>> +
>> +	return false;
>> +}
>> +
>> +/*
>> + * Assign an eventfd to a VM and create a HSM ioeventfd associated with the
>> + * eventfd. The properties of the HSM ioeventfd are built from a &struct
>> + * acrn_ioeventfd.
>> + */
>> +static int acrn_ioeventfd_assign(struct acrn_vm *vm,
>> +				 struct acrn_ioeventfd *args)
>> +{
>> +	struct eventfd_ctx *eventfd;
>> +	struct hsm_ioeventfd *p;
>> +	int ret;
>> +
>> +	/* Check for range overflow */
>> +	if (args->addr + args->len < args->addr)
>> +		return -EINVAL;
>> +
>> +	/*
>> +	 * Currently, acrn_ioeventfd is used to support vhost. 1,2,4,8 width
>> +	 * accesses can cover vhost's requirements.
>> +	 */
>> +	if (!(args->len == 1 || args->len == 2 ||
>> +	      args->len == 4 || args->len == 8))
>> +		return -EINVAL;
>> +
>> +	eventfd = eventfd_ctx_fdget(args->fd);
>> +	if (IS_ERR(eventfd))
>> +		return PTR_ERR(eventfd);
>> +
>> +	p = kzalloc(sizeof(*p), GFP_KERNEL);
>> +	if (!p) {
>> +		ret = -ENOMEM;
>> +		goto fail;
>> +	}
>> +
>> +	INIT_LIST_HEAD(&p->list);
>> +	p->addr = args->addr;
>> +	p->length = args->len;
>> +	p->eventfd = eventfd;
>> +	p->type = ioreq_type_from_flags(args->flags);
>> +
>> +	/*
>> +	 * ACRN_IOEVENTFD_FLAG_DATAMATCH flag is set in virtio 1.0 support, the
>> +	 * writing of notification register of each virtqueue may trigger the
>> +	 * notification. There is no data matching requirement.
>> +	 */
>> +	if (args->flags & ACRN_IOEVENTFD_FLAG_DATAMATCH)
>> +		p->data = args->data;
>> +	else
>> +		p->wildcard = true;
>> +
>> +	mutex_lock(&vm->ioeventfds_lock);
>> +
>> +	if (hsm_ioeventfd_is_conflict(vm, p)) {
>> +		ret = -EEXIST;
>> +		goto unlock_fail;
>> +	}
>> +
>> +	/* register the I/O range into ioreq client */
>> +	ret = acrn_ioreq_range_add(vm->ioeventfd_client, p->type,
>> +				   p->addr, p->addr + p->length - 1);
>> +	if (ret < 0)
>> +		goto unlock_fail;
>> +
>> +	list_add_tail(&p->list, &vm->ioeventfds);
>> +	mutex_unlock(&vm->ioeventfds_lock);
>> +
>> +	return 0;
>> +
>> +unlock_fail:
>> +	mutex_unlock(&vm->ioeventfds_lock);
>> +	kfree(p);
>> +fail:
>> +	eventfd_ctx_put(eventfd);
>> +	return ret;
>> +}
>> +
>> +static int acrn_ioeventfd_deassign(struct acrn_vm *vm,
>> +				   struct acrn_ioeventfd *args)
>> +{
>> +	struct hsm_ioeventfd *p;
>> +	struct eventfd_ctx *eventfd;
>> +
>> +	eventfd = eventfd_ctx_fdget(args->fd);
>> +	if (IS_ERR(eventfd))
>> +		return PTR_ERR(eventfd);
>> +
>> +	mutex_lock(&vm->ioeventfds_lock);
>> +	list_for_each_entry(p, &vm->ioeventfds, list) {
>> +		if (p->eventfd != eventfd)
>> +			continue;
>> +
>> +		acrn_ioreq_range_del(vm->ioeventfd_client, p->type,
>> +				     p->addr, p->addr + p->length - 1);
>> +		acrn_ioeventfd_shutdown(vm, p);
>> +		break;
>> +	}
>> +	mutex_unlock(&vm->ioeventfds_lock);
>> +
>> +	eventfd_ctx_put(eventfd);
>> +	return 0;
>> +}
>> +
>> +static struct hsm_ioeventfd *hsm_ioeventfd_match(struct acrn_vm *vm, u64 addr,
>> +						 u64 data, int len, int type)
>> +{
>> +	struct hsm_ioeventfd *p = NULL;
>> +
>> +	lockdep_assert_held(&vm->ioeventfds_lock);
>> +
>> +	list_for_each_entry(p, &vm->ioeventfds, list) {
>> +		if (p->type == type && p->addr == addr && p->length >= len &&
>> +		    (p->wildcard || p->data == data))
>> +			return p;
>> +	}
>> +
>> +	return NULL;
>> +}
>> +
>> +static int acrn_ioeventfd_handler(struct acrn_ioreq_client *client,
>> +				  struct acrn_io_request *req)
>> +{
>> +	struct hsm_ioeventfd *p;
>> +	u64 addr, val;
>> +	int size;
>> +
>> +	if (req->type == ACRN_IOREQ_TYPE_MMIO) {
>> +		/*
>> +		 * I/O requests are dispatched by range check only, so a
>> +		 * acrn_ioreq_client need process both READ and WRITE accesses
>> +		 * of same range. READ accesses are safe to be ignored here
>> +		 * because virtio PCI devices write the notify registers for
>> +		 * notification.
>> +		 */
>> +		if (req->reqs.mmio_request.direction == ACRN_IOREQ_DIR_READ) {
>> +			/* reading does nothing and return 0 */
>> +			req->reqs.mmio_request.value = 0;
>> +			return 0;
>> +		}
>> +		addr = req->reqs.mmio_request.address;
>> +		size = req->reqs.mmio_request.size;
>> +		val = req->reqs.mmio_request.value;
>> +	} else {
>> +		if (req->reqs.pio_request.direction == ACRN_IOREQ_DIR_READ) {
>> +			/* reading does nothing and return 0 */
>> +			req->reqs.pio_request.value = 0;
>> +			return 0;
>> +		}
>> +		addr = req->reqs.pio_request.address;
>> +		size = req->reqs.pio_request.size;
>> +		val = req->reqs.pio_request.value;
>> +	}
>> +
>> +	mutex_lock(&client->vm->ioeventfds_lock);
>> +	p = hsm_ioeventfd_match(client->vm, addr, val, size, req->type);
>> +	if (p)
>> +		eventfd_signal(p->eventfd, 1);
>> +	mutex_unlock(&client->vm->ioeventfds_lock);
>> +
>> +	return 0;
>> +}
>> +
>> +int acrn_ioeventfd_config(struct acrn_vm *vm, struct acrn_ioeventfd *args)
>> +{
>> +	int ret;
>> +
>> +	if (args->flags & ACRN_IOEVENTFD_FLAG_DEASSIGN)
>> +		ret = acrn_ioeventfd_deassign(vm, args);
>> +	else
>> +		ret = acrn_ioeventfd_assign(vm, args);
>> +
>> +	return ret;
>> +}
>> +
>> +int acrn_ioeventfd_init(struct acrn_vm *vm)
>> +{
>> +	char name[ACRN_NAME_LEN];
>> +
>> +	mutex_init(&vm->ioeventfds_lock);
>> +	INIT_LIST_HEAD(&vm->ioeventfds);
>> +	snprintf(name, sizeof(name), "ioeventfd-%u", vm->vmid);
>> +	vm->ioeventfd_client = acrn_ioreq_client_create(vm,
>> +							acrn_ioeventfd_handler,
>> +							NULL, false, name);
>> +	if (!vm->ioeventfd_client) {
>> +		dev_err(acrn_dev.this_device, "Failed to create ioeventfd ioreq client!\n");
>> +		return -EINVAL;
>> +	}
>> +
>> +	dev_dbg(acrn_dev.this_device, "VM %u ioeventfd init.\n", vm->vmid);
>> +	return 0;
>> +}
>> +
>> +void acrn_ioeventfd_deinit(struct acrn_vm *vm)
>> +{
>> +	struct hsm_ioeventfd *p, *next;
>> +
>> +	dev_dbg(acrn_dev.this_device, "VM %u ioeventfd deinit.\n", vm->vmid);
>> +	acrn_ioreq_client_destroy(vm->ioeventfd_client);
>> +	mutex_lock(&vm->ioeventfds_lock);
>> +	list_for_each_entry_safe(p, next, &vm->ioeventfds, list)
>> +		acrn_ioeventfd_shutdown(vm, p);
>> +	mutex_unlock(&vm->ioeventfds_lock);
>> +}
>> diff --git a/drivers/virt/acrn/vm.c b/drivers/virt/acrn/vm.c
>> index db597f27690a..2b322d956a8c 100644
>> --- a/drivers/virt/acrn/vm.c
>> +++ b/drivers/virt/acrn/vm.c
>> @@ -50,6 +50,7 @@ struct acrn_vm *acrn_vm_create(struct acrn_vm *vm,
>>  	list_add(&vm->list, &acrn_vm_list);
>>  	write_unlock_bh(&acrn_vm_list_lock);
>>
>> +	acrn_ioeventfd_init(vm);
>>  	dev_dbg(acrn_dev.this_device, "VM %u created.\n", vm->vmid);
>>  	return vm;
>>  }
>> @@ -67,6 +68,7 @@ int acrn_vm_destroy(struct acrn_vm *vm)
>>  	list_del_init(&vm->list);
>>  	write_unlock_bh(&acrn_vm_list_lock);
>>
>> +	acrn_ioeventfd_deinit(vm);
>>  	acrn_ioreq_deinit(vm);
>>  	if (vm->monitor_page) {
>>  		put_page(vm->monitor_page);
>> diff --git a/include/uapi/linux/acrn.h b/include/uapi/linux/acrn.h
>> index f929a6264bbc..23a5b767c3e2 100644
>> --- a/include/uapi/linux/acrn.h
>> +++ b/include/uapi/linux/acrn.h
>> @@ -479,6 +479,32 @@ enum acrn_pm_cmd_type {
>>  	ACRN_PMCMD_GET_CX_DATA,
>>  };
>>
>> +#define ACRN_IOEVENTFD_FLAG_PIO		0x01
>> +#define ACRN_IOEVENTFD_FLAG_DATAMATCH	0x02
>> +#define ACRN_IOEVENTFD_FLAG_DEASSIGN	0x04
>> +/**
>> + * struct acrn_ioeventfd - Data to operate a &struct hsm_ioeventfd
>> + * @fd:		The fd of eventfd associated with a hsm_ioeventfd
>> + * @flags:	Logical-OR of ACRN_IOEVENTFD_FLAG_*
>> + * @addr:	The start address of IO range of ioeventfd
>> + * @len:	The length of IO range of ioeventfd
>> + * @reserved:	Reserved
>> + * @data:	Data for data matching
>> + *
>> + * Without flag ACRN_IOEVENTFD_FLAG_DEASSIGN, ioctl ACRN_IOCTL_IOEVENTFD
>> + * creates a &struct hsm_ioeventfd with properties originated from &struct
>> + * acrn_ioeventfd. With flag ACRN_IOEVENTFD_FLAG_DEASSIGN, ioctl
>> + * ACRN_IOCTL_IOEVENTFD destroys the &struct hsm_ioeventfd matching the fd.
>> + */
>> +struct acrn_ioeventfd {
>> +	__u32	fd;
>> +	__u32	flags;
>> +	__u64	addr;
>> +	__u32	len;
>> +	__u32	reserved;
>
>You are not checking your reserved fields for 0.
>
>I thought I asked about this before, please do so for all of your
>ioctls, otherwise they are not "reserved" at all.

OK. Sorry for leaving someones out. I will check them all.

BTW, would you like to continue to review on this version or need i
re-send a new version with above fix?

Thanks
shuo


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ