lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1513656340.2743.3.camel@au1.ibm.com>
Date:   Tue, 19 Dec 2017 15:05:40 +1100
From:   Benjamin Herrenschmidt <benh@....ibm.com>
To:     Frederic Barrat <fbarrat@...ux.vnet.ibm.com>,
        linuxppc-dev@...ts.ozlabs.org, linux-kernel@...r.kernel.org
Cc:     arnd@...db.de, gregkh@...uxfoundation.org, mpe@...erman.id.au,
        andrew.donnellan@....ibm.com, alastair@....ibm.com,
        Cédric Le Goater <clg@...d.org>
Subject: Re: [PATCH 07/13] ocxl: Add AFU interrupt support

On Mon, 2017-12-18 at 16:21 +0100, Frederic Barrat wrote:
> Add user APIs through ioctl to allocate, free, and be notified of an
> AFU interrupt.
> 
> For opencapi, an AFU can trigger an interrupt on the host by sending a
> specific command targeting a 64-bit object handle. On POWER9, this is
> implemented by mapping a special page in the address space of a
> process and a write to that page will trigger an interrupt.

We need to figure out how that plays with KVM. +Cedric..

For all those "generic XIVE" interrupts, whether they are used for
OpenCAPI, plain guest IPIs, NX interrupts, etc., and also for actual
pass-through ones, we'll need a mechanism to map the trigger and ESB
pages into QEMU.

We can't have a bazillion VMAs and KVM memory regions either, so we'll
need some kind of mechanism/driver which allows for a single fairly
large mmap'ed VMA which can then be "populated" with interrupt control
pages.

The issue, of course, is that we can't really do a "generic" system that
allows mapping any interrupt; that would be a security issue. So we need
the interrupt "owner" to be the one allowing this: VFIO for PCI, for
example, possibly a specific VFIO variant for OpenCAPI, and something else
for guest IPIs?

Food for thought...

Ben.

> 
> Signed-off-by: Frederic Barrat <fbarrat@...ux.vnet.ibm.com>
> ---
>  arch/powerpc/include/asm/pnv-ocxl.h   |   3 +
>  arch/powerpc/platforms/powernv/ocxl.c |  30 +++++
>  drivers/misc/ocxl/afu_irq.c           | 204 ++++++++++++++++++++++++++++++++++
>  drivers/misc/ocxl/context.c           |  40 ++++++-
>  drivers/misc/ocxl/file.c              |  33 ++++++
>  drivers/misc/ocxl/link.c              |  28 +++++
>  drivers/misc/ocxl/ocxl_internal.h     |   7 ++
>  include/uapi/misc/ocxl.h              |   9 ++
>  8 files changed, 352 insertions(+), 2 deletions(-)
>  create mode 100644 drivers/misc/ocxl/afu_irq.c
> 
> diff --git a/arch/powerpc/include/asm/pnv-ocxl.h b/arch/powerpc/include/asm/pnv-ocxl.h
> index 5a7ae7f28209..1e26f0a39500 100644
> --- a/arch/powerpc/include/asm/pnv-ocxl.h
> +++ b/arch/powerpc/include/asm/pnv-ocxl.h
> @@ -37,4 +37,7 @@ extern int pnv_ocxl_spa_setup(struct pci_dev *dev, void *spa_mem, int PE_mask,
>  extern void pnv_ocxl_spa_release(void *platform_data);
>  extern int pnv_ocxl_spa_remove_pe(void *platform_data, int pe_handle);
>  
> +extern int pnv_ocxl_alloc_xive_irq(u32 *irq, u64 *trigger_addr);
> +extern void pnv_ocxl_free_xive_irq(u32 irq);
> +
>  #endif /* _ASM_PVN_OCXL_H */
> diff --git a/arch/powerpc/platforms/powernv/ocxl.c b/arch/powerpc/platforms/powernv/ocxl.c
> index 6c79924b95c8..96cafba6aef1 100644
> --- a/arch/powerpc/platforms/powernv/ocxl.c
> +++ b/arch/powerpc/platforms/powernv/ocxl.c
> @@ -9,6 +9,7 @@
>  
>  #include <asm/pnv-ocxl.h>
>  #include <asm/opal.h>
> +#include <asm/xive.h>
>  #include <misc/ocxl-config.h>
>  #include "pci.h"
>  
> @@ -487,3 +488,32 @@ int pnv_ocxl_spa_remove_pe(void *platform_data, int pe_handle)
>  	return rc;
>  }
>  EXPORT_SYMBOL_GPL(pnv_ocxl_spa_remove_pe);
> +
> +int pnv_ocxl_alloc_xive_irq(u32 *irq, u64 *trigger_addr)
> +{
> +	__be64 flags, trigger_page;
> +	s64 rc;
> +	u32 hwirq;
> +
> +	hwirq = xive_native_alloc_irq();
> +	if (!hwirq)
> +		return -ENOENT;
> +
> +	rc = opal_xive_get_irq_info(hwirq, &flags, NULL, &trigger_page, NULL,
> +				NULL);
> +	if (rc || !trigger_page) {
> +		xive_native_free_irq(hwirq);
> +		return -ENOENT;
> +	}
> +	*irq = hwirq;
> +	*trigger_addr = be64_to_cpu(trigger_page);
> +	return 0;
> +
> +}
> +EXPORT_SYMBOL_GPL(pnv_ocxl_alloc_xive_irq);
> +
> +void pnv_ocxl_free_xive_irq(u32 irq)
> +{
> +	xive_native_free_irq(irq);
> +}
> +EXPORT_SYMBOL_GPL(pnv_ocxl_free_xive_irq);
> diff --git a/drivers/misc/ocxl/afu_irq.c b/drivers/misc/ocxl/afu_irq.c
> new file mode 100644
> index 000000000000..0b217a854837
> --- /dev/null
> +++ b/drivers/misc/ocxl/afu_irq.c
> @@ -0,0 +1,204 @@
> +/*
> + * Copyright 2017 IBM Corp.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License
> + * as published by the Free Software Foundation; either version
> + * 2 of the License, or (at your option) any later version.
> + */
> +
> +#include <linux/interrupt.h>
> +#include <linux/eventfd.h>
> +#include <asm/pnv-ocxl.h>
> +#include "ocxl_internal.h"
> +
> +struct afu_irq {
> +	int id;
> +	int hw_irq;
> +	unsigned int virq;
> +	char *name;
> +	u64 trigger_page;
> +	struct eventfd_ctx *ev_ctx;
> +};
> +
> +static int irq_offset_to_id(struct ocxl_context *ctx, u64 offset)
> +{
> +	return (offset - ctx->afu->irq_base_offset) >> PAGE_SHIFT;
> +}
> +
> +static u64 irq_id_to_offset(struct ocxl_context *ctx, int id)
> +{
> +	return ctx->afu->irq_base_offset + (id << PAGE_SHIFT);
> +}
> +
> +static irqreturn_t afu_irq_handler(int virq, void *data)
> +{
> +	struct afu_irq *irq = (struct afu_irq *) data;
> +
> +	if (irq->ev_ctx)
> +		eventfd_signal(irq->ev_ctx, 1);
> +	return IRQ_HANDLED;
> +}
> +
> +static int setup_afu_irq(struct ocxl_context *ctx, struct afu_irq *irq)
> +{
> +	int rc;
> +
> +	irq->virq = irq_create_mapping(NULL, irq->hw_irq);
> +	if (!irq->virq) {
> +		pr_err("irq_create_mapping failed\n");
> +		return -ENOMEM;
> +	}
> +	pr_debug("hw_irq %d mapped to virq %u\n", irq->hw_irq, irq->virq);
> +
> +	irq->name = kasprintf(GFP_KERNEL, "ocxl-afu-%u", irq->virq);
> +	if (!irq->name) {
> +		irq_dispose_mapping(irq->virq);
> +		return -ENOMEM;
> +	}
> +
> +	rc = request_irq(irq->virq, afu_irq_handler, 0, irq->name, irq);
> +	if (rc) {
> +		kfree(irq->name);
> +		irq->name = NULL;
> +		irq_dispose_mapping(irq->virq);
> +		pr_err("request_irq failed: %d\n", rc);
> +		return rc;
> +	}
> +	return 0;
> +}
> +
> +static void release_afu_irq(struct afu_irq *irq)
> +{
> +	free_irq(irq->virq, irq);
> +	irq_dispose_mapping(irq->virq);
> +	kfree(irq->name);
> +}
> +
> +int ocxl_afu_irq_alloc(struct ocxl_context *ctx, u64 *irq_offset)
> +{
> +	struct afu_irq *irq;
> +	int rc;
> +
> +	irq = kzalloc(sizeof(struct afu_irq), GFP_KERNEL);
> +	if (!irq)
> +		return -ENOMEM;
> +
> +	/*
> +	 * We limit the number of afu irqs per context and per link to
> +	 * avoid a single process or user depleting the pool of IPIs
> +	 */
> +
> +	mutex_lock(&ctx->irq_lock);
> +
> +	irq->id = idr_alloc(&ctx->irq_idr, irq, 0, MAX_IRQ_PER_CONTEXT,
> +			GFP_KERNEL);
> +	if (irq->id < 0) {
> +		rc = -ENOSPC;
> +		goto err_unlock;
> +	}
> +
> +	rc = ocxl_link_irq_alloc(ctx->afu->fn->link, &irq->hw_irq,
> +				&irq->trigger_page);
> +	if (rc)
> +		goto err_idr;
> +
> +	rc = setup_afu_irq(ctx, irq);
> +	if (rc)
> +		goto err_alloc;
> +
> +	*irq_offset = irq_id_to_offset(ctx, irq->id);
> +
> +	mutex_unlock(&ctx->irq_lock);
> +	return 0;
> +
> +err_alloc:
> +	ocxl_link_free_irq(ctx->afu->fn->link, irq->hw_irq);
> +err_idr:
> +	idr_remove(&ctx->irq_idr, irq->id);
> +err_unlock:
> +	mutex_unlock(&ctx->irq_lock);
> +	kfree(irq);
> +	return rc;
> +}
> +
> +static void afu_irq_free(struct afu_irq *irq, struct ocxl_context *ctx)
> +{
> +	if (ctx->mapping)
> +		unmap_mapping_range(ctx->mapping,
> +				irq_id_to_offset(ctx, irq->id),
> +				1 << PAGE_SHIFT, 1);
> +	release_afu_irq(irq);
> +	if (irq->ev_ctx)
> +		eventfd_ctx_put(irq->ev_ctx);
> +	ocxl_link_free_irq(ctx->afu->fn->link, irq->hw_irq);
> +	kfree(irq);
> +}
> +
> +int ocxl_afu_irq_free(struct ocxl_context *ctx, u64 irq_offset)
> +{
> +	struct afu_irq *irq;
> +	int id = irq_offset_to_id(ctx, irq_offset);
> +
> +	mutex_lock(&ctx->irq_lock);
> +
> +	irq = idr_find(&ctx->irq_idr, id);
> +	if (!irq) {
> +		mutex_unlock(&ctx->irq_lock);
> +		return -EINVAL;
> +	}
> +	idr_remove(&ctx->irq_idr, irq->id);
> +	afu_irq_free(irq, ctx);
> +	mutex_unlock(&ctx->irq_lock);
> +	return 0;
> +}
> +
> +void ocxl_afu_irq_free_all(struct ocxl_context *ctx)
> +{
> +	struct afu_irq *irq;
> +	int id;
> +
> +	mutex_lock(&ctx->irq_lock);
> +	idr_for_each_entry(&ctx->irq_idr, irq, id)
> +		afu_irq_free(irq, ctx);
> +	mutex_unlock(&ctx->irq_lock);
> +}
> +
> +int ocxl_afu_irq_set_fd(struct ocxl_context *ctx, u64 irq_offset, int eventfd)
> +{
> +	struct afu_irq *irq;
> +	struct eventfd_ctx *ev_ctx;
> +	int rc = 0, id = irq_offset_to_id(ctx, irq_offset);
> +
> +	mutex_lock(&ctx->irq_lock);
> +	irq = idr_find(&ctx->irq_idr, id);
> +	if (!irq) {
> +		rc = -EINVAL;
> +		goto unlock;
> +	}
> +
> +	ev_ctx = eventfd_ctx_fdget(eventfd);
> +	if (IS_ERR(ev_ctx)) {
> +		rc = -EINVAL;
> +		goto unlock;
> +	}
> +
> +	irq->ev_ctx = ev_ctx;
> +unlock:
> +	mutex_unlock(&ctx->irq_lock);
> +	return rc;
> +}
> +
> +u64 ocxl_afu_irq_get_addr(struct ocxl_context *ctx, u64 irq_offset)
> +{
> +	struct afu_irq *irq;
> +	int id = irq_offset_to_id(ctx, irq_offset);
> +	u64 addr = 0;
> +
> +	mutex_lock(&ctx->irq_lock);
> +	irq = idr_find(&ctx->irq_idr, id);
> +	if (irq)
> +		addr = irq->trigger_page;
> +	mutex_unlock(&ctx->irq_lock);
> +	return addr;
> +}
> diff --git a/drivers/misc/ocxl/context.c b/drivers/misc/ocxl/context.c
> index 0bc0dd97d784..19575269ed22 100644
> --- a/drivers/misc/ocxl/context.c
> +++ b/drivers/misc/ocxl/context.c
> @@ -38,6 +38,8 @@ int ocxl_context_init(struct ocxl_context *ctx, struct ocxl_afu *afu,
>  	mutex_init(&ctx->mapping_lock);
>  	init_waitqueue_head(&ctx->events_wq);
>  	mutex_init(&ctx->xsl_error_lock);
> +	mutex_init(&ctx->irq_lock);
> +	idr_init(&ctx->irq_idr);
>  	/*
>  	 * Keep a reference on the AFU to make sure it's valid for the
>  	 * duration of the life of the context
> @@ -87,6 +89,19 @@ int ocxl_context_attach(struct ocxl_context *ctx, u64 amr)
>  	return rc;
>  }
>  
> +static int map_afu_irq(struct vm_area_struct *vma, unsigned long address,
> +		u64 offset, struct ocxl_context *ctx)
> +{
> +	u64 trigger_addr;
> +
> +	trigger_addr = ocxl_afu_irq_get_addr(ctx, offset);
> +	if (!trigger_addr)
> +		return VM_FAULT_SIGBUS;
> +
> +	vm_insert_pfn(vma, address, trigger_addr >> PAGE_SHIFT);
> +	return VM_FAULT_NOPAGE;
> +}
> +
>  static int map_pp_mmio(struct vm_area_struct *vma, unsigned long address,
>  		u64 offset, struct ocxl_context *ctx)
>  {
> @@ -125,7 +140,10 @@ static int ocxl_mmap_fault(struct vm_fault *vmf)
>  	pr_debug("%s: pasid %d address 0x%lx offset 0x%llx\n", __func__,
>  		ctx->pasid, vmf->address, offset);
>  
> -	rc = map_pp_mmio(vma, vmf->address, offset, ctx);
> +	if (offset < ctx->afu->irq_base_offset)
> +		rc = map_pp_mmio(vma, vmf->address, offset, ctx);
> +	else
> +		rc = map_afu_irq(vma, vmf->address, offset, ctx);
>  	return rc;
>  }
>  
> @@ -133,6 +151,19 @@ static const struct vm_operations_struct ocxl_vmops = {
>  	.fault = ocxl_mmap_fault,
>  };
>  
> +static int check_mmap_afu_irq(struct ocxl_context *ctx,
> +			struct vm_area_struct *vma)
> +{
> +	/* only one page */
> +	if (vma_pages(vma) != 1)
> +		return -EINVAL;
> +
> +	/* check offset validity */
> +	if (!ocxl_afu_irq_get_addr(ctx, vma->vm_pgoff << PAGE_SHIFT))
> +		return -EINVAL;
> +	return 0;
> +}
> +
>  static int check_mmap_mmio(struct ocxl_context *ctx,
>  			struct vm_area_struct *vma)
>  {
> @@ -146,7 +177,10 @@ int ocxl_context_mmap(struct ocxl_context *ctx, struct vm_area_struct *vma)
>  {
>  	int rc;
>  
> -	rc = check_mmap_mmio(ctx, vma);
> +	if ((vma->vm_pgoff << PAGE_SHIFT) < ctx->afu->irq_base_offset)
> +		rc = check_mmap_mmio(ctx, vma);
> +	else
> +		rc = check_mmap_afu_irq(ctx, vma);
>  	if (rc)
>  		return rc;
>  
> @@ -231,6 +265,8 @@ void ocxl_context_free(struct ocxl_context *ctx)
>  	idr_remove(&ctx->afu->contexts_idr, ctx->pasid);
>  	mutex_unlock(&ctx->afu->contexts_lock);
>  
> +	ocxl_afu_irq_free_all(ctx);
> +	idr_destroy(&ctx->irq_idr);
>  	/* reference to the AFU taken in ocxl_context_init */
>  	ocxl_afu_put(ctx->afu);
>  	kfree(ctx);
> diff --git a/drivers/misc/ocxl/file.c b/drivers/misc/ocxl/file.c
> index a51386eff4f5..0a73e2c11ba6 100644
> --- a/drivers/misc/ocxl/file.c
> +++ b/drivers/misc/ocxl/file.c
> @@ -110,12 +110,17 @@ static long afu_ioctl_attach(struct ocxl_context *ctx,
>  }
>  
>  #define CMD_STR(x) (x == OCXL_IOCTL_ATTACH ? "ATTACH" :			\
> +			x == OCXL_IOCTL_IRQ_ALLOC ? "IRQ_ALLOC" :	\
> +			x == OCXL_IOCTL_IRQ_FREE ? "IRQ_FREE" :		\
> +			x == OCXL_IOCTL_IRQ_SET_FD ? "IRQ_SET_FD" :	\
>  			"UNKNOWN")
>  
>  static long afu_ioctl(struct file *file, unsigned int cmd,
>  		unsigned long args)
>  {
>  	struct ocxl_context *ctx = file->private_data;
> +	struct ocxl_ioctl_irq_fd irq_fd;
> +	u64 irq_offset;
>  	long rc;
>  
>  	pr_debug("%s for context %d, command %s\n", __func__, ctx->pasid,
> @@ -130,6 +135,34 @@ static long afu_ioctl(struct file *file, unsigned int cmd,
>  				(struct ocxl_ioctl_attach __user *) args);
>  		break;
>  
> +	case OCXL_IOCTL_IRQ_ALLOC:
> +		rc = ocxl_afu_irq_alloc(ctx, &irq_offset);
> +		if (!rc) {
> +			rc = copy_to_user((u64 *) args, &irq_offset,
> +					sizeof(irq_offset));
> +			if (rc)
> +				ocxl_afu_irq_free(ctx, irq_offset);
> +		}
> +		break;
> +
> +	case OCXL_IOCTL_IRQ_FREE:
> +		rc = copy_from_user(&irq_offset, (u64 *) args,
> +				sizeof(irq_offset));
> +		if (rc)
> +			return -EFAULT;
> +		rc = ocxl_afu_irq_free(ctx, irq_offset);
> +		break;
> +
> +	case OCXL_IOCTL_IRQ_SET_FD:
> +		rc = copy_from_user(&irq_fd, (u64 *) args, sizeof(irq_fd));
> +		if (rc)
> +			return -EFAULT;
> +		if (irq_fd.reserved)
> +			return -EINVAL;
> +		rc = ocxl_afu_irq_set_fd(ctx, irq_fd.irq_offset,
> +					irq_fd.eventfd);
> +		break;
> +
>  	default:
>  		rc = -EINVAL;
>  	}
> diff --git a/drivers/misc/ocxl/link.c b/drivers/misc/ocxl/link.c
> index 6b184cd7d2a6..5f12564eea99 100644
> --- a/drivers/misc/ocxl/link.c
> +++ b/drivers/misc/ocxl/link.c
> @@ -608,3 +608,31 @@ int ocxl_link_remove_pe(void *link_handle, int pasid)
>  	mutex_unlock(&spa->spa_lock);
>  	return rc;
>  }
> +
> +int ocxl_link_irq_alloc(void *link_handle, int *hw_irq, u64 *trigger_addr)
> +{
> +	struct link *link = (struct link *) link_handle;
> +	int rc, irq;
> +	u64 addr;
> +
> +	if (atomic_dec_if_positive(&link->irq_available) < 0)
> +		return -ENOSPC;
> +
> +	rc = pnv_ocxl_alloc_xive_irq(&irq, &addr);
> +	if (rc) {
> +		atomic_inc(&link->irq_available);
> +		return rc;
> +	}
> +
> +	*hw_irq = irq;
> +	*trigger_addr = addr;
> +	return 0;
> +}
> +
> +void ocxl_link_free_irq(void *link_handle, int hw_irq)
> +{
> +	struct link *link = (struct link *) link_handle;
> +
> +	pnv_ocxl_free_xive_irq(hw_irq);
> +	atomic_inc(&link->irq_available);
> +}
> diff --git a/drivers/misc/ocxl/ocxl_internal.h b/drivers/misc/ocxl/ocxl_internal.h
> index e07f7d523275..829369c5f004 100644
> --- a/drivers/misc/ocxl/ocxl_internal.h
> +++ b/drivers/misc/ocxl/ocxl_internal.h
> @@ -197,4 +197,11 @@ extern void ocxl_context_free(struct ocxl_context *ctx);
>  extern int ocxl_sysfs_add_afu(struct ocxl_afu *afu);
>  extern void ocxl_sysfs_remove_afu(struct ocxl_afu *afu);
>  
> +extern int ocxl_afu_irq_alloc(struct ocxl_context *ctx, u64 *irq_offset);
> +extern int ocxl_afu_irq_free(struct ocxl_context *ctx, u64 irq_offset);
> +extern void ocxl_afu_irq_free_all(struct ocxl_context *ctx);
> +extern int ocxl_afu_irq_set_fd(struct ocxl_context *ctx, u64 irq_offset,
> +			int eventfd);
> +extern u64 ocxl_afu_irq_get_addr(struct ocxl_context *ctx, u64 irq_offset);
> +
>  #endif /* _OCXL_INTERNAL_H_ */
> diff --git a/include/uapi/misc/ocxl.h b/include/uapi/misc/ocxl.h
> index 71fa387f2efd..488e75228c33 100644
> --- a/include/uapi/misc/ocxl.h
> +++ b/include/uapi/misc/ocxl.h
> @@ -39,9 +39,18 @@ struct ocxl_ioctl_attach {
>  	__u64 reserved3;
>  };
>  
> +struct ocxl_ioctl_irq_fd {
> +	__u64 irq_offset;
> +	__s32 eventfd;
> +	__u32 reserved;
> +};
> +
>  /* ioctl numbers */
>  #define OCXL_MAGIC 0xCA
>  /* AFU devices */
>  #define OCXL_IOCTL_ATTACH	_IOW(OCXL_MAGIC, 0x10, struct ocxl_ioctl_attach)
> +#define OCXL_IOCTL_IRQ_ALLOC	_IOR(OCXL_MAGIC, 0x11, __u64)
> +#define OCXL_IOCTL_IRQ_FREE	_IOW(OCXL_MAGIC, 0x12, __u64)
> +#define OCXL_IOCTL_IRQ_SET_FD	_IOW(OCXL_MAGIC, 0x13, struct ocxl_ioctl_irq_fd)
>  
>  #endif /* _UAPI_MISC_OCXL_H */

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ