Message-Id: <d2a83144-a1f3-2125-adc2-4c6081b3f947@au1.ibm.com>
Date: Wed, 3 Jan 2018 18:30:59 +1100
From: Andrew Donnellan <andrew.donnellan@....ibm.com>
To: Frederic Barrat <fbarrat@...ux.vnet.ibm.com>,
linuxppc-dev@...ts.ozlabs.org, linux-kernel@...r.kernel.org
Cc: arnd@...db.de, gregkh@...uxfoundation.org, mpe@...erman.id.au,
alastair@....ibm.com
Subject: Re: [PATCH 06/13] ocxl: Driver code for 'generic' opencapi devices
On 19/12/17 02:21, Frederic Barrat wrote:
> Add an ocxl driver to handle generic opencapi devices. Of course, it's
> not meant to be the only opencapi driver; any device is free to
> implement its own. But if a host application only needs basic services
> like attaching to an opencapi adapter, having translation faults
> handled, or allocating AFU interrupts, it should suffice.
>
> The AFU config space must follow the opencapi specification and use
> the expected vendor/device ID to be seen by the generic driver.
>
> The driver exposes the device AFUs as a char device in /dev/ocxl/
>
> Note that the driver currently doesn't handle memory attached to the
> opencapi device.
>
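Since the commit message mentions the char device interface — for anyone
following along, attaching from userspace would look roughly like the
sketch below. Untested; the node name under /dev/ocxl/ and the
attach_afu() wrapper are just for illustration, and it assumes the uapi
header from this patch gets installed as <misc/ocxl.h>:

	#include <fcntl.h>
	#include <string.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <misc/ocxl.h>	/* uapi header added by this patch */

	int attach_afu(const char *path)	/* a node under /dev/ocxl/ */
	{
		struct ocxl_ioctl_attach arg;
		int fd;

		fd = open(path, O_RDWR);
		if (fd < 0)
			return -1;

		/* reserved fields must be zero for forward compatibility */
		memset(&arg, 0, sizeof(arg));
		arg.amr = 0;
		if (ioctl(fd, OCXL_IOCTL_ATTACH, &arg) < 0) {
			close(fd);
			return -1;
		}
		return fd;	/* per-process MMIO can now be mmap'd */
	}
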
> Signed-off-by: Frederic Barrat <fbarrat@...ux.vnet.ibm.com>
> Signed-off-by: Andrew Donnellan <andrew.donnellan@....ibm.com>
> Signed-off-by: Alastair D'Silva <alastair@...ilva.org>
A bunch of sparse warnings below that we should look at (there are a few
more that appear in later patches too).
> ---
> drivers/misc/ocxl/config.c | 718 ++++++++++++++++++++++++++++++++++++++
> drivers/misc/ocxl/context.c | 237 +++++++++++++
> drivers/misc/ocxl/file.c | 405 +++++++++++++++++++++
> drivers/misc/ocxl/link.c | 610 ++++++++++++++++++++++++++++++++
> drivers/misc/ocxl/main.c | 40 +++
> drivers/misc/ocxl/ocxl_internal.h | 200 +++++++++++
> drivers/misc/ocxl/pasid.c | 114 ++++++
> drivers/misc/ocxl/pci.c | 592 +++++++++++++++++++++++++++++++
> drivers/misc/ocxl/sysfs.c | 150 ++++++++
> include/uapi/misc/ocxl.h | 47 +++
> 10 files changed, 3113 insertions(+)
> create mode 100644 drivers/misc/ocxl/config.c
> create mode 100644 drivers/misc/ocxl/context.c
> create mode 100644 drivers/misc/ocxl/file.c
> create mode 100644 drivers/misc/ocxl/link.c
> create mode 100644 drivers/misc/ocxl/main.c
> create mode 100644 drivers/misc/ocxl/ocxl_internal.h
> create mode 100644 drivers/misc/ocxl/pasid.c
> create mode 100644 drivers/misc/ocxl/pci.c
> create mode 100644 drivers/misc/ocxl/sysfs.c
> create mode 100644 include/uapi/misc/ocxl.h
>
> diff --git a/drivers/misc/ocxl/config.c b/drivers/misc/ocxl/config.c
> new file mode 100644
> index 000000000000..bb2fde5967e2
> --- /dev/null
> +++ b/drivers/misc/ocxl/config.c
> @@ -0,0 +1,718 @@
> +/*
> + * Copyright 2017 IBM Corp.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License
> + * as published by the Free Software Foundation; either version
> + * 2 of the License, or (at your option) any later version.
> + */
> +
> +#include <linux/pci.h>
> +#include <asm/pnv-ocxl.h>
> +#include <misc/ocxl-config.h>
> +#include "ocxl_internal.h"
> +
> +#define EXTRACT_BIT(val, bit) (!!(val & BIT(bit)))
> +#define EXTRACT_BITS(val, s, e) ((val & GENMASK(e, s)) >> s)
> +
> +#define OCXL_DVSEC_AFU_IDX_MASK GENMASK(5, 0)
> +#define OCXL_DVSEC_ACTAG_MASK GENMASK(11, 0)
> +#define OCXL_DVSEC_PASID_MASK GENMASK(19, 0)
> +#define OCXL_DVSEC_PASID_LOG_MASK GENMASK(4, 0)
> +
> +#define OCXL_DVSEC_TEMPL_VERSION 0x0
> +#define OCXL_DVSEC_TEMPL_NAME 0x4
> +#define OCXL_DVSEC_TEMPL_AFU_VERSION 0x1C
> +#define OCXL_DVSEC_TEMPL_MMIO_GLOBAL 0x20
> +#define OCXL_DVSEC_TEMPL_MMIO_GLOBAL_SZ 0x28
> +#define OCXL_DVSEC_TEMPL_MMIO_PP 0x30
> +#define OCXL_DVSEC_TEMPL_MMIO_PP_SZ 0x38
> +#define OCXL_DVSEC_TEMPL_MEM_SZ 0x3C
> +#define OCXL_DVSEC_TEMPL_WWID 0x40
> +
> +#define OCXL_MAX_AFU_PER_FUNCTION 64
> +#define OCXL_TEMPL_LEN 0x58
> +#define OCXL_TEMPL_NAME_LEN 24
> +#define OCXL_CFG_TIMEOUT 3
> +
> +static int find_dvsec(struct pci_dev *dev, int dvsec_id)
> +{
> + int vsec = 0;
> + u16 vendor, id;
> +
> + while ((vsec = pci_find_next_ext_capability(dev, vsec,
> + OCXL_EXT_CAP_ID_DVSEC))) {
> + pci_read_config_word(dev, vsec + OCXL_DVSEC_VENDOR_OFFSET,
> + &vendor);
> + pci_read_config_word(dev, vsec + OCXL_DVSEC_ID_OFFSET, &id);
> + if (vendor == PCI_VENDOR_ID_IBM && id == dvsec_id)
> + return vsec;
> + }
> + return 0;
> +}
> +
> +static int find_dvsec_afu_ctrl(struct pci_dev *dev, u8 afu_idx)
> +{
> + int vsec = 0;
> + u16 vendor, id;
> + u8 idx;
> +
> + while ((vsec = pci_find_next_ext_capability(dev, vsec,
> + OCXL_EXT_CAP_ID_DVSEC))) {
> + pci_read_config_word(dev, vsec + OCXL_DVSEC_VENDOR_OFFSET,
> + &vendor);
> + pci_read_config_word(dev, vsec + OCXL_DVSEC_ID_OFFSET, &id);
> +
> + if (vendor == PCI_VENDOR_ID_IBM &&
> + id == OCXL_DVSEC_AFU_CTRL_ID) {
> + pci_read_config_byte(dev,
> + vsec + OCXL_DVSEC_AFU_CTRL_AFU_IDX,
> + &idx);
> + if (idx == afu_idx)
> + return vsec;
> + }
> + }
> + return 0;
> +}
> +
> +static int read_pasid(struct pci_dev *dev, struct ocxl_fn_config *fn)
> +{
> + u16 val;
> + int pos;
> +
> + pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_PASID);
> + if (!pos) {
> + /*
> + * PASID capability is not mandatory, but if it's
> + * missing, the function shouldn't define any AFU
> + */
> + dev_dbg(&dev->dev, "Function doesn't require any PASID\n");
> + fn->max_pasid_log = -1;
> + goto out;
> + }
> + pci_read_config_word(dev, pos + PCI_PASID_CAP, &val);
> + fn->max_pasid_log = EXTRACT_BITS(val, 8, 12);
> +
> +out:
> + dev_dbg(&dev->dev, "PASID capability:\n");
> + dev_dbg(&dev->dev, " Max PASID log = %d\n", fn->max_pasid_log);
> + return 0;
> +}
> +
> +static int read_dvsec_tl(struct pci_dev *dev, struct ocxl_fn_config *fn)
> +{
> + int pos;
> +
> + pos = find_dvsec(dev, OCXL_DVSEC_TL_ID);
> + if (!pos && PCI_FUNC(dev->devfn) == 0) {
> + dev_err(&dev->dev, "Can't find TL DVSEC\n");
> + return -ENODEV;
> + }
> + if (pos && PCI_FUNC(dev->devfn) != 0) {
> + dev_err(&dev->dev, "TL DVSEC is only allowed on function 0\n");
> + return -ENODEV;
> + }
> + fn->dvsec_tl_pos = pos;
> + return 0;
> +}
> +
> +static int read_dvsec_function(struct pci_dev *dev, struct ocxl_fn_config *fn)
> +{
> + int pos, afu_present;
> + u32 val;
> +
> + pos = find_dvsec(dev, OCXL_DVSEC_FUNC_ID);
> + if (!pos) {
> + dev_err(&dev->dev, "Can't find function DVSEC\n");
> + return -ENODEV;
> + }
> + fn->dvsec_function_pos = pos;
> +
> + pci_read_config_dword(dev, pos + OCXL_DVSEC_FUNC_OFF_INDEX, &val);
> + afu_present = EXTRACT_BIT(val, 31);
> + if (!afu_present) {
> + fn->max_afu_index = -1;
> + dev_dbg(&dev->dev, "Function doesn't define any AFU\n");
> + goto out;
> + }
> + fn->max_afu_index = EXTRACT_BITS(val, 24, 29);
> +
> +out:
> + dev_dbg(&dev->dev, "Function DVSEC:\n");
> + dev_dbg(&dev->dev, " Max AFU index = %d\n", fn->max_afu_index);
> + return 0;
> +}
> +
> +static int read_dvsec_afu_info(struct pci_dev *dev, struct ocxl_fn_config *fn)
> +{
> + int pos;
> +
> + if (fn->max_afu_index < 0) {
> + fn->dvsec_afu_info_pos = -1;
> + return 0;
> + }
> +
> + pos = find_dvsec(dev, OCXL_DVSEC_AFU_INFO_ID);
> + if (!pos) {
> + dev_err(&dev->dev, "Can't find AFU information DVSEC\n");
> + return -ENODEV;
> + }
> + fn->dvsec_afu_info_pos = pos;
> + return 0;
> +}
> +
> +static int read_dvsec_vendor(struct pci_dev *dev)
> +{
> + int pos;
> + u32 cfg, tlx, dlx;
> +
> + /*
> + * vendor specific DVSEC is optional
> + *
> + * It's currently only used on function 0 to specify the
> + * version of some logic blocks. Some older images may not
> + * even have it so we ignore any errors
> + */
> + if (PCI_FUNC(dev->devfn) != 0)
> + return 0;
> +
> + pos = find_dvsec(dev, OCXL_DVSEC_VENDOR_ID);
> + if (!pos)
> + return 0;
> +
> + pci_read_config_dword(dev, pos + OCXL_DVSEC_VENDOR_CFG_VERS, &cfg);
> + pci_read_config_dword(dev, pos + OCXL_DVSEC_VENDOR_TLX_VERS, &tlx);
> + pci_read_config_dword(dev, pos + OCXL_DVSEC_VENDOR_DLX_VERS, &dlx);
> +
> + dev_dbg(&dev->dev, "Vendor specific DVSEC:\n");
> + dev_dbg(&dev->dev, " CFG version = 0x%x\n", cfg);
> + dev_dbg(&dev->dev, " TLX version = 0x%x\n", tlx);
> + dev_dbg(&dev->dev, " DLX version = 0x%x\n", dlx);
> + return 0;
> +}
> +
> +static int validate_function(struct pci_dev *dev, struct ocxl_fn_config *fn)
> +{
> + if (fn->max_pasid_log == -1 && fn->max_afu_index >= 0) {
> + dev_err(&dev->dev,
> + "AFUs are defined but no PASIDs are requested\n");
> + return -EINVAL;
> + }
> +
> + if (fn->max_afu_index > OCXL_MAX_AFU_PER_FUNCTION) {
> + dev_err(&dev->dev,
> + "Max AFU index out of architectural limit (%d vs %d)\n",
> + fn->max_afu_index, OCXL_MAX_AFU_PER_FUNCTION);
> + return -EINVAL;
> + }
> + return 0;
> +}
> +
> +int ocxl_config_read_function(struct pci_dev *dev, struct ocxl_fn_config *fn)
> +{
> + int rc;
> +
> + rc = read_pasid(dev, fn);
> + if (rc) {
> + dev_err(&dev->dev, "Invalid PASID configuration: %d\n", rc);
> + return -ENODEV;
> + }
> +
> + rc = read_dvsec_tl(dev, fn);
> + if (rc) {
> + dev_err(&dev->dev,
> + "Invalid Transaction Layer DVSEC configuration: %d\n",
> + rc);
> + return -ENODEV;
> + }
> +
> + rc = read_dvsec_function(dev, fn);
> + if (rc) {
> + dev_err(&dev->dev,
> + "Invalid Function DVSEC configuration: %d\n", rc);
> + return -ENODEV;
> + }
> +
> + rc = read_dvsec_afu_info(dev, fn);
> + if (rc) {
> + dev_err(&dev->dev, "Invalid AFU configuration: %d\n", rc);
> + return -ENODEV;
> + }
> +
> + rc = read_dvsec_vendor(dev);
> + if (rc) {
> + dev_err(&dev->dev,
> + "Invalid vendor specific DVSEC configuration: %d\n",
> + rc);
> + return -ENODEV;
> + }
> +
> + rc = validate_function(dev, fn);
> + return rc;
> +}
> +
> +static int read_afu_info(struct pci_dev *dev, struct ocxl_fn_config *fn,
> + int offset, u32 *data)
> +{
> + u32 val;
> + unsigned long timeout = jiffies + (HZ * OCXL_CFG_TIMEOUT);
> + int pos = fn->dvsec_afu_info_pos;
> +
> + /* Protect 'data valid' bit */
> + if (EXTRACT_BIT(offset, 31)) {
> + dev_err(&dev->dev, "Invalid offset in AFU info DVSEC\n");
> + return -EINVAL;
> + }
> +
> + pci_write_config_dword(dev, pos + OCXL_DVSEC_AFU_INFO_OFF, offset);
> + pci_read_config_dword(dev, pos + OCXL_DVSEC_AFU_INFO_OFF, &val);
> + while (!EXTRACT_BIT(val, 31)) {
> + if (time_after_eq(jiffies, timeout)) {
> + dev_err(&dev->dev,
> + "Timeout while reading AFU info DVSEC (offset=%d)\n",
> + offset);
> + return -EBUSY;
> + }
> + cpu_relax();
> + pci_read_config_dword(dev, pos + OCXL_DVSEC_AFU_INFO_OFF, &val);
> + }
> + pci_read_config_dword(dev, pos + OCXL_DVSEC_AFU_INFO_DATA, data);
> + return 0;
> +}
> +
> +int ocxl_config_check_afu_index(struct pci_dev *dev,
> + struct ocxl_fn_config *fn, int afu_idx)
> +{
> + u32 val;
> + int rc, templ_major, templ_minor, len;
> +
> + pci_write_config_word(dev, fn->dvsec_afu_info_pos, afu_idx);
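Shouldn't this write the index at the AFU index offset within the DVSEC
rather than at offset 0 (the DVSEC header), and as a byte, to match what
ocxl_config_read_afu() does further down? i.e. something like:

	pci_write_config_byte(dev,
			fn->dvsec_afu_info_pos + OCXL_DVSEC_AFU_INFO_AFU_IDX,
			afu_idx);
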
> + rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_VERSION, &val);
> + if (rc)
> + return rc;
> +
> + /* AFU index map can have holes */
> + if (!val)
> + return 0;
> +
> + templ_major = EXTRACT_BITS(val, 8, 15);
> + templ_minor = EXTRACT_BITS(val, 0, 7);
> + dev_dbg(&dev->dev, "AFU descriptor template version %d.%d\n",
> + templ_major, templ_minor);
> +
> + len = EXTRACT_BITS(val, 16, 31);
> + if (len != OCXL_TEMPL_LEN) {
> + dev_warn(&dev->dev,
> + "Unexpected template length in AFU information (%#x)\n",
> + len);
> + }
> + return 1;
> +}
> +
> +static int read_afu_name(struct pci_dev *dev, struct ocxl_fn_config *fn,
> + struct ocxl_afu_config *afu)
> +{
> + int i, rc;
> + u32 val, *ptr;
> +
> + BUILD_BUG_ON(OCXL_AFU_NAME_SZ < OCXL_TEMPL_NAME_LEN);
> + for (i = 0; i < OCXL_TEMPL_NAME_LEN; i += 4) {
> + rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_NAME + i, &val);
> + if (rc)
> + return rc;
> + ptr = (u32 *) &afu->name[i];
> + *ptr = val;
> + }
> + afu->name[OCXL_AFU_NAME_SZ - 1] = '\0'; /* play safe */
> + return 0;
> +}
> +
> +static int read_afu_mmio(struct pci_dev *dev, struct ocxl_fn_config *fn,
> + struct ocxl_afu_config *afu)
> +{
> + int rc;
> + u32 val;
> +
> + /*
> + * Global MMIO
> + */
> + rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MMIO_GLOBAL, &val);
> + if (rc)
> + return rc;
> + afu->global_mmio_bar = EXTRACT_BITS(val, 0, 2);
> + afu->global_mmio_offset = EXTRACT_BITS(val, 16, 31) << 16;
> +
> + rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MMIO_GLOBAL + 4, &val);
> + if (rc)
> + return rc;
> + afu->global_mmio_offset += (u64) val << 32;
> +
> + rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MMIO_GLOBAL_SZ, &val);
> + if (rc)
> + return rc;
> + afu->global_mmio_size = val;
> +
> + /*
> + * Per-process MMIO
> + */
> + rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MMIO_PP, &val);
> + if (rc)
> + return rc;
> + afu->pp_mmio_bar = EXTRACT_BITS(val, 0, 2);
> + afu->pp_mmio_offset = EXTRACT_BITS(val, 16, 31) << 16;
> +
> + rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MMIO_PP + 4, &val);
> + if (rc)
> + return rc;
> + afu->pp_mmio_offset += (u64) val << 32;
> +
> + rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MMIO_PP_SZ, &val);
> + if (rc)
> + return rc;
> + afu->pp_mmio_stride = val;
> +
> + return 0;
> +}
> +
> +static int read_afu_control(struct pci_dev *dev, struct ocxl_afu_config *afu)
> +{
> + int pos;
> + u8 val8;
> + u16 val16;
> +
> + pos = find_dvsec_afu_ctrl(dev, afu->idx);
> + if (!pos) {
> + dev_err(&dev->dev, "Can't find AFU control DVSEC for AFU %d\n",
> + afu->idx);
> + return -ENODEV;
> + }
> + afu->dvsec_afu_control_pos = pos;
> +
> + pci_read_config_byte(dev, pos + OCXL_DVSEC_AFU_CTRL_PASID_SUP, &val8);
> + afu->pasid_supported_log = EXTRACT_BITS(val8, 0, 4);
> +
> + pci_read_config_word(dev, pos + OCXL_DVSEC_AFU_CTRL_ACTAG_SUP, &val16);
> + afu->actag_supported = EXTRACT_BITS(val16, 0, 11);
> + return 0;
> +}
> +
> +static bool char_allowed(int c)
> +{
> + /*
> + * Permitted characters: alphanumeric, hyphen, underscore, comma, NUL
> + */
> + if ((c >= 0x30 && c <= 0x39) /* digits */ ||
> + (c >= 0x41 && c <= 0x5A) /* upper case */ ||
> + (c >= 0x61 && c <= 0x7A) /* lower case */ ||
> + c == 0 /* NULL */ ||
> + c == 0x2D /* - */ ||
> + c == 0x5F /* _ */ ||
> + c == 0x2C /* , */)
> + return true;
> + return false;
> +}
> +
> +static int validate_afu(struct pci_dev *dev, struct ocxl_afu_config *afu)
> +{
> + int i;
> +
> + if (!afu->name[0]) {
> + dev_err(&dev->dev, "Empty AFU name\n");
> + return -EINVAL;
> + }
> + for (i = 0; i < OCXL_TEMPL_NAME_LEN; i++) {
> + if (!char_allowed(afu->name[i])) {
> + dev_err(&dev->dev,
> + "Invalid character in AFU name\n");
> + return -EINVAL;
> + }
> + }
> +
> + if (afu->global_mmio_bar != 0 &&
> + afu->global_mmio_bar != 2 &&
> + afu->global_mmio_bar != 4) {
> + dev_err(&dev->dev, "Invalid global MMIO bar number\n");
> + return -EINVAL;
> + }
> + if (afu->pp_mmio_bar != 0 &&
> + afu->pp_mmio_bar != 2 &&
> + afu->pp_mmio_bar != 4) {
> + dev_err(&dev->dev, "Invalid per-process MMIO bar number\n");
> + return -EINVAL;
> + }
> + return 0;
> +}
> +
> +int ocxl_config_read_afu(struct pci_dev *dev, struct ocxl_fn_config *fn,
> + struct ocxl_afu_config *afu, u8 afu_idx)
> +{
> + int rc;
> + u32 val32;
> +
> + /*
> + * First, we need to write the AFU idx for the AFU we want to
> + * access.
> + */
> + WARN_ON((afu_idx & OCXL_DVSEC_AFU_IDX_MASK) != afu_idx);
> + afu->idx = afu_idx;
> + pci_write_config_byte(dev,
> + fn->dvsec_afu_info_pos + OCXL_DVSEC_AFU_INFO_AFU_IDX,
> + afu->idx);
> +
> + rc = read_afu_name(dev, fn, afu);
> + if (rc)
> + return rc;
> +
> + rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_AFU_VERSION, &val32);
> + if (rc)
> + return rc;
> + afu->version_major = EXTRACT_BITS(val32, 24, 31);
> + afu->version_minor = EXTRACT_BITS(val32, 16, 23);
> + afu->afuc_type = EXTRACT_BITS(val32, 14, 15);
> + afu->afum_type = EXTRACT_BITS(val32, 12, 13);
> + afu->profile = EXTRACT_BITS(val32, 0, 7);
> +
> + rc = read_afu_mmio(dev, fn, afu);
> + if (rc)
> + return rc;
> +
> + rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MEM_SZ, &val32);
> + if (rc)
> + return rc;
> + afu->log_mem_size = EXTRACT_BITS(val32, 0, 7);
> +
> + rc = read_afu_control(dev, afu);
> + if (rc)
> + return rc;
> +
> + dev_dbg(&dev->dev, "AFU configuration:\n");
> + dev_dbg(&dev->dev, " name = %s\n", afu->name);
> + dev_dbg(&dev->dev, " version = %d.%d\n", afu->version_major,
> + afu->version_minor);
> + dev_dbg(&dev->dev, " global mmio bar = %hhu\n", afu->global_mmio_bar);
> + dev_dbg(&dev->dev, " global mmio offset = %#llx\n",
> + afu->global_mmio_offset);
> + dev_dbg(&dev->dev, " global mmio size = %#x\n", afu->global_mmio_size);
> + dev_dbg(&dev->dev, " pp mmio bar = %hhu\n", afu->pp_mmio_bar);
> + dev_dbg(&dev->dev, " pp mmio offset = %#llx\n", afu->pp_mmio_offset);
> + dev_dbg(&dev->dev, " pp mmio stride = %#x\n", afu->pp_mmio_stride);
> + dev_dbg(&dev->dev, " mem size (log) = %hhu\n", afu->log_mem_size);
> + dev_dbg(&dev->dev, " pasid supported (log) = %u\n",
> + afu->pasid_supported_log);
> + dev_dbg(&dev->dev, " actag supported = %u\n",
> + afu->actag_supported);
> +
> + rc = validate_afu(dev, afu);
> + return rc;
> +}
> +
> +int ocxl_config_get_actag_info(struct pci_dev *dev, u16 *base, u16 *enabled,
> + u16 *supported)
> +{
> + int rc;
> +
> + /*
> + * This is really a simple wrapper for the kernel API, to
> + * avoid an external driver using ocxl as a library to call
> + * platform-dependent code
> + */
> + rc = pnv_ocxl_get_actag(dev, base, enabled, supported);
> + if (rc) {
> + dev_err(&dev->dev, "Can't get actag for device: %d\n", rc);
> + return rc;
> + }
> + return 0;
> +}
> +
> +void ocxl_config_set_afu_actag(struct pci_dev *dev, int pos, int actag_base,
> + int actag_count)
> +{
> + u16 val;
> +
> + val = actag_count & OCXL_DVSEC_ACTAG_MASK;
> + pci_write_config_byte(dev, pos + OCXL_DVSEC_AFU_CTRL_ACTAG_EN, val);
> +
> + val = actag_base & OCXL_DVSEC_ACTAG_MASK;
> + pci_write_config_dword(dev, pos + OCXL_DVSEC_AFU_CTRL_ACTAG_BASE, val);
> +}
> +
> +int ocxl_config_get_pasid_info(struct pci_dev *dev, int *count)
> +{
> + return pnv_ocxl_get_pasid_count(dev, count);
> +}
> +
> +void ocxl_config_set_afu_pasid(struct pci_dev *dev, int pos, int pasid_base,
> + u32 pasid_count_log)
> +{
> + u8 val8;
> + u32 val32;
> +
> + val8 = pasid_count_log & OCXL_DVSEC_PASID_LOG_MASK;
> + pci_write_config_byte(dev, pos + OCXL_DVSEC_AFU_CTRL_PASID_EN, val8);
> +
> + pci_read_config_dword(dev, pos + OCXL_DVSEC_AFU_CTRL_PASID_BASE,
> + &val32);
> + val32 &= ~OCXL_DVSEC_PASID_MASK;
> + val32 |= pasid_base & OCXL_DVSEC_PASID_MASK;
> + pci_write_config_dword(dev, pos + OCXL_DVSEC_AFU_CTRL_PASID_BASE,
> + val32);
> +}
> +
> +void ocxl_config_set_afu_state(struct pci_dev *dev, int pos, int enable)
> +{
> + u8 val;
> +
> + pci_read_config_byte(dev, pos + OCXL_DVSEC_AFU_CTRL_ENABLE, &val);
> + if (enable)
> + val |= 1;
> + else
> + val &= 0xFE;
> + pci_write_config_byte(dev, pos + OCXL_DVSEC_AFU_CTRL_ENABLE, val);
> +}
> +
> +int ocxl_config_set_TL(struct pci_dev *dev, int tl_dvsec)
> +{
> + u32 val, *ptr32;
> + u8 timers;
> + int i, rc;
> + long recv_cap;
> + char *recv_rate;
> +
> + /*
> + * Skip on function != 0, as the TL can only be defined on 0
> + */
> + if (PCI_FUNC(dev->devfn) != 0)
> + return 0;
> +
> + recv_rate = kzalloc(PNV_OCXL_TL_RATE_BUF_SIZE, GFP_KERNEL);
> + if (!recv_rate)
> + return -ENOMEM;
> + /*
> + * The spec defines 64 templates for messages in the
> + * Transaction Layer (TL).
> + *
> + * The host and device each support a subset, so we need to
> + * configure the transmitters on each side to send only
> + * templates the receiver understands, at a rate the receiver
> + * can process. Per the spec, template 0 must be supported by
> + * everybody. That's the template which has been used by the
> + * host and device so far.
> + *
> + * The sending rate limit must be set before the template is
> + * enabled.
> + */
> +
> + /*
> + * Device -> host
> + */
> + rc = pnv_ocxl_get_tl_cap(dev, &recv_cap, recv_rate,
> + PNV_OCXL_TL_RATE_BUF_SIZE);
> + if (rc)
> + goto out;
> +
> + for (i = 0; i < PNV_OCXL_TL_RATE_BUF_SIZE; i += 4) {
> + ptr32 = (u32 *) &recv_rate[i];
> + pci_write_config_dword(dev,
> + tl_dvsec + OCXL_DVSEC_TL_SEND_RATE + i,
> + be32_to_cpu(*ptr32));
drivers/misc/ocxl/config.c:618:33: warning: cast to restricted __be32
> + }
> + val = recv_cap >> 32;
> + pci_write_config_dword(dev, tl_dvsec + OCXL_DVSEC_TL_SEND_CAP, val);
> + val = recv_cap & GENMASK(31, 0);
> + pci_write_config_dword(dev, tl_dvsec + OCXL_DVSEC_TL_SEND_CAP + 4, val);
> +
> + /*
> + * Host -> device
> + */
> + for (i = 0; i < PNV_OCXL_TL_RATE_BUF_SIZE; i += 4) {
> + pci_read_config_dword(dev,
> + tl_dvsec + OCXL_DVSEC_TL_RECV_RATE + i,
> + &val);
> + ptr32 = (u32 *) &recv_rate[i];
> + *ptr32 = cpu_to_be32(val);
drivers/misc/ocxl/config.c:633:24: warning: incorrect type in assignment
(different base types)
drivers/misc/ocxl/config.c:633:24: expected unsigned int [unsigned]
[usertype] <noident>
drivers/misc/ocxl/config.c:633:24: got restricted __be32 [usertype]
<noident>
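Both warnings in this function would go away if the rate buffer accesses
were typed as big-endian explicitly, e.g. (untested sketch):

	__be32 *ptr32;
	...
	/* device -> host */
	ptr32 = (__be32 *) &recv_rate[i];
	pci_write_config_dword(dev,
			tl_dvsec + OCXL_DVSEC_TL_SEND_RATE + i,
			be32_to_cpu(*ptr32));
	...
	/* host -> device */
	ptr32 = (__be32 *) &recv_rate[i];
	*ptr32 = cpu_to_be32(val);
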
> + }
> + pci_read_config_dword(dev, tl_dvsec + OCXL_DVSEC_TL_RECV_CAP, &val);
> + recv_cap = (long) val << 32;
> + pci_read_config_dword(dev, tl_dvsec + OCXL_DVSEC_TL_RECV_CAP + 4, &val);
> + recv_cap |= val;
> +
> + rc = pnv_ocxl_set_tl_conf(dev, recv_cap, __pa(recv_rate),
> + PNV_OCXL_TL_RATE_BUF_SIZE);
> + if (rc)
> + goto out;
> +
> + /*
> + * Opencapi commands needing to be retried are classified per
> + * the TL in 2 groups: short and long commands.
> + *
> + * The short back off timer is not used for now. It will be
> + * used for opencapi 4.0.
> + *
> + * The long back off timer is typically used when an AFU hits
> + * a page fault but the NPU is already processing one. So the
> + * AFU needs to wait before it can resubmit. Having a value
> + * too low doesn't break anything, but can generate extra
> + * traffic on the link.
> + * We set it to 1.6 us for now. It's shorter than, but in the
> + * same order of magnitude as the time spent to process a page
> + * fault.
> + */
> + timers = 0x2 << 4; /* long timer = 1.6 us */
> + pci_write_config_byte(dev, tl_dvsec + OCXL_DVSEC_TL_BACKOFF_TIMERS,
> + timers);
> +
> + rc = 0;
> +out:
> + kfree(recv_rate);
> + return rc;
> +}
> +
> +int ocxl_config_terminate_pasid(struct pci_dev *dev, int afu_control, int pasid)
> +{
> + u32 val;
> + unsigned long timeout;
> +
> + pci_read_config_dword(dev, afu_control + OCXL_DVSEC_AFU_CTRL_TERM_PASID,
> + &val);
> + if (EXTRACT_BIT(val, 20)) {
> + dev_err(&dev->dev,
> + "Can't terminate PASID %#x, previous termination didn't complete\n",
> + pasid);
> + return -EBUSY;
> + }
> +
> + val &= ~OCXL_DVSEC_PASID_MASK;
> + val |= pasid & OCXL_DVSEC_PASID_MASK;
> + val |= BIT(20);
> + pci_write_config_dword(dev,
> + afu_control + OCXL_DVSEC_AFU_CTRL_TERM_PASID,
> + val);
> +
> + timeout = jiffies + (HZ * OCXL_CFG_TIMEOUT);
> + pci_read_config_dword(dev, afu_control + OCXL_DVSEC_AFU_CTRL_TERM_PASID,
> + &val);
> + while (EXTRACT_BIT(val, 20)) {
> + if (time_after_eq(jiffies, timeout)) {
> + dev_err(&dev->dev,
> + "Timeout while waiting for AFU to terminate PASID %#x\n",
> + pasid);
> + return -EBUSY;
> + }
> + cpu_relax();
> + pci_read_config_dword(dev,
> + afu_control + OCXL_DVSEC_AFU_CTRL_TERM_PASID,
> + &val);
> + }
> + return 0;
> +}
> +
> +void ocxl_config_set_actag(struct pci_dev *dev, int func_dvsec, u32 tag_first,
> + u32 tag_count)
> +{
> + u32 val;
> +
> + val = (tag_first & OCXL_DVSEC_ACTAG_MASK) << 16;
> + val |= tag_count & OCXL_DVSEC_ACTAG_MASK;
> + pci_write_config_dword(dev, func_dvsec + OCXL_DVSEC_FUNC_OFF_ACTAG,
> + val);
> +}
> diff --git a/drivers/misc/ocxl/context.c b/drivers/misc/ocxl/context.c
> new file mode 100644
> index 000000000000..0bc0dd97d784
> --- /dev/null
> +++ b/drivers/misc/ocxl/context.c
> @@ -0,0 +1,237 @@
> +/*
> + * Copyright 2017 IBM Corp.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License
> + * as published by the Free Software Foundation; either version
> + * 2 of the License, or (at your option) any later version.
> + */
> +
> +#include <linux/sched/mm.h>
> +#include "ocxl_internal.h"
> +
> +struct ocxl_context *ocxl_context_alloc(void)
> +{
> + return kzalloc(sizeof(struct ocxl_context), GFP_KERNEL);
> +}
> +
> +int ocxl_context_init(struct ocxl_context *ctx, struct ocxl_afu *afu,
> + struct address_space *mapping)
> +{
> + int pasid;
> +
> + ctx->afu = afu;
> + mutex_lock(&afu->contexts_lock);
> + pasid = idr_alloc(&afu->contexts_idr, ctx, afu->pasid_base,
> + afu->pasid_base + afu->pasid_max, GFP_KERNEL);
> + if (pasid < 0) {
> + mutex_unlock(&afu->contexts_lock);
> + return pasid;
> + }
> + afu->pasid_count++;
> + mutex_unlock(&afu->contexts_lock);
> +
> + ctx->pasid = pasid;
> + ctx->status = OPENED;
> + mutex_init(&ctx->status_mutex);
> + ctx->mapping = mapping;
> + mutex_init(&ctx->mapping_lock);
> + init_waitqueue_head(&ctx->events_wq);
> + mutex_init(&ctx->xsl_error_lock);
> + /*
> + * Keep a reference on the AFU to make sure it's valid for the
> + * duration of the life of the context
> + */
> + ocxl_afu_get(afu);
> + return 0;
> +}
> +
> +/*
> + * Callback for when a translation fault triggers an error
> + * data: a pointer to the context which triggered the fault
> + * addr: the address that triggered the error
> + * dsisr: the value of the PPC64 dsisr register
> + */
> +static void xsl_fault_error(void *data, u64 addr, u64 dsisr)
> +{
> + struct ocxl_context *ctx = (struct ocxl_context *) data;
> +
> + mutex_lock(&ctx->xsl_error_lock);
> + ctx->xsl_error.addr = addr;
> + ctx->xsl_error.dsisr = dsisr;
> + ctx->xsl_error.count++;
> + mutex_unlock(&ctx->xsl_error_lock);
> +
> + wake_up_all(&ctx->events_wq);
> +}
> +
> +int ocxl_context_attach(struct ocxl_context *ctx, u64 amr)
> +{
> + int rc;
> +
> + mutex_lock(&ctx->status_mutex);
> + if (ctx->status != OPENED) {
> + rc = -EIO;
> + goto out;
> + }
> +
> + rc = ocxl_link_add_pe(ctx->afu->fn->link, ctx->pasid,
> + current->mm->context.id, 0, amr, current->mm,
> + xsl_fault_error, ctx);
> + if (rc)
> + goto out;
> +
> + ctx->status = ATTACHED;
> +out:
> + mutex_unlock(&ctx->status_mutex);
> + return rc;
> +}
> +
> +static int map_pp_mmio(struct vm_area_struct *vma, unsigned long address,
> + u64 offset, struct ocxl_context *ctx)
> +{
> + u64 pp_mmio_addr;
> + int pasid_off;
> +
> + if (offset >= ctx->afu->config.pp_mmio_stride)
> + return VM_FAULT_SIGBUS;
> +
> + mutex_lock(&ctx->status_mutex);
> + if (ctx->status != ATTACHED) {
> + mutex_unlock(&ctx->status_mutex);
> + pr_debug("%s: Context not attached, failing mmio mmap\n",
> + __func__);
> + return VM_FAULT_SIGBUS;
> + }
> +
> + pasid_off = ctx->pasid - ctx->afu->pasid_base;
> + pp_mmio_addr = ctx->afu->pp_mmio_start +
> + pasid_off * ctx->afu->config.pp_mmio_stride +
> + offset;
> +
> + vm_insert_pfn(vma, address, pp_mmio_addr >> PAGE_SHIFT);
> + mutex_unlock(&ctx->status_mutex);
> + return VM_FAULT_NOPAGE;
> +}
> +
> +static int ocxl_mmap_fault(struct vm_fault *vmf)
> +{
> + struct vm_area_struct *vma = vmf->vma;
> + struct ocxl_context *ctx = vma->vm_file->private_data;
> + u64 offset;
> + int rc;
> +
> + offset = vmf->pgoff << PAGE_SHIFT;
> + pr_debug("%s: pasid %d address 0x%lx offset 0x%llx\n", __func__,
> + ctx->pasid, vmf->address, offset);
> +
> + rc = map_pp_mmio(vma, vmf->address, offset, ctx);
> + return rc;
> +}
> +
> +static const struct vm_operations_struct ocxl_vmops = {
> + .fault = ocxl_mmap_fault,
> +};
> +
> +static int check_mmap_mmio(struct ocxl_context *ctx,
> + struct vm_area_struct *vma)
> +{
> + if ((vma_pages(vma) + vma->vm_pgoff) >
> + (ctx->afu->config.pp_mmio_stride >> PAGE_SHIFT))
> + return -EINVAL;
> + return 0;
> +}
> +
> +int ocxl_context_mmap(struct ocxl_context *ctx, struct vm_area_struct *vma)
> +{
> + int rc;
> +
> + rc = check_mmap_mmio(ctx, vma);
> + if (rc)
> + return rc;
> +
> + vma->vm_flags |= VM_IO | VM_PFNMAP;
> + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
> + vma->vm_ops = &ocxl_vmops;
> + return 0;
> +}
> +
> +int ocxl_context_detach(struct ocxl_context *ctx)
> +{
> + struct pci_dev *dev;
> + int afu_control_pos;
> + enum ocxl_context_status status;
> + int rc;
> +
> + mutex_lock(&ctx->status_mutex);
> + status = ctx->status;
> + ctx->status = CLOSED;
> + mutex_unlock(&ctx->status_mutex);
> + if (status != ATTACHED)
> + return 0;
> +
> + dev = to_pci_dev(ctx->afu->fn->dev.parent);
> + afu_control_pos = ctx->afu->config.dvsec_afu_control_pos;
> +
> + mutex_lock(&ctx->afu->afu_control_lock);
> + rc = ocxl_config_terminate_pasid(dev, afu_control_pos, ctx->pasid);
> + mutex_unlock(&ctx->afu->afu_control_lock);
> + if (rc) {
> + /*
> + * If we timeout waiting for the AFU to terminate the
> + * pasid, then it's dangerous to clean up the Process
> + * Element entry in the SPA, as it may be referenced
> + * in the future by the AFU. In which case, we would
> + * checkstop because of an invalid PE access (FIR
> + * register 2, bit 42). So leave the PE
> + * defined. Caller shouldn't free the context so that
> + * PASID remains allocated.
> + *
> + * A link reset will be required to cleanup the AFU
> + * and the SPA.
> + */
> + if (rc == -EBUSY)
> + return rc;
> + }
> + rc = ocxl_link_remove_pe(ctx->afu->fn->link, ctx->pasid);
> + if (rc) {
> + dev_warn(&ctx->afu->dev,
> + "Couldn't remove PE entry cleanly: %d\n", rc);
> + }
> + return 0;
> +}
> +
> +void ocxl_context_detach_all(struct ocxl_afu *afu)
> +{
> + struct ocxl_context *ctx;
> + int tmp;
> +
> + mutex_lock(&afu->contexts_lock);
> + idr_for_each_entry(&afu->contexts_idr, ctx, tmp) {
> + ocxl_context_detach(ctx);
> + /*
> + * We are force detaching - remove any active mmio
> + * mappings so userspace cannot interfere with the
> + * card if it comes back. Easiest way to exercise
> + * this is to unbind and rebind the driver via sysfs
> + * while it is in use.
> + */
> + mutex_lock(&ctx->mapping_lock);
> + if (ctx->mapping)
> + unmap_mapping_range(ctx->mapping, 0, 0, 1);
> + mutex_unlock(&ctx->mapping_lock);
> + }
> + mutex_unlock(&afu->contexts_lock);
> +}
> +
> +void ocxl_context_free(struct ocxl_context *ctx)
> +{
> + mutex_lock(&ctx->afu->contexts_lock);
> + ctx->afu->pasid_count--;
> + idr_remove(&ctx->afu->contexts_idr, ctx->pasid);
> + mutex_unlock(&ctx->afu->contexts_lock);
> +
> + /* reference to the AFU taken in ocxl_context_init */
> + ocxl_afu_put(ctx->afu);
> + kfree(ctx);
> +}
> diff --git a/drivers/misc/ocxl/file.c b/drivers/misc/ocxl/file.c
> new file mode 100644
> index 000000000000..a51386eff4f5
> --- /dev/null
> +++ b/drivers/misc/ocxl/file.c
> @@ -0,0 +1,405 @@
> +/*
> + * Copyright 2017 IBM Corp.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License
> + * as published by the Free Software Foundation; either version
> + * 2 of the License, or (at your option) any later version.
> + */
> +
> +#include <linux/fs.h>
> +#include <linux/poll.h>
> +#include <linux/sched/signal.h>
> +#include <linux/uaccess.h>
> +#include <uapi/misc/ocxl.h>
> +#include "ocxl_internal.h"
> +
> +
> +#define OCXL_NUM_MINORS 256 /* Total to reserve */
> +
> +static dev_t ocxl_dev;
> +static struct class *ocxl_class;
> +static struct mutex minors_idr_lock;
> +static struct idr minors_idr;
> +
> +static struct ocxl_afu *find_and_get_afu(dev_t devno)
> +{
> + struct ocxl_afu *afu;
> + int afu_minor;
> +
> + afu_minor = MINOR(devno);
> + /*
> + * We don't declare an RCU critical section here, as our AFU
> + * is protected by a reference counter on the device. By the time the
> + * minor number of a device is removed from the idr, the ref count of
> + * the device is already at 0, so no user API will access that AFU and
> + * this function can't return it.
> + */
> + afu = idr_find(&minors_idr, afu_minor);
> + if (afu)
> + ocxl_afu_get(afu);
> + return afu;
> +}
> +
> +static int allocate_afu_minor(struct ocxl_afu *afu)
> +{
> + int minor;
> +
> + mutex_lock(&minors_idr_lock);
> + minor = idr_alloc(&minors_idr, afu, 0, OCXL_NUM_MINORS, GFP_KERNEL);
> + mutex_unlock(&minors_idr_lock);
> + return minor;
> +}
> +
> +static void free_afu_minor(struct ocxl_afu *afu)
> +{
> + mutex_lock(&minors_idr_lock);
> + idr_remove(&minors_idr, MINOR(afu->dev.devt));
> + mutex_unlock(&minors_idr_lock);
> +}
> +
> +static int afu_open(struct inode *inode, struct file *file)
> +{
> + struct ocxl_afu *afu;
> + struct ocxl_context *ctx;
> + int rc;
> +
> + pr_debug("%s for device %x\n", __func__, inode->i_rdev);
> +
> + afu = find_and_get_afu(inode->i_rdev);
> + if (!afu)
> + return -ENODEV;
> +
> + ctx = ocxl_context_alloc();
> + if (!ctx) {
> + rc = -ENOMEM;
> + goto put_afu;
> + }
> +
> + rc = ocxl_context_init(ctx, afu, inode->i_mapping);
> + if (rc)
> + goto put_afu;
> + file->private_data = ctx;
> + ocxl_afu_put(afu);
> + return 0;
> +
> +put_afu:
> + ocxl_afu_put(afu);
> + return rc;
> +}
> +
> +static long afu_ioctl_attach(struct ocxl_context *ctx,
> + struct ocxl_ioctl_attach __user *uarg)
> +{
> + struct ocxl_ioctl_attach arg;
> + u64 amr = 0;
> + int rc;
> +
> + pr_debug("%s for context %d\n", __func__, ctx->pasid);
> +
> + if (copy_from_user(&arg, uarg, sizeof(arg)))
> + return -EFAULT;
> +
> + /* Make sure reserved fields are not set for forward compatibility */
> + if (arg.reserved1 || arg.reserved2 || arg.reserved3)
> + return -EINVAL;
> +
> + amr = arg.amr & mfspr(SPRN_UAMOR);
> + rc = ocxl_context_attach(ctx, amr);
> + return rc;
> +}
> +
> +#define CMD_STR(x) (x == OCXL_IOCTL_ATTACH ? "ATTACH" : \
> + "UNKNOWN")
> +
> +static long afu_ioctl(struct file *file, unsigned int cmd,
> + unsigned long args)
> +{
> + struct ocxl_context *ctx = file->private_data;
> + long rc;
> +
> + pr_debug("%s for context %d, command %s\n", __func__, ctx->pasid,
> + CMD_STR(cmd));
> +
> + if (ctx->status == CLOSED)
> + return -EIO;
> +
> + switch (cmd) {
> + case OCXL_IOCTL_ATTACH:
> + rc = afu_ioctl_attach(ctx,
> + (struct ocxl_ioctl_attach __user *) args);
> + break;
> +
> + default:
> + rc = -EINVAL;
> + }
> + return rc;
> +}
> +
> +static long afu_compat_ioctl(struct file *file, unsigned int cmd,
> + unsigned long args)
> +{
> + return afu_ioctl(file, cmd, args);
> +}
> +
> +static int afu_mmap(struct file *file, struct vm_area_struct *vma)
> +{
> + struct ocxl_context *ctx = file->private_data;
> +
> + pr_debug("%s for context %d\n", __func__, ctx->pasid);
> + return ocxl_context_mmap(ctx, vma);
> +}
> +
> +static bool has_xsl_error(struct ocxl_context *ctx)
> +{
> + bool ret;
> +
> + mutex_lock(&ctx->xsl_error_lock);
> + ret = !!ctx->xsl_error.addr;
> + mutex_unlock(&ctx->xsl_error_lock);
> +
> + return ret;
> +}
> +
> +/*
> + * Are there any events pending on the AFU
> + * ctx: The AFU context
> + * Returns: true if there are events pending
> + */
> +static bool afu_events_pending(struct ocxl_context *ctx)
> +{
> + if (has_xsl_error(ctx))
> + return true;
> + return false;
> +}
> +
> +static unsigned int afu_poll(struct file *file, struct poll_table_struct *wait)
> +{
> + struct ocxl_context *ctx = file->private_data;
> + unsigned int mask = 0;
> + bool closed;
> +
> + pr_debug("%s for context %d\n", __func__, ctx->pasid);
> +
> + poll_wait(file, &ctx->events_wq, wait);
> +
> + mutex_lock(&ctx->status_mutex);
> + closed = (ctx->status == CLOSED);
> + mutex_unlock(&ctx->status_mutex);
> +
> + if (afu_events_pending(ctx))
> + mask = POLLIN | POLLRDNORM;
> + else if (closed)
> + mask = POLLERR;
> +
> + return mask;
> +}
> +
> +/*
> + * Populate the supplied buffer with a single XSL error
> + * ctx: The AFU context to report the error from
> + * header: the event header to populate
> + * buf: The buffer to write the body into (should be at least
> + * AFU_EVENT_BODY_XSL_ERROR_SIZE)
> + * Return: the amount of buffer that was populated, or a negative errno
> + */
> +static ssize_t append_xsl_error(struct ocxl_context *ctx,
> + struct ocxl_kernel_event_header *header,
> + char __user *buf)
> +{
> + struct ocxl_kernel_event_xsl_fault_error body;
> +
> + memset(&body, 0, sizeof(body));
> +
> + mutex_lock(&ctx->xsl_error_lock);
> + if (!ctx->xsl_error.addr) {
> + mutex_unlock(&ctx->xsl_error_lock);
> + return 0;
> + }
> +
> + body.addr = ctx->xsl_error.addr;
> + body.dsisr = ctx->xsl_error.dsisr;
> + body.count = ctx->xsl_error.count;
> +
> + ctx->xsl_error.addr = 0;
> + ctx->xsl_error.dsisr = 0;
> + ctx->xsl_error.count = 0;
> +
> + mutex_unlock(&ctx->xsl_error_lock);
> +
> + header->type = OCXL_AFU_EVENT_XSL_FAULT_ERROR;
> +
> + if (copy_to_user(buf, &body, sizeof(body)))
> + return -EFAULT;
> +
> + return sizeof(body);
> +}
> +
> +#define AFU_EVENT_BODY_MAX_SIZE sizeof(struct ocxl_kernel_event_xsl_fault_error)
> +
> +/*
> + * Reports events on the AFU
> + * Format:
> + * Header (struct ocxl_kernel_event_header)
> + * Body (struct ocxl_kernel_event_*)
> + * Header...
> + */
> +static ssize_t afu_read(struct file *file, char __user *buf, size_t count,
> + loff_t *off)
> +{
> + struct ocxl_context *ctx = file->private_data;
> + struct ocxl_kernel_event_header header;
> + ssize_t rc;
> + size_t used = 0;
> + DEFINE_WAIT(event_wait);
> +
> + memset(&header, 0, sizeof(header));
> +
> + /* Require offset to be 0 */
> + if (*off != 0)
> + return -EINVAL;
> +
> + if (count < (sizeof(struct ocxl_kernel_event_header) +
> + AFU_EVENT_BODY_MAX_SIZE))
> + return -EINVAL;
> +
> + for (;;) {
> + prepare_to_wait(&ctx->events_wq, &event_wait,
> + TASK_INTERRUPTIBLE);
> +
> + if (afu_events_pending(ctx))
> + break;
> +
> + if (ctx->status == CLOSED)
> + break;
> +
> + if (file->f_flags & O_NONBLOCK) {
> + finish_wait(&ctx->events_wq, &event_wait);
> + return -EAGAIN;
> + }
> +
> + if (signal_pending(current)) {
> + finish_wait(&ctx->events_wq, &event_wait);
> + return -ERESTARTSYS;
> + }
> +
> + schedule();
> + }
> +
> + finish_wait(&ctx->events_wq, &event_wait);
> +
> + if (has_xsl_error(ctx)) {
> + used = append_xsl_error(ctx, &header, buf + sizeof(header));
> + if (used < 0)
> + return used;
> + }
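Not a sparse warning, but while we're here: used is a size_t, so the
used < 0 check above can never be true, and a -EFAULT coming back from
append_xsl_error() would be returned to userspace as a byte count.
Making it signed should be enough:

	ssize_t used = 0;
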
> +
> + if (!afu_events_pending(ctx))
> + header.flags |= OCXL_KERNEL_EVENT_FLAG_LAST;
> +
> + if (copy_to_user(buf, &header, sizeof(header)))
> + return -EFAULT;
> +
> + used += sizeof(header);
> +
> + rc = (ssize_t) used;
> + return rc;
> +}
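And for completeness, consuming events from userspace would be along
these lines (sketch only, based on the format documented above
afu_read(); assumes an fd from the attach sketch earlier):

	struct ocxl_kernel_event_header header;
	struct ocxl_kernel_event_xsl_fault_error err;
	char buf[sizeof(header) + sizeof(err)];
	ssize_t n;

	for (;;) {
		n = read(fd, buf, sizeof(buf));
		if (n < (ssize_t) sizeof(header))
			break;
		memcpy(&header, buf, sizeof(header));
		if (header.type == OCXL_AFU_EVENT_XSL_FAULT_ERROR) {
			memcpy(&err, buf + sizeof(header), sizeof(err));
			/* err.addr / err.dsisr / err.count describe the fault */
		}
		if (header.flags & OCXL_KERNEL_EVENT_FLAG_LAST)
			break;
	}
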
> +
> +static int afu_release(struct inode *inode, struct file *file)
> +{
> + struct ocxl_context *ctx = file->private_data;
> + int rc;
> +
> + pr_debug("%s for device %x\n", __func__, inode->i_rdev);
> + rc = ocxl_context_detach(ctx);
> + mutex_lock(&ctx->mapping_lock);
> + ctx->mapping = NULL;
> + mutex_unlock(&ctx->mapping_lock);
> + wake_up_all(&ctx->events_wq);
> + if (rc != -EBUSY)
> + ocxl_context_free(ctx);
> + return 0;
> +}
> +
> +static const struct file_operations ocxl_afu_fops = {
> + .owner = THIS_MODULE,
> + .open = afu_open,
> + .unlocked_ioctl = afu_ioctl,
> + .compat_ioctl = afu_compat_ioctl,
> + .mmap = afu_mmap,
> + .poll = afu_poll,
> + .read = afu_read,
> + .release = afu_release,
> +};
> +
> +int ocxl_create_cdev(struct ocxl_afu *afu)
> +{
> + int rc;
> +
> + cdev_init(&afu->cdev, &ocxl_afu_fops);
> + rc = cdev_add(&afu->cdev, afu->dev.devt, 1);
> + if (rc) {
> + dev_err(&afu->dev, "Unable to add afu char device: %d\n", rc);
> + return rc;
> + }
> + return 0;
> +}
> +
> +void ocxl_destroy_cdev(struct ocxl_afu *afu)
> +{
> + cdev_del(&afu->cdev);
> +}
> +
> +int ocxl_register_afu(struct ocxl_afu *afu)
> +{
> + int minor;
> +
> + minor = allocate_afu_minor(afu);
> + if (minor < 0)
> + return minor;
> + afu->dev.devt = MKDEV(MAJOR(ocxl_dev), minor);
> + afu->dev.class = ocxl_class;
> + return device_register(&afu->dev);
> +}
> +
> +void ocxl_unregister_afu(struct ocxl_afu *afu)
> +{
> + free_afu_minor(afu);
> +}
> +
> +static char *ocxl_devnode(struct device *dev, umode_t *mode)
> +{
> + return kasprintf(GFP_KERNEL, "ocxl/%s", dev_name(dev));
> +}
> +
> +int ocxl_file_init(void)
> +{
> + int rc;
> +
> + mutex_init(&minors_idr_lock);
> + idr_init(&minors_idr);
> +
> + rc = alloc_chrdev_region(&ocxl_dev, 0, OCXL_NUM_MINORS, "ocxl");
> + if (rc) {
> + pr_err("Unable to allocate ocxl major number: %d\n", rc);
> + return rc;
> + }
> +
> + ocxl_class = class_create(THIS_MODULE, "ocxl");
> + if (IS_ERR(ocxl_class)) {
> + pr_err("Unable to create ocxl class\n");
> + unregister_chrdev_region(ocxl_dev, OCXL_NUM_MINORS);
> + return PTR_ERR(ocxl_class);
> + }
> +
> + ocxl_class->devnode = ocxl_devnode;
> + return 0;
> +}
> +
> +void ocxl_file_exit(void)
> +{
> + class_destroy(ocxl_class);
> + unregister_chrdev_region(ocxl_dev, OCXL_NUM_MINORS);
> + idr_destroy(&minors_idr);
> +}
> diff --git a/drivers/misc/ocxl/link.c b/drivers/misc/ocxl/link.c
> new file mode 100644
> index 000000000000..6b184cd7d2a6
> --- /dev/null
> +++ b/drivers/misc/ocxl/link.c
> @@ -0,0 +1,610 @@
> +/*
> + * Copyright 2017 IBM Corp.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License
> + * as published by the Free Software Foundation; either version
> + * 2 of the License, or (at your option) any later version.
> + */
> +
> +#include <linux/sched/mm.h>
> +#include <linux/mutex.h>
> +#include <linux/mmu_context.h>
> +#include <asm/copro.h>
> +#include <asm/pnv-ocxl.h>
> +#include "ocxl_internal.h"
> +
> +
> +#define SPA_PASID_BITS 15
> +#define SPA_PASID_MAX ((1 << SPA_PASID_BITS) - 1)
> +#define SPA_PE_MASK SPA_PASID_MAX
> +#define SPA_SPA_SIZE_LOG 22 /* Each SPA is 4 MB */
> +
> +#define SPA_CFG_SF (1ull << (63-0))
> +#define SPA_CFG_TA (1ull << (63-1))
> +#define SPA_CFG_HV (1ull << (63-3))
> +#define SPA_CFG_UV (1ull << (63-4))
> +#define SPA_CFG_XLAT_hpt (0ull << (63-6)) /* Hashed page table (HPT) mode */
> +#define SPA_CFG_XLAT_roh (2ull << (63-6)) /* Radix on HPT mode */
> +#define SPA_CFG_XLAT_ror (3ull << (63-6)) /* Radix on Radix mode */
> +#define SPA_CFG_PR (1ull << (63-49))
> +#define SPA_CFG_TC (1ull << (63-54))
> +#define SPA_CFG_DR (1ull << (63-59))
> +
> +#define SPA_XSL_TF (1ull << (63-3)) /* Translation fault */
> +#define SPA_XSL_S (1ull << (63-38)) /* Store operation */
> +
> +#define SPA_PE_VALID 0x80000000
> +
> +
> +struct pe_data {
> + struct mm_struct *mm;
> + /* callback to trigger when a translation fault occurs */
> + void (*xsl_err_cb)(void *data, u64 addr, u64 dsisr);
> + /* opaque pointer to be passed to the above callback */
> + void *xsl_err_data;
> + struct rcu_head rcu;
> +};
> +
> +struct spa {
> + struct ocxl_process_element *spa_mem;
> + int spa_order;
> + struct mutex spa_lock;
> + struct radix_tree_root pe_tree; /* Maps PE handles to pe_data */
> + char *irq_name;
> + int virq;
> + void __iomem *reg_dsisr;
> + void __iomem *reg_dar;
> + void __iomem *reg_tfc;
> + void __iomem *reg_pe_handle;
> + /*
> + * The following fields are used by the memory fault
> + * interrupt handler. We can only have one interrupt at a
> + * time. The NPU won't raise another interrupt until the
> + * previous one has been ack'd by writing to the TFC register
> + */
> + struct xsl_fault {
> + struct work_struct fault_work;
> + u64 pe;
> + u64 dsisr;
> + u64 dar;
> + struct pe_data pe_data;
> + } xsl_fault;
> +};
> +
> +/*
> + * An opencapi link can be used by several PCI functions. We have
> + * one link per device slot.
> + *
> + * A linked list of opencapi links should suffice, as there's a
> + * limited number of opencapi slots on a system and lookup is only
> + * done when the device is probed
> + */
> +struct link {
> + struct list_head list;
> + struct kref ref;
> + int domain;
> + int bus;
> + int dev;
> + atomic_t irq_available;
> + struct spa *spa;
> + void *platform_data;
> +};
> +static struct list_head links_list = LIST_HEAD_INIT(links_list);
> +static DEFINE_MUTEX(links_list_lock);
> +
> +enum xsl_response {
> + CONTINUE,
> + ADDRESS_ERROR,
> + RESTART,
> +};
> +
> +
> +static void read_irq(struct spa *spa, u64 *dsisr, u64 *dar, u64 *pe)
> +{
> + u64 reg;
> +
> + *dsisr = in_be64(spa->reg_dsisr);
> + *dar = in_be64(spa->reg_dar);
> + reg = in_be64(spa->reg_pe_handle);
> + *pe = reg & SPA_PE_MASK;
> +}
> +
> +static void ack_irq(struct spa *spa, enum xsl_response r)
> +{
> + u64 reg = 0;
> +
> + /* continue is not supported */
> + if (r == RESTART)
> + reg = PPC_BIT(31);
> + else if (r == ADDRESS_ERROR)
> + reg = PPC_BIT(30);
> + else
> + WARN(1, "Invalid irq response %d\n", r);
> +
> + if (reg)
> + out_be64(spa->reg_tfc, reg);
> +}
> +
> +static void xsl_fault_handler_bh(struct work_struct *fault_work)
> +{
> + unsigned int flt = 0;
> + unsigned long access, flags, inv_flags = 0;
> + enum xsl_response r;
> + struct xsl_fault *fault = container_of(fault_work, struct xsl_fault,
> + fault_work);
> + struct spa *spa = container_of(fault, struct spa, xsl_fault);
> +
> + int rc;
> +
> + /*
> + * We need to release a reference on the mm whenever exiting this
> + * function (taken in the memory fault interrupt handler)
> + */
> + rc = copro_handle_mm_fault(fault->pe_data.mm, fault->dar, fault->dsisr,
> + &flt);
> + if (rc) {
> + pr_debug("copro_handle_mm_fault failed: %d\n", rc);
> + if (fault->pe_data.xsl_err_cb) {
> + fault->pe_data.xsl_err_cb(
> + fault->pe_data.xsl_err_data,
> + fault->dar, fault->dsisr);
> + }
> + r = ADDRESS_ERROR;
> + goto ack;
> + }
> +
> + if (!radix_enabled()) {
> + /*
> + * update_mmu_cache() will not have loaded the hash
> + * since current->trap is not a 0x400 or 0x300, so
> + * just call hash_page_mm() here.
> + */
> + access = _PAGE_PRESENT | _PAGE_READ;
> + if (fault->dsisr & SPA_XSL_S)
> + access |= _PAGE_WRITE;
> +
> + if (REGION_ID(fault->dar) != USER_REGION_ID)
> + access |= _PAGE_PRIVILEGED;
> +
> + local_irq_save(flags);
> + hash_page_mm(fault->pe_data.mm, fault->dar, access, 0x300,
> + inv_flags);
> + local_irq_restore(flags);
> + }
> + r = RESTART;
> +ack:
> + mmdrop(fault->pe_data.mm);
> + ack_irq(spa, r);
> +}
> +
> +static irqreturn_t xsl_fault_handler(int irq, void *data)
> +{
> + struct link *link = (struct link *) data;
> + struct spa *spa = link->spa;
> + u64 dsisr, dar, pe_handle;
> + struct pe_data *pe_data;
> + struct ocxl_process_element *pe;
> + int lpid, pid, tid;
> +
> + read_irq(spa, &dsisr, &dar, &pe_handle);
> +
> + WARN_ON(pe_handle > SPA_PE_MASK);
> + pe = spa->spa_mem + pe_handle;
> + lpid = be32_to_cpu(pe->lpid);
> + pid = be32_to_cpu(pe->pid);
> + tid = be32_to_cpu(pe->tid);
drivers/misc/ocxl/link.c:193:16: warning: cast to restricted __be32
drivers/misc/ocxl/link.c:194:15: warning: cast to restricted __be32
drivers/misc/ocxl/link.c:195:15: warning: cast to restricted __be32
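These three, the big batch in ocxl_link_add_pe() further down, and the
degrade warning in ocxl_link_remove_pe() all look like the same root
cause: the fields of struct ocxl_process_element presumably being
declared with native integer types in ocxl_internal.h. Suggested fix
below, after the add_pe warnings.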
> + /* We could be reading all null values here if the PE is being
> + * removed while an interrupt kicks in. It's not supposed to
> + * happen if the driver notified the AFU to terminate the
> + * PASID, and the AFU waited for pending operations before
> + * acknowledging. But even if it happens, we won't find a
> + * memory context below and fail silently, so it should be ok.
> + */
> + if (!(dsisr & SPA_XSL_TF)) {
> + WARN(1, "Invalid xsl interrupt fault register %#llx\n", dsisr);
> + ack_irq(spa, ADDRESS_ERROR);
> + return IRQ_HANDLED;
> + }
> +
> + rcu_read_lock();
> + pe_data = radix_tree_lookup(&spa->pe_tree, pe_handle);
> + if (!pe_data) {
> + /*
> + * Could only happen if the driver didn't notify the
> + * AFU about PASID termination before removing the PE,
> + * or the AFU didn't wait for all memory access to
> + * have completed.
> + *
> + * Either way, we fail early, but we shouldn't log an
> + * error message, as it is a valid (if unexpected)
> + * scenario
> + */
> + rcu_read_unlock();
> + pr_debug("Unknown mm context for xsl interrupt\n");
> + ack_irq(spa, ADDRESS_ERROR);
> + return IRQ_HANDLED;
> + }
> + WARN_ON(pe_data->mm->context.id != pid);
> +
> + spa->xsl_fault.pe = pe_handle;
> + spa->xsl_fault.dar = dar;
> + spa->xsl_fault.dsisr = dsisr;
> + spa->xsl_fault.pe_data = *pe_data;
> + mmgrab(pe_data->mm); /* mm count is released by bottom half */
> +
> + rcu_read_unlock();
> + schedule_work(&spa->xsl_fault.fault_work);
> + return IRQ_HANDLED;
> +}
> +
> +static void unmap_irq_registers(struct spa *spa)
> +{
> + pnv_ocxl_unmap_xsl_regs(spa->reg_dsisr, spa->reg_dar, spa->reg_tfc,
> + spa->reg_pe_handle);
> +}
> +
> +static int map_irq_registers(struct pci_dev *dev, struct spa *spa)
> +{
> + return pnv_ocxl_map_xsl_regs(dev, &spa->reg_dsisr, &spa->reg_dar,
> + &spa->reg_tfc, &spa->reg_pe_handle);
> +}
> +
> +static int setup_xsl_irq(struct pci_dev *dev, struct link *link)
> +{
> + struct spa *spa = link->spa;
> + int rc;
> + int hwirq;
> +
> + rc = pnv_ocxl_get_xsl_irq(dev, &hwirq);
> + if (rc)
> + return rc;
> +
> + rc = map_irq_registers(dev, spa);
> + if (rc)
> + return rc;
> +
> + spa->irq_name = kasprintf(GFP_KERNEL, "ocxl-xsl-%x-%x-%x",
> + link->domain, link->bus, link->dev);
> + if (!spa->irq_name) {
> + unmap_irq_registers(spa);
> + dev_err(&dev->dev, "Can't allocate name for xsl interrupt\n");
> + return -ENOMEM;
> + }
> + /*
> + * At some point, we'll need to look into allowing a higher
> + * number of interrupts. Could we have an IRQ domain per link?
> + */
> + spa->virq = irq_create_mapping(NULL, hwirq);
> + if (!spa->virq) {
> + kfree(spa->irq_name);
> + unmap_irq_registers(spa);
> + dev_err(&dev->dev,
> + "irq_create_mapping failed for translation interrupt\n");
> + return -EINVAL;
> + }
> +
> + dev_dbg(&dev->dev, "hwirq %d mapped to virq %d\n", hwirq, spa->virq);
> +
> + rc = request_irq(spa->virq, xsl_fault_handler, 0, spa->irq_name,
> + link);
> + if (rc) {
> + irq_dispose_mapping(spa->virq);
> + kfree(spa->irq_name);
> + unmap_irq_registers(spa);
> + dev_err(&dev->dev,
> + "request_irq failed for translation interrupt: %d\n",
> + rc);
> + return -EINVAL;
> + }
> + return 0;
> +}
> +
> +static void release_xsl_irq(struct link *link)
> +{
> + struct spa *spa = link->spa;
> +
> + if (spa->virq) {
> + free_irq(spa->virq, link);
> + irq_dispose_mapping(spa->virq);
> + }
> + kfree(spa->irq_name);
> + unmap_irq_registers(spa);
> +}
> +
> +static int alloc_spa(struct pci_dev *dev, struct link *link)
> +{
> + struct spa *spa;
> +
> + spa = kzalloc(sizeof(struct spa), GFP_KERNEL);
> + if (!spa)
> + return -ENOMEM;
> +
> + mutex_init(&spa->spa_lock);
> + INIT_RADIX_TREE(&spa->pe_tree, GFP_KERNEL);
> + INIT_WORK(&spa->xsl_fault.fault_work, xsl_fault_handler_bh);
> +
> + spa->spa_order = SPA_SPA_SIZE_LOG - PAGE_SHIFT;
> + spa->spa_mem = (struct ocxl_process_element *)
> + __get_free_pages(GFP_KERNEL | __GFP_ZERO, spa->spa_order);
> + if (!spa->spa_mem) {
> + dev_err(&dev->dev, "Can't allocate Shared Process Area\n");
> + kfree(spa);
> + return -ENOMEM;
> + }
> + pr_debug("Allocated SPA for %x:%x:%x at %p\n", link->domain, link->bus,
> + link->dev, spa->spa_mem);
> +
> + link->spa = spa;
> + return 0;
> +}
> +
> +static void free_spa(struct link *link)
> +{
> + struct spa *spa = link->spa;
> +
> + pr_debug("Freeing SPA for %x:%x:%x\n", link->domain, link->bus,
> + link->dev);
> +
> + if (spa && spa->spa_mem) {
> + free_pages((unsigned long) spa->spa_mem, spa->spa_order);
> + kfree(spa);
> + link->spa = NULL;
> + }
> +}
> +
> +static int alloc_link(struct pci_dev *dev, int PE_mask, struct link **out_link)
> +{
> + struct link *link;
> + int rc;
> +
> + link = kzalloc(sizeof(struct link), GFP_KERNEL);
> + if (!link)
> + return -ENOMEM;
> +
> + kref_init(&link->ref);
> + link->domain = pci_domain_nr(dev->bus);
> + link->bus = dev->bus->number;
> + link->dev = PCI_SLOT(dev->devfn);
> + atomic_set(&link->irq_available, MAX_IRQ_PER_LINK);
> +
> + rc = alloc_spa(dev, link);
> + if (rc)
> + goto err_free;
> +
> + rc = setup_xsl_irq(dev, link);
> + if (rc)
> + goto err_spa;
> +
> + /* platform specific hook */
> + rc = pnv_ocxl_spa_setup(dev, link->spa->spa_mem, PE_mask,
> + &link->platform_data);
> + if (rc)
> + goto err_xsl_irq;
> +
> + *out_link = link;
> + return 0;
> +
> +err_xsl_irq:
> + release_xsl_irq(link);
> +err_spa:
> + free_spa(link);
> +err_free:
> + kfree(link);
> + return rc;
> +}
> +
> +static void free_link(struct link *link)
> +{
> + release_xsl_irq(link);
> + free_spa(link);
> + kfree(link);
> +}
> +
> +int ocxl_link_setup(struct pci_dev *dev, int PE_mask, void **link_handle)
> +{
> + int rc = 0;
> + struct link *link;
> +
> + mutex_lock(&links_list_lock);
> + list_for_each_entry(link, &links_list, list) {
> + /* The functions of a device all share the same link */
> + if (link->domain == pci_domain_nr(dev->bus) &&
> + link->bus == dev->bus->number &&
> + link->dev == PCI_SLOT(dev->devfn)) {
> + kref_get(&link->ref);
> + *link_handle = link;
> + goto unlock;
> + }
> + }
> + rc = alloc_link(dev, PE_mask, &link);
> + if (rc)
> + goto unlock;
> +
> + list_add(&link->list, &links_list);
> + *link_handle = link;
> +unlock:
> + mutex_unlock(&links_list_lock);
> + return rc;
> +}
> +
> +static void release_xsl(struct kref *ref)
> +{
> + struct link *link = container_of(ref, struct link, ref);
> +
> + list_del(&link->list);
> + /* call platform code before releasing data */
> + pnv_ocxl_spa_release(link->platform_data);
> + free_link(link);
> +}
> +
> +void ocxl_link_release(struct pci_dev *dev, void *link_handle)
> +{
> + struct link *link = (struct link *) link_handle;
> +
> + mutex_lock(&links_list_lock);
> + kref_put(&link->ref, release_xsl);
> + mutex_unlock(&links_list_lock);
> +}
> +
> +static u64 calculate_cfg_state(bool kernel)
> +{
> + u64 state;
> +
> + state = SPA_CFG_DR;
> + if (mfspr(SPRN_LPCR) & LPCR_TC)
> + state |= SPA_CFG_TC;
> + if (radix_enabled())
> + state |= SPA_CFG_XLAT_ror;
> + else
> + state |= SPA_CFG_XLAT_hpt;
> + state |= SPA_CFG_HV;
> + if (kernel) {
> + if (mfmsr() & MSR_SF)
> + state |= SPA_CFG_SF;
> + } else {
> + state |= SPA_CFG_PR;
> + if (!test_tsk_thread_flag(current, TIF_32BIT))
> + state |= SPA_CFG_SF;
> + }
> + return state;
> +}
> +
> +int ocxl_link_add_pe(void *link_handle, int pasid, u32 pidr, u32 tidr,
> + u64 amr, struct mm_struct *mm,
> + void (*xsl_err_cb)(void *data, u64 addr, u64 dsisr),
> + void *xsl_err_data)
> +{
> + struct link *link = (struct link *) link_handle;
> + struct spa *spa = link->spa;
> + struct ocxl_process_element *pe;
> + int pe_handle, rc = 0;
> + struct pe_data *pe_data;
> +
> + BUILD_BUG_ON(sizeof(struct ocxl_process_element) != 128);
> + if (pasid > SPA_PASID_MAX)
> + return -EINVAL;
> +
> + mutex_lock(&spa->spa_lock);
> + pe_handle = pasid & SPA_PE_MASK;
> + pe = spa->spa_mem + pe_handle;
> +
> + if (pe->software_state) {
> + rc = -EBUSY;
> + goto unlock;
> + }
> +
> + pe_data = kmalloc(sizeof(*pe_data), GFP_KERNEL);
> + if (!pe_data) {
> + rc = -ENOMEM;
> + goto unlock;
> + }
> +
> + pe_data->mm = mm;
> + pe_data->xsl_err_cb = xsl_err_cb;
> + pe_data->xsl_err_data = xsl_err_data;
> +
> + memset(pe, 0, sizeof(struct ocxl_process_element));
> + pe->config_state = cpu_to_be64(calculate_cfg_state(pidr == 0));
> + pe->lpid = cpu_to_be32(mfspr(SPRN_LPID));
> + pe->pid = cpu_to_be32(pidr);
> + pe->tid = cpu_to_be32(tidr);
> + pe->amr = cpu_to_be64(amr);
> + pe->software_state = cpu_to_be32(SPA_PE_VALID);
drivers/misc/ocxl/link.c:509:26: warning: incorrect type in assignment
(different base types)
drivers/misc/ocxl/link.c:509:26: expected unsigned long long
[unsigned] [usertype] config_state
drivers/misc/ocxl/link.c:509:26: got restricted __be64 [usertype]
<noident>
drivers/misc/ocxl/link.c:510:18: warning: incorrect type in assignment
(different base types)
drivers/misc/ocxl/link.c:510:18: expected unsigned int [unsigned]
[usertype] lpid
drivers/misc/ocxl/link.c:510:18: got restricted __be32 [usertype]
<noident>
drivers/misc/ocxl/link.c:511:17: warning: incorrect type in assignment
(different base types)
drivers/misc/ocxl/link.c:511:17: expected unsigned int [unsigned]
[usertype] pid
drivers/misc/ocxl/link.c:511:17: got restricted __be32 [usertype]
<noident>
drivers/misc/ocxl/link.c:512:17: warning: incorrect type in assignment
(different base types)
drivers/misc/ocxl/link.c:512:17: expected unsigned int [unsigned]
[usertype] tid
drivers/misc/ocxl/link.c:512:17: got restricted __be32 [usertype]
<noident>
drivers/misc/ocxl/link.c:513:17: warning: incorrect type in assignment
(different base types)
drivers/misc/ocxl/link.c:513:17: expected unsigned long long
[unsigned] [usertype] amr
drivers/misc/ocxl/link.c:513:17: got restricted __be64 [usertype]
<noident>
drivers/misc/ocxl/link.c:514:28: warning: incorrect type in assignment
(different base types)
drivers/misc/ocxl/link.c:514:28: expected unsigned int [unsigned]
[usertype] software_state
drivers/misc/ocxl/link.c:514:28: got restricted __be32 [usertype]
<noident>
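As mentioned above, declaring the process element fields as
__be32/__be64 in ocxl_internal.h should silence this whole batch and
make the on-wire format explicit. A sketch of what I mean — field names
are from this patch, but the reserved padding is illustrative, sized so
the BUILD_BUG_ON(sizeof(struct ocxl_process_element) != 128) above
still holds:

	struct ocxl_process_element {
		__be64 config_state;
		__be32 reserved1[11];
		__be32 lpid;
		__be32 tid;
		__be32 pid;
		__be32 reserved2[10];
		__be64 amr;
		__be32 reserved3[3];
		__be32 software_state;
	};
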
> +
> + mm_context_add_copro(mm);
> + /*
> + * Barrier is to make sure PE is visible in the SPA before it
> + * is used by the device. It also helps with the global TLBI
> + * invalidation
> + */
> + mb();
> + radix_tree_insert(&spa->pe_tree, pe_handle, pe_data);
> +
> + /*
> + * The mm must stay valid for as long as the device uses it. We
> + * lower the count when the context is removed from the SPA.
> + *
> + * We grab mm_count (and not mm_users), as we don't want to
> + * end up in a circular dependency if a process mmaps its
> + * mmio, therefore incrementing the file ref count when
> + * calling mmap(), and forgets to unmap before exiting. In
> + * that scenario, when the kernel handles the death of the
> + * process, the file is not cleaned because unmap was not
> + * called, and the mm wouldn't be freed because we would still
> + * have a reference on mm_users. Incrementing mm_count solves
> + * the problem.
> + */
> + mmgrab(mm);
> +unlock:
> + mutex_unlock(&spa->spa_lock);
> + return rc;
> +}
> +
> +int ocxl_link_remove_pe(void *link_handle, int pasid)
> +{
> + struct link *link = (struct link *) link_handle;
> + struct spa *spa = link->spa;
> + struct ocxl_process_element *pe;
> + struct pe_data *pe_data;
> + int pe_handle, rc;
> +
> + if (pasid > SPA_PASID_MAX)
> + return -EINVAL;
> +
> + /*
> + * About synchronization with our memory fault handler:
> + *
> + * Before removing the PE, the driver is supposed to have
> +	 * notified the AFU, which should have cleaned up and made
> + * sure the PASID is no longer in use, including pending
> + * interrupts. However, there's no way to be sure...
> + *
> + * We clear the PE and remove the context from our radix
> + * tree. From that point on, any new interrupt for that
> + * context will fail silently, which is ok. As mentioned
> + * above, that's not expected, but it could happen if the
> + * driver or AFU didn't do the right thing.
> + *
> + * There could still be a bottom half running, but we don't
> + * need to wait/flush, as it is managing a reference count on
> + * the mm it reads from the radix tree.
> + */
> + pe_handle = pasid & SPA_PE_MASK;
> + pe = spa->spa_mem + pe_handle;
> +
> + mutex_lock(&spa->spa_lock);
> +
> + if (!(pe->software_state & cpu_to_be32(SPA_PE_VALID))) {
drivers/misc/ocxl/link.c:581:36: warning: restricted __be32 degrades to integer
> + rc = -EINVAL;
> + goto unlock;
> + }
> +
> + memset(pe, 0, sizeof(struct ocxl_process_element));
> + /*
> + * The barrier makes sure the PE is removed from the SPA
> + * before we clear the NPU context cache below, so that the
> + * old PE cannot be reloaded erroneously.
> + */
> + mb();
> +
> + /*
> + * hook to platform code
> + * On powerpc, the entry needs to be cleared from the context
> + * cache of the NPU.
> + */
> + rc = pnv_ocxl_spa_remove_pe(link->platform_data, pe_handle);
> + WARN_ON(rc);
> +
> + pe_data = radix_tree_delete(&spa->pe_tree, pe_handle);
> + if (!pe_data) {
> + WARN(1, "Couldn't find pe data when removing PE\n");
> + } else {
> + mm_context_remove_copro(pe_data->mm);
> + mmdrop(pe_data->mm);
> + kfree_rcu(pe_data, rcu);
> + }
> +unlock:
> + mutex_unlock(&spa->spa_lock);
> + return rc;
> +}
> diff --git a/drivers/misc/ocxl/main.c b/drivers/misc/ocxl/main.c
> new file mode 100644
> index 000000000000..be34b8fae97a
> --- /dev/null
> +++ b/drivers/misc/ocxl/main.c
> @@ -0,0 +1,40 @@
> +/*
> + * Copyright 2017 IBM Corp.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License
> + * as published by the Free Software Foundation; either version
> + * 2 of the License, or (at your option) any later version.
> + */
> +
> +#include <linux/module.h>
> +#include <linux/pci.h>
> +#include "ocxl_internal.h"
> +
> +static int __init init_ocxl(void)
> +{
> + int rc = 0;
> +
> + rc = ocxl_file_init();
> + if (rc)
> + return rc;
> +
> + rc = pci_register_driver(&ocxl_pci_driver);
> + if (rc) {
> + ocxl_file_exit();
> + return rc;
> + }
> + return 0;
> +}
> +
> +static void exit_ocxl(void)
> +{
> + pci_unregister_driver(&ocxl_pci_driver);
> + ocxl_file_exit();
> +}
> +
> +module_init(init_ocxl);
> +module_exit(exit_ocxl);
> +
> +MODULE_DESCRIPTION("Open Coherent Accelerator");
> +MODULE_LICENSE("GPL");
> diff --git a/drivers/misc/ocxl/ocxl_internal.h b/drivers/misc/ocxl/ocxl_internal.h
> new file mode 100644
> index 000000000000..e07f7d523275
> --- /dev/null
> +++ b/drivers/misc/ocxl/ocxl_internal.h
> @@ -0,0 +1,200 @@
> +/*
> + * Copyright 2017 IBM Corp.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License
> + * as published by the Free Software Foundation; either version
> + * 2 of the License, or (at your option) any later version.
> + */
> +
> +#ifndef _OCXL_INTERNAL_H_
> +#define _OCXL_INTERNAL_H_
> +
> +#include <linux/pci.h>
> +#include <linux/cdev.h>
> +#include <linux/list.h>
> +
> +#define OCXL_AFU_NAME_SZ (24+1) /* add 1 for NULL termination */
> +#define MAX_IRQ_PER_LINK 2000
> +#define MAX_IRQ_PER_CONTEXT MAX_IRQ_PER_LINK
> +
> +#define to_ocxl_function(d) container_of(d, struct ocxl_fn, dev)
> +#define to_ocxl_afu(d) container_of(d, struct ocxl_afu, dev)
> +
> +extern struct pci_driver ocxl_pci_driver;
> +
> +/*
> + * The following 2 structures are a fairly generic way of representing
> + * the configuration data for a function and AFU, as read from the
> + * configuration space.
> + */
> +struct ocxl_afu_config {
> + u8 idx;
> + int dvsec_afu_control_pos;
> + char name[OCXL_AFU_NAME_SZ];
> + u8 version_major;
> + u8 version_minor;
> + u8 afuc_type;
> + u8 afum_type;
> + u8 profile;
> + u8 global_mmio_bar;
> + u64 global_mmio_offset;
> + u32 global_mmio_size;
> + u8 pp_mmio_bar;
> + u64 pp_mmio_offset;
> + u32 pp_mmio_stride;
> + u8 log_mem_size;
> + u8 pasid_supported_log;
> + u16 actag_supported;
> +};
> +
> +struct ocxl_fn_config {
> + int dvsec_tl_pos;
> + int dvsec_function_pos;
> + int dvsec_afu_info_pos;
> + s8 max_pasid_log;
> + s8 max_afu_index;
> +};
> +
> +struct ocxl_fn {
> + struct device dev;
> + int bar_used[3];
> + struct ocxl_fn_config config;
> + struct list_head afu_list;
> + int pasid_base;
> + int actag_base;
> + int actag_enabled;
> + int actag_supported;
> + struct list_head pasid_list;
> + struct list_head actag_list;
> + void *link;
> +};
> +
> +struct ocxl_afu {
> + struct ocxl_fn *fn;
> + struct list_head list;
> + struct device dev;
> + struct cdev cdev;
> + struct ocxl_afu_config config;
> + int pasid_base;
> + int pasid_count; /* opened contexts */
> + int pasid_max; /* maximum number of contexts */
> + int actag_base;
> + int actag_enabled;
> + struct mutex contexts_lock;
> + struct idr contexts_idr;
> + struct mutex afu_control_lock;
> + u64 global_mmio_start;
> + u64 irq_base_offset;
> + void __iomem *global_mmio_ptr;
> + u64 pp_mmio_start;
> + struct bin_attribute attr_global_mmio;
> +};
> +
> +enum ocxl_context_status {
> + CLOSED,
> + OPENED,
> + ATTACHED,
> +};
> +
> +// Contains metadata about a translation fault
> +struct ocxl_xsl_error {
> + u64 addr; // The address that triggered the fault
> +	u64 dsisr; // The value of the DSISR register
> + u64 count; // The number of times this fault has been triggered
> +};
> +
> +struct ocxl_context {
> + struct ocxl_afu *afu;
> + int pasid;
> + struct mutex status_mutex;
> + enum ocxl_context_status status;
> + struct address_space *mapping;
> + struct mutex mapping_lock;
> + wait_queue_head_t events_wq;
> + struct mutex xsl_error_lock;
> + struct ocxl_xsl_error xsl_error;
> + struct mutex irq_lock;
> + struct idr irq_idr;
> +};
> +
> +struct ocxl_process_element {
> + u64 config_state;
> + u32 reserved1[11];
> + u32 lpid;
> + u32 tid;
> + u32 pid;
> + u32 reserved2[10];
> + u64 amr;
> + u32 reserved3[3];
> + u32 software_state;
> +};
> +
> +
> +extern struct ocxl_afu *ocxl_afu_get(struct ocxl_afu *afu);
> +extern void ocxl_afu_put(struct ocxl_afu *afu);
> +
> +extern int ocxl_create_cdev(struct ocxl_afu *afu);
> +extern void ocxl_destroy_cdev(struct ocxl_afu *afu);
> +extern int ocxl_register_afu(struct ocxl_afu *afu);
> +extern void ocxl_unregister_afu(struct ocxl_afu *afu);
> +
> +extern int ocxl_file_init(void);
> +extern void ocxl_file_exit(void);
> +
> +extern int ocxl_config_read_function(struct pci_dev *dev,
> + struct ocxl_fn_config *fn);
> +
> +extern int ocxl_config_check_afu_index(struct pci_dev *dev,
> + struct ocxl_fn_config *fn, int afu_idx);
> +extern int ocxl_config_read_afu(struct pci_dev *dev,
> + struct ocxl_fn_config *fn,
> + struct ocxl_afu_config *afu,
> + u8 afu_idx);
> +extern int ocxl_config_get_pasid_info(struct pci_dev *dev, int *count);
> +extern void ocxl_config_set_afu_pasid(struct pci_dev *dev,
> + int afu_control,
> + int pasid_base, u32 pasid_count_log);
> +extern int ocxl_config_get_actag_info(struct pci_dev *dev,
> + u16 *base, u16 *enabled, u16 *supported);
> +extern void ocxl_config_set_actag(struct pci_dev *dev, int func_dvsec,
> + u32 tag_first, u32 tag_count);
> +extern void ocxl_config_set_afu_actag(struct pci_dev *dev, int afu_control,
> + int actag_base, int actag_count);
> +extern void ocxl_config_set_afu_state(struct pci_dev *dev, int afu_control,
> + int enable);
> +extern int ocxl_config_set_TL(struct pci_dev *dev, int tl_dvsec);
> +extern int ocxl_config_terminate_pasid(struct pci_dev *dev, int afu_control,
> + int pasid);
> +
> +extern int ocxl_link_setup(struct pci_dev *dev, int PE_mask,
> + void **link_handle);
> +extern void ocxl_link_release(struct pci_dev *dev, void *link_handle);
> +extern int ocxl_link_add_pe(void *link_handle, int pasid, u32 pidr, u32 tidr,
> + u64 amr, struct mm_struct *mm,
> + void (*xsl_err_cb)(void *data, u64 addr, u64 dsisr),
> + void *xsl_err_data);
> +extern int ocxl_link_remove_pe(void *link_handle, int pasid);
> +extern int ocxl_link_irq_alloc(void *link_handle, int *hw_irq,
> + u64 *addr);
> +extern void ocxl_link_free_irq(void *link_handle, int hw_irq);
> +
> +extern int ocxl_pasid_afu_alloc(struct ocxl_fn *fn, u32 size);
> +extern void ocxl_pasid_afu_free(struct ocxl_fn *fn, u32 start, u32 size);
> +extern int ocxl_actag_afu_alloc(struct ocxl_fn *fn, u32 size);
> +extern void ocxl_actag_afu_free(struct ocxl_fn *fn, u32 start, u32 size);
> +
> +extern struct ocxl_context *ocxl_context_alloc(void);
> +extern int ocxl_context_init(struct ocxl_context *ctx, struct ocxl_afu *afu,
> + struct address_space *mapping);
> +extern int ocxl_context_attach(struct ocxl_context *ctx, u64 amr);
> +extern int ocxl_context_mmap(struct ocxl_context *ctx,
> + struct vm_area_struct *vma);
> +extern int ocxl_context_detach(struct ocxl_context *ctx);
> +extern void ocxl_context_detach_all(struct ocxl_afu *afu);
> +extern void ocxl_context_free(struct ocxl_context *ctx);
> +
> +extern int ocxl_sysfs_add_afu(struct ocxl_afu *afu);
> +extern void ocxl_sysfs_remove_afu(struct ocxl_afu *afu);
> +
> +#endif /* _OCXL_INTERNAL_H_ */
> diff --git a/drivers/misc/ocxl/pasid.c b/drivers/misc/ocxl/pasid.c
> new file mode 100644
> index 000000000000..ea999a3a99b4
> --- /dev/null
> +++ b/drivers/misc/ocxl/pasid.c
> @@ -0,0 +1,114 @@
> +/*
> + * Copyright 2017 IBM Corp.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License
> + * as published by the Free Software Foundation; either version
> + * 2 of the License, or (at your option) any later version.
> + */
> +
> +#include "ocxl_internal.h"
> +
> +
> +struct id_range {
> + struct list_head list;
> + u32 start;
> + u32 end;
> +};
> +
> +#ifdef DEBUG
> +static void dump_list(struct list_head *head, char *type_str)
> +{
> + struct id_range *cur;
> +
> + pr_debug("%s ranges allocated:\n", type_str);
> + list_for_each_entry(cur, head, list) {
> + pr_debug("Range %d->%d\n", cur->start, cur->end);
> + }
> +}
> +#endif
> +
> +static int range_alloc(struct list_head *head, u32 size, int max_id,
> + char *type_str)
> +{
> + struct list_head *pos;
> + struct id_range *cur, *new;
> + int rc, last_end;
> +
> + new = kmalloc(sizeof(struct id_range), GFP_KERNEL);
> + if (!new)
> + return -ENOMEM;
> +
> + pos = head;
> + last_end = -1;
> + list_for_each_entry(cur, head, list) {
> + if ((cur->start - last_end) > size)
> + break;
> + last_end = cur->end;
> + pos = &cur->list;
> + }
> +
> + new->start = last_end + 1;
> + new->end = new->start + size - 1;
> +
> + if (new->end > max_id) {
> + kfree(new);
> + rc = -ENOSPC;
> + } else {
> + list_add(&new->list, pos);
> + rc = new->start;
> + }
> +
> +#ifdef DEBUG
> + dump_list(head, type_str);
> +#endif
> + return rc;
> +}
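
Not a bug, just checking my understanding: range_alloc() is a
first-fit search over a list kept sorted by range start. For example,
with [0..3] and [10..15] already allocated, a request for 4 IDs fills
the gap and returns 4. A one-line comment saying the list is kept
sorted might help the next reader.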
> +
> +static void range_free(struct list_head *head, u32 start, u32 size,
> + char *type_str)
> +{
> + bool found = false;
> + struct id_range *cur, *tmp;
> +
> + list_for_each_entry_safe(cur, tmp, head, list) {
> + if (cur->start == start && cur->end == (start + size - 1)) {
> + found = true;
> + list_del(&cur->list);
> + kfree(cur);
> + break;
> + }
> + }
> + WARN_ON(!found);
> +#ifdef DEBUG
> + dump_list(head, type_str);
> +#endif
> +}
> +
> +int ocxl_pasid_afu_alloc(struct ocxl_fn *fn, u32 size)
> +{
> + int max_pasid;
> +
> + if (fn->config.max_pasid_log < 0)
> + return -ENOSPC;
> + max_pasid = 1 << fn->config.max_pasid_log;
> + return range_alloc(&fn->pasid_list, size, max_pasid, "afu pasid");
> +}
> +
> +void ocxl_pasid_afu_free(struct ocxl_fn *fn, u32 start, u32 size)
> +{
> + return range_free(&fn->pasid_list, start, size, "afu pasid");
> +}
> +
> +int ocxl_actag_afu_alloc(struct ocxl_fn *fn, u32 size)
> +{
> + int max_actag;
> +
> + max_actag = fn->actag_enabled;
> + return range_alloc(&fn->actag_list, size, max_actag, "afu actag");
> +}
> +
> +void ocxl_actag_afu_free(struct ocxl_fn *fn, u32 start, u32 size)
> +{
> + return range_free(&fn->actag_list, start, size, "afu actag");
> +}
> diff --git a/drivers/misc/ocxl/pci.c b/drivers/misc/ocxl/pci.c
> new file mode 100644
> index 000000000000..39e7bdd48215
> --- /dev/null
> +++ b/drivers/misc/ocxl/pci.c
> @@ -0,0 +1,592 @@
> +/*
> + * Copyright 2017 IBM Corp.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License
> + * as published by the Free Software Foundation; either version
> + * 2 of the License, or (at your option) any later version.
> + */
> +
> +#include <linux/module.h>
> +#include <linux/pci.h>
> +#include <linux/idr.h>
> +#include <asm/pnv-ocxl.h>
> +#include "ocxl_internal.h"
> +
> +/*
> + * Any opencapi device which wants to use this 'generic' driver should
> + * use the 0x062B device ID. Vendors should define the subsystem
> + * vendor/device ID to help differentiate devices.
> + */
> +static const struct pci_device_id ocxl_pci_tbl[] = {
> + { PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x062B), },
> + { }
> +};
> +MODULE_DEVICE_TABLE(pci, ocxl_pci_tbl);
> +
> +
> +static struct ocxl_fn *ocxl_fn_get(struct ocxl_fn *fn)
> +{
> + return (get_device(&fn->dev) == NULL) ? NULL : fn;
> +}
> +
> +static void ocxl_fn_put(struct ocxl_fn *fn)
> +{
> + put_device(&fn->dev);
> +}
> +
> +struct ocxl_afu *ocxl_afu_get(struct ocxl_afu *afu)
> +{
> + return (get_device(&afu->dev) == NULL) ? NULL : afu;
> +}
> +
> +void ocxl_afu_put(struct ocxl_afu *afu)
> +{
> + put_device(&afu->dev);
> +}
> +
> +static struct ocxl_afu *alloc_afu(struct ocxl_fn *fn)
> +{
> + struct ocxl_afu *afu;
> +
> + afu = kzalloc(sizeof(struct ocxl_afu), GFP_KERNEL);
> + if (!afu)
> + return NULL;
> +
> + mutex_init(&afu->contexts_lock);
> + mutex_init(&afu->afu_control_lock);
> + idr_init(&afu->contexts_idr);
> + afu->fn = fn;
> + ocxl_fn_get(fn);
> + return afu;
> +}
> +
> +static void free_afu(struct ocxl_afu *afu)
> +{
> + idr_destroy(&afu->contexts_idr);
> + ocxl_fn_put(afu->fn);
> + kfree(afu);
> +}
> +
> +static void free_afu_dev(struct device *dev)
> +{
> + struct ocxl_afu *afu = to_ocxl_afu(dev);
> +
> + ocxl_unregister_afu(afu);
> + free_afu(afu);
> +}
> +
> +static int set_afu_device(struct ocxl_afu *afu, const char *location)
> +{
> + struct ocxl_fn *fn = afu->fn;
> + int rc;
> +
> + afu->dev.parent = &fn->dev;
> + afu->dev.release = free_afu_dev;
> + rc = dev_set_name(&afu->dev, "%s.%s.%hhu", afu->config.name, location,
> + afu->config.idx);
> + return rc;
> +}
> +
> +static int assign_afu_actag(struct ocxl_afu *afu, struct pci_dev *dev)
> +{
> + struct ocxl_fn *fn = afu->fn;
> + int actag_count, actag_offset;
> +
> + /*
> +	 * if the function was granted fewer actags than it supports,
> +	 * each afu reduces its count proportionally
> + */
> + actag_count = afu->config.actag_supported *
> + fn->actag_enabled / fn->actag_supported;
> + actag_offset = ocxl_actag_afu_alloc(fn, actag_count);
> + if (actag_offset < 0) {
> + dev_err(&afu->dev, "Can't allocate %d actags for AFU: %d\n",
> + actag_count, actag_offset);
> + return actag_offset;
> + }
> + afu->actag_base = fn->actag_base + actag_offset;
> + afu->actag_enabled = actag_count;
> +
> + ocxl_config_set_afu_actag(dev, afu->config.dvsec_afu_control_pos,
> + afu->actag_base, afu->actag_enabled);
> + dev_dbg(&afu->dev, "actag base=%d enabled=%d\n",
> + afu->actag_base, afu->actag_enabled);
> + return 0;
> +}
> +
> +static void reclaim_afu_actag(struct ocxl_afu *afu)
> +{
> + struct ocxl_fn *fn = afu->fn;
> + int start_offset, size;
> +
> + start_offset = afu->actag_base - fn->actag_base;
> + size = afu->actag_enabled;
> + ocxl_actag_afu_free(afu->fn, start_offset, size);
> +}
> +
> +static int assign_afu_pasid(struct ocxl_afu *afu, struct pci_dev *dev)
> +{
> + struct ocxl_fn *fn = afu->fn;
> + int pasid_count, pasid_offset;
> +
> + /*
> + * We only support the case where the function configuration
> + * requested enough PASIDs to cover all AFUs.
> + */
> + pasid_count = 1 << afu->config.pasid_supported_log;
> + pasid_offset = ocxl_pasid_afu_alloc(fn, pasid_count);
> + if (pasid_offset < 0) {
> + dev_err(&afu->dev, "Can't allocate %d PASIDs for AFU: %d\n",
> + pasid_count, pasid_offset);
> + return pasid_offset;
> + }
> + afu->pasid_base = fn->pasid_base + pasid_offset;
> + afu->pasid_count = 0;
> + afu->pasid_max = pasid_count;
> +
> + ocxl_config_set_afu_pasid(dev, afu->config.dvsec_afu_control_pos,
> + afu->pasid_base,
> + afu->config.pasid_supported_log);
> + dev_dbg(&afu->dev, "PASID base=%d, enabled=%d\n",
> + afu->pasid_base, pasid_count);
> + return 0;
> +}
> +
> +static void reclaim_afu_pasid(struct ocxl_afu *afu)
> +{
> + struct ocxl_fn *fn = afu->fn;
> + int start_offset, size;
> +
> + start_offset = afu->pasid_base - fn->pasid_base;
> + size = 1 << afu->config.pasid_supported_log;
> + ocxl_pasid_afu_free(afu->fn, start_offset, size);
> +}
> +
> +static int reserve_fn_bar(struct ocxl_fn *fn, int bar)
> +{
> + struct pci_dev *dev = to_pci_dev(fn->dev.parent);
> + int rc, idx;
> +
> + if (bar != 0 && bar != 2 && bar != 4)
> + return -EINVAL;
> +
> + idx = bar >> 1;
> + if (fn->bar_used[idx]++ == 0) {
> + rc = pci_request_region(dev, bar, "ocxl");
> + if (rc)
> + return rc;
> + }
> + return 0;
> +}
> +
> +static void release_fn_bar(struct ocxl_fn *fn, int bar)
> +{
> + struct pci_dev *dev = to_pci_dev(fn->dev.parent);
> + int idx;
> +
> + if (bar != 0 && bar != 2 && bar != 4)
> + return;
> +
> + idx = bar >> 1;
> + if (--fn->bar_used[idx] == 0)
> + pci_release_region(dev, bar);
> + WARN_ON(fn->bar_used[idx] < 0);
> +}
> +
> +static int map_mmio_areas(struct ocxl_afu *afu, struct pci_dev *dev)
> +{
> + int rc;
> +
> + rc = reserve_fn_bar(afu->fn, afu->config.global_mmio_bar);
> + if (rc)
> + return rc;
> +
> + rc = reserve_fn_bar(afu->fn, afu->config.pp_mmio_bar);
> + if (rc) {
> + release_fn_bar(afu->fn, afu->config.global_mmio_bar);
> + return rc;
> + }
> +
> + afu->global_mmio_start =
> + pci_resource_start(dev, afu->config.global_mmio_bar) +
> + afu->config.global_mmio_offset;
> + afu->pp_mmio_start =
> + pci_resource_start(dev, afu->config.pp_mmio_bar) +
> + afu->config.pp_mmio_offset;
> +
> + afu->global_mmio_ptr = ioremap(afu->global_mmio_start,
> + afu->config.global_mmio_size);
> + if (!afu->global_mmio_ptr) {
> + release_fn_bar(afu->fn, afu->config.pp_mmio_bar);
> + release_fn_bar(afu->fn, afu->config.global_mmio_bar);
> + dev_err(&dev->dev, "Error mapping global mmio area\n");
> + return -ENOMEM;
> + }
> +
> + /*
> + * Leave an empty page between the per-process mmio area and
> + * the AFU interrupt mappings
> + */
> + afu->irq_base_offset = afu->config.pp_mmio_stride + PAGE_SIZE;
> + return 0;
> +}
> +
> +static void unmap_mmio_areas(struct ocxl_afu *afu)
> +{
> + if (afu->global_mmio_ptr) {
> + iounmap(afu->global_mmio_ptr);
> + afu->global_mmio_ptr = NULL;
> + }
> + afu->global_mmio_start = 0;
> + afu->pp_mmio_start = 0;
> + release_fn_bar(afu->fn, afu->config.pp_mmio_bar);
> + release_fn_bar(afu->fn, afu->config.global_mmio_bar);
> +}
> +
> +static int configure_afu(struct ocxl_afu *afu, u8 afu_idx, struct pci_dev *dev)
> +{
> + int rc;
> +
> + rc = ocxl_config_read_afu(dev, &afu->fn->config, &afu->config, afu_idx);
> + if (rc)
> + return rc;
> +
> + rc = set_afu_device(afu, dev_name(&dev->dev));
> + if (rc)
> + return rc;
> +
> + rc = assign_afu_actag(afu, dev);
> + if (rc)
> + return rc;
> +
> + rc = assign_afu_pasid(afu, dev);
> + if (rc) {
> + reclaim_afu_actag(afu);
> + return rc;
> + }
> +
> + rc = map_mmio_areas(afu, dev);
> + if (rc) {
> + reclaim_afu_pasid(afu);
> + reclaim_afu_actag(afu);
> + return rc;
> + }
> + return 0;
> +}
> +
> +static void deconfigure_afu(struct ocxl_afu *afu)
> +{
> + unmap_mmio_areas(afu);
> + reclaim_afu_pasid(afu);
> + reclaim_afu_actag(afu);
> +}
> +
> +static int activate_afu(struct pci_dev *dev, struct ocxl_afu *afu)
> +{
> + int rc;
> +
> + ocxl_config_set_afu_state(dev, afu->config.dvsec_afu_control_pos, 1);
> + /*
> + * Char device creation is the last step, as processes can
> + * call our driver immediately, so all our inits must be finished.
> + */
> + rc = ocxl_create_cdev(afu);
> + if (rc)
> + return rc;
> + return 0;
> +}
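
Nit: the tail of activate_afu() could simply be
"return ocxl_create_cdev(afu);".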
> +
> +static void deactivate_afu(struct ocxl_afu *afu)
> +{
> + struct pci_dev *dev = to_pci_dev(afu->fn->dev.parent);
> +
> + ocxl_destroy_cdev(afu);
> + ocxl_config_set_afu_state(dev, afu->config.dvsec_afu_control_pos, 0);
> +}
> +
> +static int init_afu(struct pci_dev *dev, struct ocxl_fn *fn, u8 afu_idx)
> +{
> + int rc;
> + struct ocxl_afu *afu;
> +
> + afu = alloc_afu(fn);
> + if (!afu)
> + return -ENOMEM;
> +
> + rc = configure_afu(afu, afu_idx, dev);
> + if (rc) {
> + free_afu(afu);
> + return rc;
> + }
> +
> + rc = ocxl_register_afu(afu);
> + if (rc)
> + goto err;
> +
> + rc = ocxl_sysfs_add_afu(afu);
> + if (rc)
> + goto err;
> +
> + rc = activate_afu(dev, afu);
> + if (rc)
> + goto err_sys;
> +
> + list_add_tail(&afu->list, &fn->afu_list);
> + return 0;
> +
> +err_sys:
> + ocxl_sysfs_remove_afu(afu);
> +err:
> + deconfigure_afu(afu);
> + device_unregister(&afu->dev);
> + return rc;
> +}
> +
> +static void remove_afu(struct ocxl_afu *afu)
> +{
> + list_del(&afu->list);
> + ocxl_context_detach_all(afu);
> + deactivate_afu(afu);
> + ocxl_sysfs_remove_afu(afu);
> + deconfigure_afu(afu);
> + device_unregister(&afu->dev);
> +}
> +
> +static struct ocxl_fn *alloc_function(struct pci_dev *dev)
> +{
> + struct ocxl_fn *fn;
> +
> + fn = kzalloc(sizeof(struct ocxl_fn), GFP_KERNEL);
> + if (!fn)
> + return NULL;
> +
> + INIT_LIST_HEAD(&fn->afu_list);
> + INIT_LIST_HEAD(&fn->pasid_list);
> + INIT_LIST_HEAD(&fn->actag_list);
> + return fn;
> +}
> +
> +static void free_function(struct ocxl_fn *fn)
> +{
> + WARN_ON(!list_empty(&fn->afu_list));
> + WARN_ON(!list_empty(&fn->pasid_list));
> + kfree(fn);
> +}
> +
> +static void free_function_dev(struct device *dev)
> +{
> + struct ocxl_fn *fn = to_ocxl_function(dev);
> +
> + free_function(fn);
> +}
> +
> +static int set_function_device(struct ocxl_fn *fn, struct pci_dev *dev)
> +{
> + int rc;
> +
> + fn->dev.parent = &dev->dev;
> + fn->dev.release = free_function_dev;
> + rc = dev_set_name(&fn->dev, "ocxlfn.%s", dev_name(&dev->dev));
> + if (rc)
> + return rc;
> + pci_set_drvdata(dev, fn);
> + return 0;
> +}
> +
> +static int assign_function_actag(struct ocxl_fn *fn)
> +{
> + struct pci_dev *dev = to_pci_dev(fn->dev.parent);
> + u16 base, enabled, supported;
> + int rc;
> +
> + rc = ocxl_config_get_actag_info(dev, &base, &enabled, &supported);
> + if (rc)
> + return rc;
> +
> + fn->actag_base = base;
> + fn->actag_enabled = enabled;
> + fn->actag_supported = supported;
> +
> + ocxl_config_set_actag(dev, fn->config.dvsec_function_pos,
> + fn->actag_base, fn->actag_enabled);
> + dev_dbg(&fn->dev, "actag range starting at %d, enabled %d\n",
> + fn->actag_base, fn->actag_enabled);
> + return 0;
> +}
> +
> +static int set_function_pasid(struct ocxl_fn *fn)
> +{
> + struct pci_dev *dev = to_pci_dev(fn->dev.parent);
> + int rc, desired_count, max_count;
> +
> + /* A function may not require any PASID */
> + if (fn->config.max_pasid_log < 0)
> + return 0;
> +
> + rc = ocxl_config_get_pasid_info(dev, &max_count);
> + if (rc)
> + return rc;
> +
> + desired_count = 1 << fn->config.max_pasid_log;
> +
> + if (desired_count > max_count) {
> + dev_err(&fn->dev,
> + "Function requires more PASIDs than is available (%d vs. %d)\n",
> + desired_count, max_count);
> + return -ENOSPC;
> + }
> +
> + fn->pasid_base = 0;
> + return 0;
> +}
> +
> +static int configure_function(struct ocxl_fn *fn, struct pci_dev *dev)
> +{
> + int rc;
> +
> + rc = pci_enable_device(dev);
> + if (rc) {
> + dev_err(&dev->dev, "pci_enable_device failed: %d\n", rc);
> + return rc;
> + }
> +
> + /*
> + * Once it has been confirmed to work on our hardware, we
> + * should reset the function, to force the adapter to restart
> + * from scratch.
> + * A function reset would also reset all its AFUs.
> + *
> + * Some hints for implementation:
> + *
> +	 * - there's no status bit to know when the reset is done. We
> + * should try reading the config space to know when it's
> + * done.
> + * - probably something like:
> + * Reset
> + * wait 100ms
> + * issue config read
> + * allow device up to 1 sec to return success on config
> + * read before declaring it broken
> + *
> + * Some shared logic on the card (CFG, TLX) won't be reset, so
> + * there's no guarantee that it will be enough.
> + */
> + rc = ocxl_config_read_function(dev, &fn->config);
> + if (rc)
> + return rc;
> +
> + rc = set_function_device(fn, dev);
> + if (rc)
> + return rc;
> +
> + rc = assign_function_actag(fn);
> + if (rc)
> + return rc;
> +
> + rc = set_function_pasid(fn);
> + if (rc)
> + return rc;
> +
> + rc = ocxl_link_setup(dev, 0, &fn->link);
> + if (rc)
> + return rc;
> +
> + rc = ocxl_config_set_TL(dev, fn->config.dvsec_tl_pos);
> + if (rc) {
> + ocxl_link_release(dev, fn->link);
> + return rc;
> + }
> + return 0;
> +}
> +
> +static void deconfigure_function(struct ocxl_fn *fn)
> +{
> + struct pci_dev *dev = to_pci_dev(fn->dev.parent);
> +
> + ocxl_link_release(dev, fn->link);
> + pci_disable_device(dev);
> +}
> +
> +static struct ocxl_fn *init_function(struct pci_dev *dev)
> +{
> + struct ocxl_fn *fn;
> + int rc;
> +
> + fn = alloc_function(dev);
> + if (!fn)
> + return ERR_PTR(-ENOMEM);
> +
> + rc = configure_function(fn, dev);
> + if (rc) {
> + free_function(fn);
> + return ERR_PTR(rc);
> + }
> +
> + rc = device_register(&fn->dev);
> + if (rc) {
> + deconfigure_function(fn);
> + device_unregister(&fn->dev);
> + return ERR_PTR(rc);
> + }
> + return fn;
> +}
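
Also, if device_register() fails, my understanding is that the caller
is expected to drop the reference with put_device() rather than call
device_unregister(), as device_del() shouldn't run on a device that
was never added. So perhaps (untested):

	rc = device_register(&fn->dev);
	if (rc) {
		deconfigure_function(fn);
		put_device(&fn->dev);
		return ERR_PTR(rc);
	}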
> +
> +static void remove_function(struct ocxl_fn *fn)
> +{
> + deconfigure_function(fn);
> + device_unregister(&fn->dev);
> +}
> +
> +static int ocxl_probe(struct pci_dev *dev, const struct pci_device_id *id)
> +{
> + int rc, afu_count = 0;
> + u8 afu;
> + struct ocxl_fn *fn;
> +
> + if (!radix_enabled()) {
> + dev_err(&dev->dev, "Unsupported memory model (hash)\n");
> + return -ENODEV;
> + }
> +
> + fn = init_function(dev);
> + if (IS_ERR(fn)) {
> + dev_err(&dev->dev, "function init failed: %li\n",
> + PTR_ERR(fn));
> + return PTR_ERR(fn);
> + }
> +
> + for (afu = 0; afu <= fn->config.max_afu_index; afu++) {
> + rc = ocxl_config_check_afu_index(dev, &fn->config, afu);
> + if (rc > 0) {
> + rc = init_afu(dev, fn, afu);
> + if (rc) {
> + dev_err(&dev->dev,
> + "Can't initialize AFU index %d\n", afu);
> + continue;
> + }
> + afu_count++;
> + }
> + }
> + dev_info(&dev->dev, "%d AFU(s) configured\n", afu_count);
> + return 0;
> +}
> +
> +static void ocxl_remove(struct pci_dev *dev)
> +{
> + struct ocxl_afu *afu, *tmp;
> + struct ocxl_fn *fn = pci_get_drvdata(dev);
> +
> + list_for_each_entry_safe(afu, tmp, &fn->afu_list, list) {
> + remove_afu(afu);
> + }
> + remove_function(fn);
> +}
> +
> +struct pci_driver ocxl_pci_driver = {
> + .name = "ocxl",
> + .id_table = ocxl_pci_tbl,
> + .probe = ocxl_probe,
> + .remove = ocxl_remove,
> + .shutdown = ocxl_remove,
> +};
> diff --git a/drivers/misc/ocxl/sysfs.c b/drivers/misc/ocxl/sysfs.c
> new file mode 100644
> index 000000000000..b7b1d1735c07
> --- /dev/null
> +++ b/drivers/misc/ocxl/sysfs.c
> @@ -0,0 +1,150 @@
> +/*
> + * Copyright 2017 IBM Corp.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License
> + * as published by the Free Software Foundation; either version
> + * 2 of the License, or (at your option) any later version.
> + */
> +
> +#include <linux/sysfs.h>
> +#include "ocxl_internal.h"
> +
> +static ssize_t global_mmio_size_show(struct device *device,
> + struct device_attribute *attr,
> + char *buf)
> +{
> + struct ocxl_afu *afu = to_ocxl_afu(device);
> +
> + return scnprintf(buf, PAGE_SIZE, "%d\n",
> + afu->config.global_mmio_size);
> +}
> +
> +static ssize_t pp_mmio_size_show(struct device *device,
> + struct device_attribute *attr,
> + char *buf)
> +{
> + struct ocxl_afu *afu = to_ocxl_afu(device);
> +
> + return scnprintf(buf, PAGE_SIZE, "%d\n",
> + afu->config.pp_mmio_stride);
> +}
> +
> +static ssize_t afu_version_show(struct device *device,
> + struct device_attribute *attr,
> + char *buf)
> +{
> + struct ocxl_afu *afu = to_ocxl_afu(device);
> +
> + return scnprintf(buf, PAGE_SIZE, "%hhu:%hhu\n",
> + afu->config.version_major,
> + afu->config.version_minor);
> +}
> +
> +static ssize_t contexts_show(struct device *device,
> + struct device_attribute *attr,
> + char *buf)
> +{
> + struct ocxl_afu *afu = to_ocxl_afu(device);
> +
> + return scnprintf(buf, PAGE_SIZE, "%d/%d\n",
> + afu->pasid_count, afu->pasid_max);
> +}
> +
> +static struct device_attribute afu_attrs[] = {
> + __ATTR_RO(global_mmio_size),
> + __ATTR_RO(pp_mmio_size),
> + __ATTR_RO(afu_version),
> + __ATTR_RO(contexts),
> +};
> +
> +static ssize_t global_mmio_read(struct file *filp, struct kobject *kobj,
> + struct bin_attribute *bin_attr, char *buf,
> + loff_t off, size_t count)
> +{
> + struct ocxl_afu *afu = to_ocxl_afu(kobj_to_dev(kobj));
> +
> + if (count == 0 || off < 0 ||
> + off >= afu->config.global_mmio_size)
> + return 0;
> +
> + memcpy(buf, afu->global_mmio_ptr + off, count);
drivers/misc/ocxl/sysfs.c:64:42: warning: incorrect type in argument 2 (different address spaces)
drivers/misc/ocxl/sysfs.c:64:42:    expected void const *<noident>
drivers/misc/ocxl/sysfs.c:64:42:    got void [noderef] <asn:2>*
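
global_mmio_ptr is an __iomem pointer, so plain memcpy() isn't the
right accessor here. memcpy_fromio() should silence the warning and
use the proper MMIO copy routine, i.e. (untested):

	memcpy_fromio(buf, afu->global_mmio_ptr + off, count);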
> + return count;
> +}
> +
> +static int global_mmio_fault(struct vm_fault *vmf)
> +{
> + struct vm_area_struct *vma = vmf->vma;
> + struct ocxl_afu *afu = vma->vm_private_data;
> + unsigned long offset;
> +
> + if (vmf->pgoff >= (afu->config.global_mmio_size >> PAGE_SHIFT))
> + return VM_FAULT_SIGBUS;
> +
> + offset = vmf->pgoff;
> + offset += (afu->global_mmio_start >> PAGE_SHIFT);
> + vm_insert_pfn(vma, vmf->address, offset);
> + return VM_FAULT_NOPAGE;
> +}
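
While here: global_mmio_fault() ignores the return value of
vm_insert_pfn(). Probably fine in practice, but if the insert ever
fails we'd return VM_FAULT_NOPAGE and just re-fault forever. Maybe
something like the following (untested, and -EBUSY from a racing
fault would arguably still want NOPAGE):

	if (vm_insert_pfn(vma, vmf->address, offset) < 0)
		return VM_FAULT_SIGBUS;
	return VM_FAULT_NOPAGE;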
> +
> +static const struct vm_operations_struct global_mmio_vmops = {
> + .fault = global_mmio_fault,
> +};
> +
> +static int global_mmio_mmap(struct file *filp, struct kobject *kobj,
> + struct bin_attribute *bin_attr,
> + struct vm_area_struct *vma)
> +{
> + struct ocxl_afu *afu = to_ocxl_afu(kobj_to_dev(kobj));
> +
> + if ((vma_pages(vma) + vma->vm_pgoff) >
> + (afu->config.global_mmio_size >> PAGE_SHIFT))
> + return -EINVAL;
> +
> + vma->vm_flags |= VM_IO | VM_PFNMAP;
> + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
> + vma->vm_ops = &global_mmio_vmops;
> + vma->vm_private_data = afu;
> + return 0;
> +}
> +
> +int ocxl_sysfs_add_afu(struct ocxl_afu *afu)
> +{
> + int i, rc;
> +
> + for (i = 0; i < ARRAY_SIZE(afu_attrs); i++) {
> + rc = device_create_file(&afu->dev, &afu_attrs[i]);
> + if (rc)
> + goto err;
> + }
> +
> + sysfs_attr_init(&afu->attr_global_mmio.attr);
> + afu->attr_global_mmio.attr.name = "global_mmio_area";
> + afu->attr_global_mmio.attr.mode = 0600;
> + afu->attr_global_mmio.size = afu->config.global_mmio_size;
> + afu->attr_global_mmio.read = global_mmio_read;
> + afu->attr_global_mmio.mmap = global_mmio_mmap;
> + rc = device_create_bin_file(&afu->dev, &afu->attr_global_mmio);
> + if (rc) {
> + dev_err(&afu->dev,
> + "Unable to create global mmio attr for afu: %d\n",
> + rc);
> + goto err;
> + }
> +
> + return 0;
> +
> +err:
> + for (i--; i >= 0; i--)
> + device_remove_file(&afu->dev, &afu_attrs[i]);
> + return rc;
> +}
> +
> +void ocxl_sysfs_remove_afu(struct ocxl_afu *afu)
> +{
> + int i;
> +
> + for (i = 0; i < ARRAY_SIZE(afu_attrs); i++)
> + device_remove_file(&afu->dev, &afu_attrs[i]);
> + device_remove_bin_file(&afu->dev, &afu->attr_global_mmio);
> +}
> diff --git a/include/uapi/misc/ocxl.h b/include/uapi/misc/ocxl.h
> new file mode 100644
> index 000000000000..71fa387f2efd
> --- /dev/null
> +++ b/include/uapi/misc/ocxl.h
> @@ -0,0 +1,47 @@
> +/*
> + * Copyright 2017 IBM Corp.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License
> + * as published by the Free Software Foundation; either version
> + * 2 of the License, or (at your option) any later version.
> + */
> +
> +#ifndef _UAPI_MISC_OCXL_H
> +#define _UAPI_MISC_OCXL_H
> +
> +#include <linux/types.h>
> +#include <linux/ioctl.h>
> +
> +enum ocxl_event_type {
> + OCXL_AFU_EVENT_XSL_FAULT_ERROR = 0,
> +};
> +
> +#define OCXL_KERNEL_EVENT_FLAG_LAST 0x0001 /* This is the last event pending */
> +
> +struct ocxl_kernel_event_header {
> + __u16 type;
> + __u16 flags;
> + __u32 reserved;
> +};
> +
> +struct ocxl_kernel_event_xsl_fault_error {
> + __u64 addr;
> + __u64 dsisr;
> + __u64 count;
> + __u64 reserved;
> +};
> +
> +struct ocxl_ioctl_attach {
> + __u64 amr;
> + __u64 reserved1;
> + __u64 reserved2;
> + __u64 reserved3;
> +};
> +
> +/* ioctl numbers */
> +#define OCXL_MAGIC 0xCA
> +/* AFU devices */
> +#define OCXL_IOCTL_ATTACH _IOW(OCXL_MAGIC, 0x10, struct ocxl_ioctl_attach)
> +
> +#endif /* _UAPI_MISC_OCXL_H */
>
--
Andrew Donnellan OzLabs, ADL Canberra
andrew.donnellan@....ibm.com IBM Australia Limited