[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20210114175044.clgelxe7csltyx2e@intel.com>
Date: Thu, 14 Jan 2021 09:50:44 -0800
From: Ben Widawsky <ben.widawsky@...el.com>
To: Jonathan Cameron <Jonathan.Cameron@...wei.com>
Cc: linux-cxl@...r.kernel.org, linux-kernel@...r.kernel.org,
linux-pci@...r.kernel.org,
"linux-acpi@...r.kernel.org, Ira Weiny" <ira.weiny@...el.com>,
Dan Williams <dan.j.williams@...el.com>,
Vishal Verma <vishal.l.verma@...el.com>,
"Kelley, Sean V" <sean.v.kelley@...el.com>,
Rafael Wysocki <rafael.j.wysocki@...el.com>,
Bjorn Helgaas <helgaas@...nel.org>,
Jon Masters <jcm@...masters.org>,
Chris Browy <cbrowy@...ry-design.com>,
Randy Dunlap <rdunlap@...radead.org>,
Christoph Hellwig <hch@...radead.org>,
daniel.lll@...baba-inc.com
Subject: Re: [RFC PATCH v3 07/16] cxl/mem: Implement polled mode mailbox
On 21-01-14 17:40:12, Jonathan Cameron wrote:
> On Mon, 11 Jan 2021 14:51:11 -0800
> Ben Widawsky <ben.widawsky@...el.com> wrote:
>
> > Provide enough functionality to utilize the mailbox of a memory device.
> > The mailbox is used to interact with the firmware running on the memory
> > device.
> >
> > The CXL specification defines separate capabilities for the mailbox and
> > the memory device. While we can confirm the mailbox is ready, in order
> > to actually interact with the memory device, you must also confirm the
> > device's firmware is ready.
> >
> > Create a function to handle sending a command, optionally with a
> > payload, to the memory device, polling on a result, and then optionally
> > copying out the payload. The algorithm for doing this comes straight out
> > of the CXL 2.0 specification.
> >
> > Primary mailboxes are capable of generating an interrupt when submitting
> > a command in the background. That implementation is saved for a later
> > time.
> >
> > Secondary mailboxes aren't implemented at this time.
> >
> > The flow is proven with one implemented command, "identify". Because the
> > class code has already told the driver this is a memory device and the
> > identify command is mandatory, it's safe to assume for sane devices that
> > everything here will work.
> >
> > Signed-off-by: Ben Widawsky <ben.widawsky@...el.com>
> One more thing that had me confused in a later patch (14)
>
> J
> > ---
> > drivers/cxl/cxl.h | 43 +++++++
> > drivers/cxl/mem.c | 312 ++++++++++++++++++++++++++++++++++++++++++++++
> > 2 files changed, 355 insertions(+)
> >
> > diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
> > index a77286d04ce4..ca3fa496e21c 100644
> > --- a/drivers/cxl/cxl.h
> > +++ b/drivers/cxl/cxl.h
> > @@ -32,9 +32,40 @@
> > #define CXLDEV_MB_CAP_PAYLOAD_SIZE_MASK GENMASK(4, 0)
> > #define CXLDEV_MB_CAP_PAYLOAD_SIZE_SHIFT 0
> > #define CXLDEV_MB_CTRL_OFFSET 0x04
> > +#define CXLDEV_MB_CTRL_DOORBELL BIT(0)
> > #define CXLDEV_MB_CMD_OFFSET 0x08
> > +#define CXLDEV_MB_CMD_COMMAND_OPCODE_SHIFT 0
> > +#define CXLDEV_MB_CMD_COMMAND_OPCODE_MASK GENMASK(15, 0)
> > +#define CXLDEV_MB_CMD_PAYLOAD_LENGTH_SHIFT 16
> > +#define CXLDEV_MB_CMD_PAYLOAD_LENGTH_MASK GENMASK(36, 16)
> > #define CXLDEV_MB_STATUS_OFFSET 0x10
> > +#define CXLDEV_MB_STATUS_RET_CODE_SHIFT 32
> > +#define CXLDEV_MB_STATUS_RET_CODE_MASK GENMASK(47, 32)
> > #define CXLDEV_MB_BG_CMD_STATUS_OFFSET 0x18
> > +#define CXLDEV_MB_PAYLOAD_OFFSET 0x20
> > +
> > +/* Memory Device (CXL 2.0 - 8.2.8.5.1.1) */
> > +#define CXLMDEV_STATUS_OFFSET 0x0
> > +#define CXLMDEV_DEV_FATAL BIT(0)
> > +#define CXLMDEV_FW_HALT BIT(1)
> > +#define CXLMDEV_STATUS_MEDIA_STATUS_SHIFT 2
> > +#define CXLMDEV_STATUS_MEDIA_STATUS_MASK GENMASK(3, 2)
> > +#define CXLMDEV_MS_NOT_READY 0
> > +#define CXLMDEV_MS_READY 1
> > +#define CXLMDEV_MS_ERROR 2
> > +#define CXLMDEV_MS_DISABLED 3
> > +#define CXLMDEV_READY(status) \
> > + (CXL_GET_FIELD(status, CXLMDEV_STATUS_MEDIA_STATUS) == CXLMDEV_MS_READY)
> > +#define CXLMDEV_MBOX_IF_READY BIT(4)
> > +#define CXLMDEV_RESET_NEEDED_SHIFT 5
> > +#define CXLMDEV_RESET_NEEDED_MASK GENMASK(7, 5)
> > +#define CXLMDEV_RESET_NEEDED_NOT 0
> > +#define CXLMDEV_RESET_NEEDED_COLD 1
> > +#define CXLMDEV_RESET_NEEDED_WARM 2
> > +#define CXLMDEV_RESET_NEEDED_HOT 3
> > +#define CXLMDEV_RESET_NEEDED_CXL 4
> > +#define CXLMDEV_RESET_NEEDED(status) \
> > + (CXL_GET_FIELD(status, CXLMDEV_RESET_NEEDED) != CXLMDEV_RESET_NEEDED_NOT)
> >
> > /**
> > * struct cxl_mem - A CXL memory device
> > @@ -45,6 +76,16 @@ struct cxl_mem {
> > struct pci_dev *pdev;
> > void __iomem *regs;
> >
> > + struct {
> > + struct range range;
> > + } pmem;
> > +
> > + struct {
> > + struct range range;
> > + } ram;
> > +
> > + char firmware_version[0x10];
> > +
> > /* Cap 0001h - CXL_CAP_CAP_ID_DEVICE_STATUS */
> > struct {
> > void __iomem *regs;
> > @@ -52,6 +93,7 @@ struct cxl_mem {
> >
> > /* Cap 0002h - CXL_CAP_CAP_ID_PRIMARY_MAILBOX */
> > struct {
> > + struct mutex mutex; /* Protects device mailbox and firmware */
> > void __iomem *regs;
> > size_t payload_size;
> > } mbox;
> > @@ -90,6 +132,7 @@ struct cxl_mem {
> >
> > cxl_reg(status);
> > cxl_reg(mbox);
> > +cxl_reg(mem);
> >
> > #define cxl_payload_regs(cxlm) \
> > ((void __iomem *)(cxlm)->mbox.regs + CXLDEV_MB_PAYLOAD_OFFSET)
> > diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c
> > index 8da9f4a861ea..e9ba97bbd7b9 100644
> > --- a/drivers/cxl/mem.c
> > +++ b/drivers/cxl/mem.c
> > @@ -1,5 +1,6 @@
> > // SPDX-License-Identifier: GPL-2.0-only
> > /* Copyright(c) 2020 Intel Corporation. All rights reserved. */
> > +#include <linux/sched/clock.h>
> > #include <linux/module.h>
> > #include <linux/pci.h>
> > #include <linux/io.h>
> > @@ -7,6 +8,248 @@
> > #include "pci.h"
> > #include "cxl.h"
> >
> > +#define cxl_doorbell_busy(cxlm) \
> > + (cxl_read_mbox_reg32(cxlm, CXLDEV_MB_CTRL_OFFSET) & \
> > + CXLDEV_MB_CTRL_DOORBELL)
> > +
> > +#define CXL_MAILBOX_TIMEOUT_US 2000
> > +
> > +enum opcode {
> > + CXL_MBOX_OP_IDENTIFY = 0x4000,
> > + CXL_MBOX_OP_MAX = 0x10000
> > +};
> > +
> > +/**
> > + * struct mbox_cmd - A command to be submitted to hardware.
> > + * @opcode: (input) The command set and command submitted to hardware.
> > + * @payload: (input/output) Pointer to the input and output payload.
> > + * Payload can be NULL if the caller wants to populate the payload
> > + * registers themselves (potentially avoiding a copy).
> > + * @size_in: (input) Number of bytes to load from @payload.
> > + * @size_out:
> > + * - (input) Number of bytes allocated to load into @payload.
>
> I'm not actually seeing where this is used as an input. I'd expect a min(input, output)
> at the memcpy but there isn't one there.
>
The functionality has changed since v2, and this interface no longer deals with
restricting the output length. This is because at some intermediate point I had
entirely removed the copying in/out of the payload registers from this command.
As you can see, it came back.
For userspace submitted commands the lengths are all validated up front before
we get here.
For commands submitted directly from the kernel, I've gone back and forth about
how useful restricting the output length is. If you feel strongly that it is
useful, I can add it back. It sounds like you want me to convert everything back
to an intermediate buffer, which I will respond to in that patch, so I'd need to
rework it anyway if that's the path we end up taking.
> > + * - (output) Number of bytes loaded into @payload.
> > + * @return_code: (output) Error code returned from hardware.
> > + *
> > + * This is the primary mechanism used to send commands to the hardware.
> > + * All the fields except @payload correspond exactly to the fields described in
> > + * Command Register section of the CXL 2.0 spec (8.2.8.4.5). @payload
> > + * corresponds to the Command Payload Registers (8.2.8.4.8).
> > + */
> > +struct mbox_cmd {
> > + u16 opcode;
> > + void *payload;
> > + size_t size_in;
> > + size_t size_out;
> > + u16 return_code;
> > +};
> > +
> > +static int cxl_mem_wait_for_doorbell(struct cxl_mem *cxlm)
> > +{
> > + const int timeout = msecs_to_jiffies(CXL_MAILBOX_TIMEOUT_US);
> > + const unsigned long start = jiffies;
> > + unsigned long end = start;
> > +
> > + while (cxl_doorbell_busy(cxlm)) {
> > + end = jiffies;
> > +
> > + if (time_after(end, start + timeout)) {
> > + /* Check again in case preempted before timeout test */
> > + if (!cxl_doorbell_busy(cxlm))
> > + break;
> > + return -ETIMEDOUT;
> > + }
> > + cpu_relax();
> > + }
> > +
> > + dev_dbg(&cxlm->pdev->dev, "Doorbell wait took %dms",
> > + jiffies_to_msecs(end) - jiffies_to_msecs(start));
> > + return 0;
> > +}
> > +
> > +static void cxl_mem_mbox_timeout(struct cxl_mem *cxlm,
> > + struct mbox_cmd *mbox_cmd)
> > +{
> > + dev_warn(&cxlm->pdev->dev, "Mailbox command timed out\n");
> > + dev_info(&cxlm->pdev->dev,
> > + "\topcode: 0x%04x\n"
> > + "\tpayload size: %zub\n",
> > + mbox_cmd->opcode, mbox_cmd->size_in);
> > + print_hex_dump_debug("Payload ", DUMP_PREFIX_OFFSET, 16, 1,
> > + mbox_cmd->payload, mbox_cmd->size_in, true);
> > +
> > + /* Here's a good place to figure out if a device reset is needed */
> > +}
> > +
> > +/**
> > + * cxl_mem_mbox_send_cmd() - Send a mailbox command to a memory device.
> > + * @cxlm: The CXL memory device to communicate with.
> > + * @mbox_cmd: Command to send to the memory device.
> > + *
> > + * Context: Any context. Expects mbox_lock to be held.
> > + * Return: -ETIMEDOUT if timeout occurred waiting for completion. 0 on success.
> > + * Caller should check the return code in @mbox_cmd to make sure it
> > + * succeeded.
> > + *
> > + * This is a generic form of the CXL mailbox send command, thus the only I/O
> > + * operations used are cxl_read_mbox_reg(). Memory devices, and perhaps other
> > + * types of CXL devices may have further information available upon error
> > + * conditions.
> > + *
> > + * FIXME: As stated above, references to &struct cxl_mem should be changed to a
> > + * more generic cxl structure when needed.
> > + */
> > +static int cxl_mem_mbox_send_cmd(struct cxl_mem *cxlm,
> > + struct mbox_cmd *mbox_cmd)
> > +{
> > + u64 cmd_reg, status_reg;
> > + size_t out_len;
> > + int rc;
> > +
> > + lockdep_assert_held(&cxlm->mbox.mutex);
> > +
> > + /*
> > + * Here are the steps from 8.2.8.4 of the CXL 2.0 spec.
> > + * 1. Caller reads MB Control Register to verify doorbell is clear
> > + * 2. Caller writes Command Register
> > + * 3. Caller writes Command Payload Registers if input payload is non-empty
> > + * 4. Caller writes MB Control Register to set doorbell
> > + * 5. Caller either polls for doorbell to be clear or waits for interrupt if configured
> > + * 6. Caller reads MB Status Register to fetch Return code
> > + * 7. If command successful, Caller reads Command Register to get Payload Length
> > + * 8. If output payload is non-empty, host reads Command Payload Registers
> > + */
> > +
> > + /* #1 */
> > + WARN_ON(cxl_doorbell_busy(cxlm));
> > +
> > + cmd_reg = CXL_SET_FIELD(mbox_cmd->opcode, CXLDEV_MB_CMD_COMMAND_OPCODE);
> > + if (mbox_cmd->size_in) {
> > + cmd_reg |= CXL_SET_FIELD(mbox_cmd->size_in,
> > + CXLDEV_MB_CMD_PAYLOAD_LENGTH);
> > + if (mbox_cmd->payload)
> > + memcpy_toio(cxl_payload_regs(cxlm), mbox_cmd->payload,
> > + mbox_cmd->size_in);
> > + }
> > +
> > + /* #2, #3 */
> > + cxl_write_mbox_reg64(cxlm, CXLDEV_MB_CMD_OFFSET, cmd_reg);
> > +
> > + /* #4 */
> > + dev_dbg(&cxlm->pdev->dev, "Sending command\n");
> > + cxl_write_mbox_reg32(cxlm, CXLDEV_MB_CTRL_OFFSET,
> > + CXLDEV_MB_CTRL_DOORBELL);
> > +
> > + /* #5 */
> > + rc = cxl_mem_wait_for_doorbell(cxlm);
> > + if (rc == -ETIMEDOUT) {
> > + cxl_mem_mbox_timeout(cxlm, mbox_cmd);
> > + return rc;
> > + }
> > +
> > + /* #6 */
> > + status_reg = cxl_read_mbox_reg64(cxlm, CXLDEV_MB_STATUS_OFFSET);
> > + mbox_cmd->return_code =
> > + CXL_GET_FIELD(status_reg, CXLDEV_MB_STATUS_RET_CODE);
> > +
> > + if (mbox_cmd->return_code != 0) {
> > + dev_dbg(&cxlm->pdev->dev, "Mailbox operation had an error\n");
> > + return 0;
> > + }
> > +
> > + /* #7 */
> > + cmd_reg = cxl_read_mbox_reg64(cxlm, CXLDEV_MB_CMD_OFFSET);
> > + out_len = CXL_GET_FIELD(cmd_reg, CXLDEV_MB_CMD_PAYLOAD_LENGTH);
> > + mbox_cmd->size_out = out_len;
> > +
> > + /* #8 */
> > + if (out_len && mbox_cmd->payload)
> > + memcpy_fromio(mbox_cmd->payload, cxl_payload_regs(cxlm),
> > + mbox_cmd->size_out);
> > +
> > + return 0;
> > +}
> > +
> > +/**
> > + * cxl_mem_mbox_get() - Acquire exclusive access to the mailbox.
> > + * @cxlm: The memory device to gain access to.
> > + *
> > + * Context: Any context. Takes the mbox_lock.
> > + * Return: 0 if exclusive access was acquired.
> > + */
> > +static int cxl_mem_mbox_get(struct cxl_mem *cxlm)
> > +{
> > + u64 md_status;
> > + int rc = -EBUSY;
> > +
> > + mutex_lock_io(&cxlm->mbox.mutex);
> > +
> > + /*
> > + * XXX: There is some amount of ambiguity in the 2.0 version of the spec
> > + * around the mailbox interface ready (8.2.8.5.1.1). The purpose of the
> > + * bit is to allow firmware running on the device to notify us that it's
> > + * ready to receive commands. It is unclear if the bit needs to be read
> > + * every time one tries to use the mailbox, ie. the firmware can switch
> > + * it on and off as needed. Second, there is no defined timeout for
> > + * mailbox ready, like there is for the doorbell interface.
> > + *
> > + * As such, we make the following assumptions:
> > + * 1. The firmware might toggle the Mailbox Interface Ready bit, and so
> > + * we check it for every command.
> > + * 2. If the doorbell is clear, the firmware should have first set the
> > + * Mailbox Interface Ready bit. Therefore, waiting for the doorbell
> > + * to be ready is a sufficient amount of time.
> > + */
> > + rc = cxl_mem_wait_for_doorbell(cxlm);
> > + if (rc) {
> > + dev_warn(&cxlm->pdev->dev, "Mailbox interface not ready\n");
> > + goto out;
> > + }
> > +
> > + md_status = cxl_read_mem_reg64(cxlm, CXLMDEV_STATUS_OFFSET);
> > + if (md_status & CXLMDEV_MBOX_IF_READY && CXLMDEV_READY(md_status)) {
> > + /*
> > + * Hardware shouldn't allow a ready status but also have failure
> > + * bits set. Spit out an error, this should be a bug report
> > + */
> > + if (md_status & CXLMDEV_DEV_FATAL) {
> > + dev_err(&cxlm->pdev->dev,
> > + "CXL device reporting ready and fatal\n");
> > + rc = -EFAULT;
> > + goto out;
> > + }
> > + if (md_status & CXLMDEV_FW_HALT) {
> > + dev_err(&cxlm->pdev->dev,
> > + "CXL device reporting ready and halted\n");
> > + rc = -EFAULT;
> > + goto out;
> > + }
> > + if (CXLMDEV_RESET_NEEDED(md_status)) {
> > + dev_err(&cxlm->pdev->dev,
> > + "CXL device reporting ready and reset needed\n");
> > + rc = -EFAULT;
> > + goto out;
> > + }
> > +
> > + return 0;
> > + }
> > +
> > +out:
> > + mutex_unlock(&cxlm->mbox.mutex);
> > + return rc;
> > +}
> > +
> > +/**
> > + * cxl_mem_mbox_put() - Release exclusive access to the mailbox.
> > + * @cxlm: The CXL memory device to communicate with.
> > + *
> > + * Context: Any context. Expects mbox_lock to be held.
> > + */
> > +static void cxl_mem_mbox_put(struct cxl_mem *cxlm)
> > +{
> > + mutex_unlock(&cxlm->mbox.mutex);
> > +}
> > +
> > /**
> > * cxl_mem_setup_regs() - Setup necessary MMIO.
> > * @cxlm: The CXL memory device to communicate with.
> > @@ -135,6 +378,8 @@ static struct cxl_mem *cxl_mem_create(struct pci_dev *pdev, u32 reg_lo,
> > return NULL;
> > }
> >
> > + mutex_init(&cxlm->mbox.mutex);
> > +
> > regs = pcim_iomap_table(pdev)[bar];
> > cxlm->pdev = pdev;
> > cxlm->regs = regs + offset;
> > @@ -167,6 +412,69 @@ static int cxl_mem_dvsec(struct pci_dev *pdev, int dvsec)
> > return 0;
> > }
> >
> > +/**
> > + * cxl_mem_identify() - Send the IDENTIFY command to the device.
> > + * @cxlm: The device to identify.
> > + *
> > + * Return: 0 if identify was executed successfully.
> > + *
> > + * This will dispatch the identify command to the device and on success populate
> > + * structures to be exported to sysfs.
> > + */
> > +static int cxl_mem_identify(struct cxl_mem *cxlm)
> > +{
> > + struct cxl_mbox_identify {
> > + char fw_revision[0x10];
> > + __le64 total_capacity;
> > + __le64 volatile_capacity;
> > + __le64 persistent_capacity;
> > + __le64 partition_align;
> > + __le16 info_event_log_size;
> > + __le16 warning_event_log_size;
> > + __le16 failure_event_log_size;
> > + __le16 fatal_event_log_size;
> > + __le32 lsa_size;
> > + u8 poison_list_max_mer[3];
> > + __le16 inject_poison_limit;
> > + u8 poison_caps;
> > + u8 qos_telemetry_caps;
> > + } __packed id;
> > + struct mbox_cmd mbox_cmd;
> > + int rc;
> > +
> > + /* Retrieve initial device memory map */
> > + rc = cxl_mem_mbox_get(cxlm);
> > + if (rc)
> > + return rc;
> > +
> > + mbox_cmd = (struct mbox_cmd){
> > + .opcode = CXL_MBOX_OP_IDENTIFY,
> > + .payload = &id,
> > + .size_in = 0,
> > + };
> > + rc = cxl_mem_mbox_send_cmd(cxlm, &mbox_cmd);
> > + cxl_mem_mbox_put(cxlm);
> > + if (rc)
> > + return rc;
> > +
> > + if (mbox_cmd.size_out != sizeof(id))
> > + return -ENXIO;
> > +
> > + /*
> > + * TODO: enumerate DPA map, as 'ram' and 'pmem' do not alias.
> > + * For now, only the capacity is exported in sysfs
> > + */
> > + cxlm->ram.range.start = 0;
> > + cxlm->ram.range.end = le64_to_cpu(id.volatile_capacity) - 1;
> > +
> > + cxlm->pmem.range.start = 0;
> > + cxlm->pmem.range.end = le64_to_cpu(id.persistent_capacity) - 1;
> > +
> > + memcpy(cxlm->firmware_version, id.fw_revision, sizeof(id.fw_revision));
> > +
> > + return rc;
> > +}
> > +
> > static int cxl_mem_probe(struct pci_dev *pdev, const struct pci_device_id *id)
> > {
> > struct device *dev = &pdev->dev;
> > @@ -222,6 +530,10 @@ static int cxl_mem_probe(struct pci_dev *pdev, const struct pci_device_id *id)
> > if (rc)
> > goto err;
> >
> > + rc = cxl_mem_identify(cxlm);
> > + if (rc)
> > + goto err;
> > +
> > pci_set_drvdata(pdev, cxlm);
> > return 0;
> >
>
Powered by blists - more mailing lists