lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CANXhq0qff3ShvkvVrTuYLWtj1g6_ABg=pCGJBgQY=1ce8i5dGw@mail.gmail.com>
Date: Fri, 17 May 2024 15:18:56 +0800
From: Zong Li <zong.li@...ive.com>
To: Tomasz Jeznach <tjeznach@...osinc.com>
Cc: Joerg Roedel <joro@...tes.org>, Will Deacon <will@...nel.org>, 
	Robin Murphy <robin.murphy@....com>, Paul Walmsley <paul.walmsley@...ive.com>, 
	Anup Patel <apatel@...tanamicro.com>, devicetree@...r.kernel.org, 
	Conor Dooley <conor+dt@...nel.org>, Albert Ou <aou@...s.berkeley.edu>, linux@...osinc.com, 
	linux-kernel@...r.kernel.org, Rob Herring <robh+dt@...nel.org>, 
	Sebastien Boeuf <seb@...osinc.com>, iommu@...ts.linux.dev, 
	Palmer Dabbelt <palmer@...belt.com>, Nick Kossifidis <mick@....forth.gr>, 
	Krzysztof Kozlowski <krzk+dt@...nel.org>, linux-riscv@...ts.infradead.org, 
	Lu Baolu <baolu.lu@...ux.intel.com>
Subject: Re: [PATCH v5 6/7] iommu/riscv: Command and fault queue support

On Wed, May 15, 2024 at 2:17 AM Tomasz Jeznach <tjeznach@...osinc.com> wrote:
>
> Introduce device command submission and fault reporting queues,
> as described in Chapter 3.1 and 3.2 of the RISC-V IOMMU Architecture
> Specification.
>
> Command and fault queues are instantiated in contiguous system memory
> local to IOMMU device domain, or mapped from fixed I/O space provided
> by the hardware implementation. Detection of the location and maximum
> allowed size of the queue utilize WARL properties of queue base control
> register. Driver implementation will try to allocate up to 128KB of
> system memory, while respecting hardware supported maximum queue size.
>
> Interrupts allocation is based on interrupt vectors availability and
> distributed to all queues in simple round-robin fashion. For hardware
> Implementation with fixed event type to interrupt vector assignment
> IVEC WARL property is used to discover such mappings.
>
> Address translation, command and queue fault handling in this change
> is limited to simple fault reporting without taking any action.
>
> Reviewed-by: Lu Baolu <baolu.lu@...ux.intel.com>
> Signed-off-by: Tomasz Jeznach <tjeznach@...osinc.com>
> ---
>  drivers/iommu/riscv/iommu-bits.h |  75 +++++
>  drivers/iommu/riscv/iommu.c      | 507 ++++++++++++++++++++++++++++++-
>  drivers/iommu/riscv/iommu.h      |  21 ++
>  3 files changed, 601 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/iommu/riscv/iommu-bits.h b/drivers/iommu/riscv/iommu-bits.h
> index 48f795a0cd10..78b98b4546ba 100644
> --- a/drivers/iommu/riscv/iommu-bits.h
> +++ b/drivers/iommu/riscv/iommu-bits.h
> @@ -704,4 +704,79 @@ struct riscv_iommu_msi_pte {
>  #define RISCV_IOMMU_MSI_MRIF_NPPN      RISCV_IOMMU_PPN_FIELD
>  #define RISCV_IOMMU_MSI_MRIF_NID_MSB   BIT_ULL(60)
>
> +/* Helper functions: command structure builders. */
> +
> +static inline void riscv_iommu_cmd_inval_vma(struct riscv_iommu_command *cmd)
> +{
> +       cmd->dword0 = FIELD_PREP(RISCV_IOMMU_CMD_OPCODE, RISCV_IOMMU_CMD_IOTINVAL_OPCODE) |
> +                     FIELD_PREP(RISCV_IOMMU_CMD_FUNC, RISCV_IOMMU_CMD_IOTINVAL_FUNC_VMA);
> +       cmd->dword1 = 0;
> +}
> +
> +static inline void riscv_iommu_cmd_inval_set_addr(struct riscv_iommu_command *cmd,
> +                                                 u64 addr)
> +{
> +       cmd->dword1 = FIELD_PREP(RISCV_IOMMU_CMD_IOTINVAL_ADDR, phys_to_pfn(addr));
> +       cmd->dword0 |= RISCV_IOMMU_CMD_IOTINVAL_AV;
> +}
> +
> +static inline void riscv_iommu_cmd_inval_set_pscid(struct riscv_iommu_command *cmd,
> +                                                  int pscid)
> +{
> +       cmd->dword0 |= FIELD_PREP(RISCV_IOMMU_CMD_IOTINVAL_PSCID, pscid) |
> +                      RISCV_IOMMU_CMD_IOTINVAL_PSCV;
> +}
> +
> +static inline void riscv_iommu_cmd_inval_set_gscid(struct riscv_iommu_command *cmd,
> +                                                  int gscid)
> +{
> +       cmd->dword0 |= FIELD_PREP(RISCV_IOMMU_CMD_IOTINVAL_GSCID, gscid) |
> +                      RISCV_IOMMU_CMD_IOTINVAL_GV;
> +}
> +
> +static inline void riscv_iommu_cmd_iofence(struct riscv_iommu_command *cmd)
> +{
> +       cmd->dword0 = FIELD_PREP(RISCV_IOMMU_CMD_OPCODE, RISCV_IOMMU_CMD_IOFENCE_OPCODE) |
> +                     FIELD_PREP(RISCV_IOMMU_CMD_FUNC, RISCV_IOMMU_CMD_IOFENCE_FUNC_C) |
> +                     RISCV_IOMMU_CMD_IOFENCE_PR | RISCV_IOMMU_CMD_IOFENCE_PW;
> +       cmd->dword1 = 0;
> +}
> +
> +static inline void riscv_iommu_cmd_iofence_set_av(struct riscv_iommu_command *cmd,
> +                                                 u64 addr, u32 data)
> +{
> +       cmd->dword0 = FIELD_PREP(RISCV_IOMMU_CMD_OPCODE, RISCV_IOMMU_CMD_IOFENCE_OPCODE) |
> +                     FIELD_PREP(RISCV_IOMMU_CMD_FUNC, RISCV_IOMMU_CMD_IOFENCE_FUNC_C) |
> +                     FIELD_PREP(RISCV_IOMMU_CMD_IOFENCE_DATA, data) |
> +                     RISCV_IOMMU_CMD_IOFENCE_AV;
> +       cmd->dword1 = addr >> 2;
> +}
> +
> +static inline void riscv_iommu_cmd_iodir_inval_ddt(struct riscv_iommu_command *cmd)
> +{
> +       cmd->dword0 = FIELD_PREP(RISCV_IOMMU_CMD_OPCODE, RISCV_IOMMU_CMD_IODIR_OPCODE) |
> +                     FIELD_PREP(RISCV_IOMMU_CMD_FUNC, RISCV_IOMMU_CMD_IODIR_FUNC_INVAL_DDT);
> +       cmd->dword1 = 0;
> +}
> +
> +static inline void riscv_iommu_cmd_iodir_inval_pdt(struct riscv_iommu_command *cmd)
> +{
> +       cmd->dword0 = FIELD_PREP(RISCV_IOMMU_CMD_OPCODE, RISCV_IOMMU_CMD_IODIR_OPCODE) |
> +                     FIELD_PREP(RISCV_IOMMU_CMD_FUNC, RISCV_IOMMU_CMD_IODIR_FUNC_INVAL_PDT);
> +       cmd->dword1 = 0;
> +}
> +
> +static inline void riscv_iommu_cmd_iodir_set_did(struct riscv_iommu_command *cmd,
> +                                                unsigned int devid)
> +{
> +       cmd->dword0 |= FIELD_PREP(RISCV_IOMMU_CMD_IODIR_DID, devid) |
> +                      RISCV_IOMMU_CMD_IODIR_DV;
> +}
> +
> +static inline void riscv_iommu_cmd_iodir_set_pid(struct riscv_iommu_command *cmd,
> +                                                unsigned int pasid)
> +{
> +       cmd->dword0 |= FIELD_PREP(RISCV_IOMMU_CMD_IODIR_PID, pasid);
> +}
> +
>  #endif /* _RISCV_IOMMU_BITS_H_ */
> diff --git a/drivers/iommu/riscv/iommu.c b/drivers/iommu/riscv/iommu.c
> index 9ca130505c96..8666ea0517ee 100644
> --- a/drivers/iommu/riscv/iommu.c
> +++ b/drivers/iommu/riscv/iommu.c
> @@ -25,7 +25,14 @@
>  #include "iommu.h"
>
>  /* Timeouts in [us] */
> -#define RISCV_IOMMU_DDTP_TIMEOUT       50000
> +#define RISCV_IOMMU_QCSR_TIMEOUT       150000
> +#define RISCV_IOMMU_QUEUE_TIMEOUT      150000
> +#define RISCV_IOMMU_DDTP_TIMEOUT       10000000
> +#define RISCV_IOMMU_IOTINVAL_TIMEOUT   90000000
> +
> +/* Number of entries per CMD/FLT queue, should be <= INT_MAX */
> +#define RISCV_IOMMU_DEF_CQ_COUNT       8192
> +#define RISCV_IOMMU_DEF_FQ_COUNT       4096
>
>  /* RISC-V IOMMU PPN <> PHYS address conversions, PHYS <=> PPN[53:10] */
>  #define phys_to_ppn(pa)  (((pa) >> 2) & (((1ULL << 44) - 1) << 10))
> @@ -89,6 +96,446 @@ static void riscv_iommu_free_pages(struct riscv_iommu_device *iommu, void *addr)
>                        riscv_iommu_devres_pages_match, &devres);
>  }
>
> +/*
> + * Hardware queue allocation and management.
> + */
> +
> +/* Setup queue base, control registers and default queue length */
> +#define RISCV_IOMMU_QUEUE_INIT(q, name) do {                           \
> +       struct riscv_iommu_queue *_q = q;                               \
> +       _q->qid = RISCV_IOMMU_INTR_ ## name;                            \
> +       _q->qbr = RISCV_IOMMU_REG_ ## name ## B;                        \
> +       _q->qcr = RISCV_IOMMU_REG_ ## name ## CSR;                      \
> +       _q->mask = _q->mask ?: (RISCV_IOMMU_DEF_ ## name ## _COUNT) - 1;\
> +} while (0)
> +
> +/* Note: offsets are the same for all queues */
> +#define Q_HEAD(q) ((q)->qbr + (RISCV_IOMMU_REG_CQH - RISCV_IOMMU_REG_CQB))
> +#define Q_TAIL(q) ((q)->qbr + (RISCV_IOMMU_REG_CQT - RISCV_IOMMU_REG_CQB))
> +#define Q_ITEM(q, index) ((q)->mask & (index))
> +#define Q_IPSR(q) BIT((q)->qid)
> +
> +/*
> + * Discover queue ring buffer hardware configuration, allocate in-memory
> + * ring buffer or use fixed I/O memory location, configure queue base register.
> + * Must be called before hardware queue is enabled.
> + *
> + * @queue - data structure, configured with RISCV_IOMMU_QUEUE_INIT()
> + * @entry_size - queue single element size in bytes.
> + */
> +static int riscv_iommu_queue_alloc(struct riscv_iommu_device *iommu,
> +                                  struct riscv_iommu_queue *queue,
> +                                  size_t entry_size)
> +{
> +       unsigned int logsz;
> +       u64 qb, rb;
> +
> +       /*
> +        * Use WARL base register property to discover maximum allowed
> +        * number of entries and optional fixed IO address for queue location.
> +        */
> +       riscv_iommu_writeq(iommu, queue->qbr, RISCV_IOMMU_QUEUE_LOGSZ_FIELD);
> +       qb = riscv_iommu_readq(iommu, queue->qbr);
> +
> +       /*
> +        * Calculate and verify hardware supported queue length, as reported
> +        * by the field LOGSZ, where max queue length is equal to 2^(LOGSZ + 1).
> +        * Update queue size based on hardware supported value.
> +        */
> +       logsz = ilog2(queue->mask);
> +       if (logsz > FIELD_GET(RISCV_IOMMU_QUEUE_LOGSZ_FIELD, qb))
> +               logsz = FIELD_GET(RISCV_IOMMU_QUEUE_LOGSZ_FIELD, qb);
> +
> +       /*
> +        * Use WARL base register property to discover an optional fixed IO
> +        * address for queue ring buffer location. Otherwise allocate contigus
> +        * system memory.
> +        */
> +       if (FIELD_GET(RISCV_IOMMU_PPN_FIELD, qb)) {
> +               const size_t queue_size = entry_size << (logsz + 1);
> +
> +               queue->phys = ppn_to_phys(FIELD_GET(RISCV_IOMMU_PPN_FIELD, qb));
> +               queue->base = devm_ioremap(iommu->dev, queue->phys, queue_size);
> +       } else {
> +               do {
> +                       const size_t queue_size = entry_size << (logsz + 1);
> +                       const int order = get_order(queue_size);
> +
> +                       queue->base = riscv_iommu_get_pages(iommu, order);
> +                       queue->phys = __pa(queue->base);
> +               } while (!queue->base && logsz-- > 0);
> +       }
> +
> +       if (!queue->base)
> +               return -ENOMEM;
> +
> +       qb = phys_to_ppn(queue->phys) |
> +            FIELD_PREP(RISCV_IOMMU_QUEUE_LOGSZ_FIELD, logsz);
> +
> +       /* Update base register and read back to verify hw accepted our write */
> +       riscv_iommu_writeq(iommu, queue->qbr, qb);
> +       rb = riscv_iommu_readq(iommu, queue->qbr);
> +       if (rb != qb) {
> +               dev_err(iommu->dev, "queue #%u allocation failed\n", queue->qid);
> +               return -ENODEV;
> +       }
> +
> +       /* Update actual queue mask */
> +       queue->mask = (2U << logsz) - 1;
> +
> +       dev_dbg(iommu->dev, "queue #%u allocated 2^%u entries",
> +               queue->qid, logsz + 1);
> +
> +       return 0;
> +}
> +
> +/* Check interrupt queue status, IPSR */
> +static irqreturn_t riscv_iommu_queue_ipsr(int irq, void *data)
> +{
> +       struct riscv_iommu_queue *queue = (struct riscv_iommu_queue *)data;
> +
> +       if (riscv_iommu_readl(queue->iommu, RISCV_IOMMU_REG_IPSR) & Q_IPSR(queue))
> +               return IRQ_WAKE_THREAD;
> +
> +       return IRQ_NONE;
> +}
> +
> +static int riscv_iommu_queue_vec(struct riscv_iommu_device *iommu, int n)
> +{
> +       /* Reuse ICVEC.CIV mask for all interrupt vectors mapping. */
> +       return (iommu->icvec >> (n * 4)) & RISCV_IOMMU_ICVEC_CIV;
> +}
> +
> +/*
> + * Enable queue processing in the hardware, register interrupt handler.
> + *
> + * @queue - data structure, already allocated with riscv_iommu_queue_alloc()
> + * @irq_handler - threaded interrupt handler.
> + */
> +static int riscv_iommu_queue_enable(struct riscv_iommu_device *iommu,
> +                                   struct riscv_iommu_queue *queue,
> +                                   irq_handler_t irq_handler)
> +{
> +       const unsigned int irq = iommu->irqs[riscv_iommu_queue_vec(iommu, queue->qid)];
> +       u32 csr;
> +       int rc;
> +
> +       if (queue->iommu)
> +               return -EBUSY;
> +
> +       /* Polling not implemented */
> +       if (!irq)
> +               return -ENODEV;
> +
> +       queue->iommu = iommu;
> +       rc = request_threaded_irq(irq, riscv_iommu_queue_ipsr, irq_handler,
> +                                 IRQF_ONESHOT | IRQF_SHARED,
> +                                 dev_name(iommu->dev), queue);
> +       if (rc) {
> +               queue->iommu = NULL;
> +               return rc;
> +       }
> +
> +       /*
> +        * Enable queue with interrupts, clear any memory fault if any.
> +        * Wait for the hardware to acknowledge request and activate queue
> +        * processing.
> +        * Note: All CSR bitfields are in the same offsets for all queues.
> +        */
> +       riscv_iommu_writel(iommu, queue->qcr,
> +                          RISCV_IOMMU_QUEUE_ENABLE |
> +                          RISCV_IOMMU_QUEUE_INTR_ENABLE |
> +                          RISCV_IOMMU_QUEUE_MEM_FAULT);
> +
> +       riscv_iommu_readl_timeout(iommu, queue->qcr,
> +                                 csr, !(csr & RISCV_IOMMU_QUEUE_BUSY),
> +                                 10, RISCV_IOMMU_QCSR_TIMEOUT);
> +
> +       if (RISCV_IOMMU_QUEUE_ACTIVE != (csr & (RISCV_IOMMU_QUEUE_ACTIVE |
> +                                               RISCV_IOMMU_QUEUE_BUSY |
> +                                               RISCV_IOMMU_QUEUE_MEM_FAULT))) {
> +               /* Best effort to stop and disable failing hardware queue */
> +               riscv_iommu_writel(iommu, queue->qcr, 0);
> +               free_irq(irq, queue);
> +               queue->iommu = NULL;
> +               dev_err(iommu->dev, "queue #%u failed to start\n", queue->qid);
> +               return -EBUSY;
> +       }
> +
> +       /* Clear any pending interrupt flag. */
> +       riscv_iommu_writel(iommu, RISCV_IOMMU_REG_IPSR, Q_IPSR(queue));
> +
> +       return 0;
> +}
> +
> +/*
> + * Disable queue. Wait for the hardware to acknowledge request and
> + * stop processing enqueued requests. Report errors but continue.
> + */
> +static void riscv_iommu_queue_disable(struct riscv_iommu_queue *queue)
> +{
> +       struct riscv_iommu_device *iommu = queue->iommu;
> +       u32 csr;
> +
> +       if (!iommu)
> +               return;
> +
> +       free_irq(iommu->irqs[riscv_iommu_queue_vec(iommu, queue->qid)], queue);
> +       riscv_iommu_writel(iommu, queue->qcr, 0);
> +       riscv_iommu_readl_timeout(iommu, queue->qcr,
> +                                 csr, !(csr & RISCV_IOMMU_QUEUE_BUSY),
> +                                 10, RISCV_IOMMU_QCSR_TIMEOUT);
> +
> +       if (csr & (RISCV_IOMMU_QUEUE_ACTIVE | RISCV_IOMMU_QUEUE_BUSY))
> +               dev_err(iommu->dev, "fail to disable hardware queue #%u, csr 0x%x\n",
> +                       queue->qid, csr);
> +
> +       queue->iommu = NULL;
> +}
> +
> +/*
> + * Returns number of available valid queue entries and the first item index.
> + * Update shadow producer index if necessary.
> + */
> +static int riscv_iommu_queue_consume(struct riscv_iommu_queue *queue,
> +                                    unsigned int *index)
> +{
> +       unsigned int head = atomic_read(&queue->head);
> +       unsigned int tail = atomic_read(&queue->tail);
> +       unsigned int last = Q_ITEM(queue, tail);
> +       int available = (int)(tail - head);
> +
> +       *index = head;
> +
> +       if (available > 0)
> +               return available;
> +
> +       /* read hardware producer index, check reserved register bits are not set. */
> +       if (riscv_iommu_readl_timeout(queue->iommu, Q_TAIL(queue),
> +                                     tail, (tail & ~queue->mask) == 0,
> +                                     0, RISCV_IOMMU_QUEUE_TIMEOUT)) {
> +               dev_err_once(queue->iommu->dev,
> +                            "Hardware error: queue access timeout\n");
> +               return 0;
> +       }
> +
> +       if (tail == last)
> +               return 0;
> +
> +       /* update shadow producer index */
> +       return (int)(atomic_add_return((tail - last) & queue->mask, &queue->tail) - head);
> +}
> +
> +/*
> + * Release processed queue entries, should match riscv_iommu_queue_consume() calls.
> + */
> +static void riscv_iommu_queue_release(struct riscv_iommu_queue *queue, int count)
> +{
> +       const unsigned int head = atomic_add_return(count, &queue->head);
> +
> +       riscv_iommu_writel(queue->iommu, Q_HEAD(queue), Q_ITEM(queue, head));
> +}
> +
> +/* Return actual consumer index based on hardware reported queue head index. */
> +static unsigned int riscv_iommu_queue_cons(struct riscv_iommu_queue *queue)
> +{
> +       const unsigned int cons = atomic_read(&queue->head);
> +       const unsigned int last = Q_ITEM(queue, cons);
> +       unsigned int head;
> +
> +       if (riscv_iommu_readl_timeout(queue->iommu, Q_HEAD(queue), head,
> +                                     !(head & ~queue->mask),
> +                                     0, RISCV_IOMMU_QUEUE_TIMEOUT))
> +               return cons;
> +
> +       return cons + ((head - last) & queue->mask);
> +}
> +
> +/* Wait for submitted item to be processed. */
> +static int riscv_iommu_queue_wait(struct riscv_iommu_queue *queue,
> +                                 unsigned int index,
> +                                 unsigned int timeout_us)
> +{
> +       unsigned int cons = atomic_read(&queue->head);
> +
> +       /* Already processed by the consumer */
> +       if ((int)(cons - index) > 0)
> +               return 0;
> +
> +       /* Monitor consumer index */
> +       return readx_poll_timeout(riscv_iommu_queue_cons, queue, cons,
> +                                (int)(cons - index) > 0, 0, timeout_us);
> +}
> +
> +/* Enqueue an entry and wait to be processed if timeout_us > 0
> + *
> + * Error handling for IOMMU hardware not responding in reasonable time
> + * will be added as separate patch series along with other RAS features.
> + * For now, only report hardware failure and continue.
> + */
> +static unsigned int riscv_iommu_queue_send(struct riscv_iommu_queue *queue,
> +                                          void *entry, size_t entry_size)
> +{
> +       unsigned int prod;
> +       unsigned int head;
> +       unsigned int tail;
> +       unsigned long flags;
> +
> +       /* Do not preempt submission flow. */
> +       local_irq_save(flags);
> +
> +       /* 1. Allocate some space in the queue */
> +       prod = atomic_inc_return(&queue->prod) - 1;
> +       head = atomic_read(&queue->head);
> +
> +       /* 2. Wait for space availability. */
> +       if ((prod - head) > queue->mask) {
> +               if (readx_poll_timeout(atomic_read, &queue->head,
> +                                      head, (prod - head) < queue->mask,
> +                                      0, RISCV_IOMMU_QUEUE_TIMEOUT))
> +                       goto err_busy;
> +       } else if ((prod - head) == queue->mask) {
> +               const unsigned int last = Q_ITEM(queue, head);
> +
> +               if (riscv_iommu_readl_timeout(queue->iommu, Q_HEAD(queue), head,
> +                                             !(head & ~queue->mask) && head != last,
> +                                             0, RISCV_IOMMU_QUEUE_TIMEOUT))
> +                       goto err_busy;
> +               atomic_add((head - last) & queue->mask, &queue->head);
> +       }
> +
> +       /* 3. Store entry in the ring buffer. */
> +       memcpy(queue->base + Q_ITEM(queue, prod) * entry_size, entry, entry_size);
> +
> +       /* 4. Wait for all previous entries to be ready */
> +       if (readx_poll_timeout(atomic_read, &queue->tail, tail, prod == tail,
> +                              0, RISCV_IOMMU_QUEUE_TIMEOUT))
> +               goto err_busy;
> +
> +       /* 5. Complete submission and restore local interrupts */
> +       dma_wmb();
> +       riscv_iommu_writel(queue->iommu, Q_TAIL(queue), Q_ITEM(queue, prod + 1));
> +       atomic_inc(&queue->tail);
> +       local_irq_restore(flags);
> +
> +       return prod;
> +
> +err_busy:
> +       local_irq_restore(flags);
> +       dev_err_once(queue->iommu->dev, "Hardware error: command enqueue failed\n");
> +
> +       return prod;
> +}
> +
> +/*
> + * IOMMU Command queue chapter 3.1
> + */
> +
> +/* Command queue interrupt handler thread function */
> +static irqreturn_t riscv_iommu_cmdq_process(int irq, void *data)
> +{
> +       const struct riscv_iommu_queue *queue = (struct riscv_iommu_queue *)data;
> +       unsigned int ctrl;
> +
> +       /* Clear MF/CQ errors, complete error recovery to be implemented. */
> +       ctrl = riscv_iommu_readl(queue->iommu, queue->qcr);
> +       if (ctrl & (RISCV_IOMMU_CQCSR_CQMF | RISCV_IOMMU_CQCSR_CMD_TO |
> +                   RISCV_IOMMU_CQCSR_CMD_ILL | RISCV_IOMMU_CQCSR_FENCE_W_IP)) {
> +               riscv_iommu_writel(queue->iommu, queue->qcr, ctrl);
> +               dev_warn(queue->iommu->dev,
> +                        "Queue #%u error; fault:%d timeout:%d illegal:%d fence_w_ip:%d\n",
> +                        queue->qid,
> +                        !!(ctrl & RISCV_IOMMU_CQCSR_CQMF),
> +                        !!(ctrl & RISCV_IOMMU_CQCSR_CMD_TO),
> +                        !!(ctrl & RISCV_IOMMU_CQCSR_CMD_ILL),
> +                        !!(ctrl & RISCV_IOMMU_CQCSR_FENCE_W_IP));
> +       }
> +
> +       /* Placeholder for command queue interrupt notifiers */
> +
> +       /* Clear command interrupt pending. */
> +       riscv_iommu_writel(queue->iommu, RISCV_IOMMU_REG_IPSR, Q_IPSR(queue));
> +
> +       return IRQ_HANDLED;
> +}
> +
> +/* Send command to the IOMMU command queue */
> +static void riscv_iommu_cmd_send(struct riscv_iommu_device *iommu,
> +                                struct riscv_iommu_command *cmd)
> +{
> +       riscv_iommu_queue_send(&iommu->cmdq, cmd, sizeof(*cmd));
> +}
> +
> +/* Send IOFENCE.C command and wait for all scheduled commands to complete. */
> +static void riscv_iommu_cmd_sync(struct riscv_iommu_device *iommu,
> +                                unsigned int timeout_us)
> +{
> +       struct riscv_iommu_command cmd;
> +       unsigned int prod;
> +
> +       riscv_iommu_cmd_iofence(&cmd);
> +       prod = riscv_iommu_queue_send(&iommu->cmdq, &cmd, sizeof(cmd));
> +
> +       if (!timeout_us)
> +               return;
> +
> +       if (riscv_iommu_queue_wait(&iommu->cmdq, prod, timeout_us))
> +               dev_err_once(iommu->dev,
> +                            "Hardware error: command execution timeout\n");
> +}
> +
> +/*
> + * IOMMU Fault/Event queue chapter 3.2
> + */
> +
> +static void riscv_iommu_fault(struct riscv_iommu_device *iommu,
> +                             struct riscv_iommu_fq_record *event)
> +{
> +       unsigned int err = FIELD_GET(RISCV_IOMMU_FQ_HDR_CAUSE, event->hdr);
> +       unsigned int devid = FIELD_GET(RISCV_IOMMU_FQ_HDR_DID, event->hdr);
> +
> +       /* Placeholder for future fault handling implementation, report only. */
> +       if (err)
> +               dev_warn_ratelimited(iommu->dev,
> +                                    "Fault %d devid: 0x%x iotval: %llx iotval2: %llx\n",
> +                                    err, devid, event->iotval, event->iotval2);
> +}
> +
> +/* Fault queue interrupt handler thread function */
> +static irqreturn_t riscv_iommu_fltq_process(int irq, void *data)
> +{
> +       struct riscv_iommu_queue *queue = (struct riscv_iommu_queue *)data;
> +       struct riscv_iommu_device *iommu = queue->iommu;
> +       struct riscv_iommu_fq_record *events;
> +       unsigned int ctrl, idx;
> +       int cnt, len;
> +
> +       events = (struct riscv_iommu_fq_record *)queue->base;
> +
> +       /* Clear fault interrupt pending and process all received fault events. */
> +       riscv_iommu_writel(iommu, RISCV_IOMMU_REG_IPSR, Q_IPSR(queue));
> +
> +       do {
> +               cnt = riscv_iommu_queue_consume(queue, &idx);
> +               for (len = 0; len < cnt; idx++, len++)
> +                       riscv_iommu_fault(iommu, &events[Q_ITEM(queue, idx)]);
> +               riscv_iommu_queue_release(queue, cnt);
> +       } while (cnt > 0);
> +
> +       /* Clear MF/OF errors, complete error recovery to be implemented. */
> +       ctrl = riscv_iommu_readl(iommu, queue->qcr);
> +       if (ctrl & (RISCV_IOMMU_FQCSR_FQMF | RISCV_IOMMU_FQCSR_FQOF)) {
> +               riscv_iommu_writel(iommu, queue->qcr, ctrl);
> +               dev_warn(iommu->dev,
> +                        "Queue #%u error; memory fault:%d overflow:%d\n",
> +                        queue->qid,
> +                        !!(ctrl & RISCV_IOMMU_FQCSR_FQMF),
> +                        !!(ctrl & RISCV_IOMMU_FQCSR_FQOF));
> +       }
> +
> +       return IRQ_HANDLED;
> +}
> +
>  /* Lookup and initialize device context info structure. */
>  static struct riscv_iommu_dc *riscv_iommu_get_dc(struct riscv_iommu_device *iommu,
>                                                  unsigned int devid)
> @@ -250,6 +697,7 @@ static int riscv_iommu_iodir_set_mode(struct riscv_iommu_device *iommu,
>         struct device *dev = iommu->dev;
>         u64 ddtp, rq_ddtp;
>         unsigned int mode, rq_mode = ddtp_mode;
> +       struct riscv_iommu_command cmd;
>
>         ddtp = riscv_iommu_read_ddtp(iommu);
>         if (ddtp & RISCV_IOMMU_DDTP_BUSY)
> @@ -317,6 +765,17 @@ static int riscv_iommu_iodir_set_mode(struct riscv_iommu_device *iommu,
>         if (mode != ddtp_mode)
>                 dev_dbg(dev, "DDTP hw mode %u, requested %u\n", mode, ddtp_mode);
>
> +       /* Invalidate device context cache */
> +       riscv_iommu_cmd_iodir_inval_ddt(&cmd);
> +       riscv_iommu_cmd_send(iommu, &cmd);
> +
> +       /* Invalidate address translation cache */
> +       riscv_iommu_cmd_inval_vma(&cmd);
> +       riscv_iommu_cmd_send(iommu, &cmd);
> +
> +       /* IOFENCE.C */
> +       riscv_iommu_cmd_sync(iommu, RISCV_IOMMU_IOTINVAL_TIMEOUT);
> +
>         return 0;
>  }
>
> @@ -493,6 +952,24 @@ static int riscv_iommu_init_check(struct riscv_iommu_device *iommu)
>                         return -EINVAL;
>         }
>
> +       /*
> +        * Distribute interrupt vectors, always use first vector for CIV.
> +        * At least one interrupt is required. Read back and verify.
> +        */
> +       if (!iommu->irqs_count)
> +               return -EINVAL;
> +
> +       iommu->icvec = FIELD_PREP(RISCV_IOMMU_ICVEC_FIV, 1 % iommu->irqs_count) |
> +                      FIELD_PREP(RISCV_IOMMU_ICVEC_PIV, 2 % iommu->irqs_count) |
> +                      FIELD_PREP(RISCV_IOMMU_ICVEC_PMIV, 3 % iommu->irqs_count);
> +       riscv_iommu_writeq(iommu, RISCV_IOMMU_REG_ICVEC, iommu->icvec);
> +       iommu->icvec = riscv_iommu_readq(iommu, RISCV_IOMMU_REG_ICVEC);
> +       if (max(max(FIELD_GET(RISCV_IOMMU_ICVEC_CIV, iommu->icvec),
> +                   FIELD_GET(RISCV_IOMMU_ICVEC_FIV, iommu->icvec)),
> +               max(FIELD_GET(RISCV_IOMMU_ICVEC_PIV, iommu->icvec),
> +                   FIELD_GET(RISCV_IOMMU_ICVEC_PMIV, iommu->icvec))) >= iommu->irqs_count)
> +               return -EINVAL;
> +
>         return 0;
>  }
>
> @@ -501,12 +978,17 @@ void riscv_iommu_remove(struct riscv_iommu_device *iommu)
>         iommu_device_unregister(&iommu->iommu);
>         iommu_device_sysfs_remove(&iommu->iommu);
>         riscv_iommu_iodir_set_mode(iommu, RISCV_IOMMU_DDTP_MODE_OFF);
> +       riscv_iommu_queue_disable(&iommu->cmdq);
> +       riscv_iommu_queue_disable(&iommu->fltq);
>  }
>
>  int riscv_iommu_init(struct riscv_iommu_device *iommu)
>  {
>         int rc;
>
> +       RISCV_IOMMU_QUEUE_INIT(&iommu->cmdq, CQ);
> +       RISCV_IOMMU_QUEUE_INIT(&iommu->fltq, FQ);
> +
>         rc = riscv_iommu_init_check(iommu);
>         if (rc)
>                 return dev_err_probe(iommu->dev, rc, "unexpected device state\n");
> @@ -515,10 +997,28 @@ int riscv_iommu_init(struct riscv_iommu_device *iommu)
>         if (rc)
>                 return rc;
>
> -       rc = riscv_iommu_iodir_set_mode(iommu, RISCV_IOMMU_DDTP_MODE_MAX);
> +       rc = riscv_iommu_queue_alloc(iommu, &iommu->cmdq,
> +                                    sizeof(struct riscv_iommu_command));
> +       if (rc)
> +               return rc;
> +
> +       rc = riscv_iommu_queue_alloc(iommu, &iommu->fltq,
> +                                    sizeof(struct riscv_iommu_fq_record));
> +       if (rc)
> +               return rc;
> +
> +       rc = riscv_iommu_queue_enable(iommu, &iommu->cmdq, riscv_iommu_cmdq_process);
>         if (rc)
>                 return rc;
>
> +       rc = riscv_iommu_queue_enable(iommu, &iommu->fltq, riscv_iommu_fltq_process);
> +       if (rc)
> +               goto err_queue_disable;
> +
> +       rc = riscv_iommu_iodir_set_mode(iommu, RISCV_IOMMU_DDTP_MODE_MAX);
> +       if (rc)
> +               goto err_queue_disable;
> +
>         rc = iommu_device_sysfs_add(&iommu->iommu, NULL, NULL, "riscv-iommu@%s",
>                                     dev_name(iommu->dev));
>         if (rc) {
> @@ -538,5 +1038,8 @@ int riscv_iommu_init(struct riscv_iommu_device *iommu)
>         iommu_device_sysfs_remove(&iommu->iommu);
>  err_iodir_off:
>         riscv_iommu_iodir_set_mode(iommu, RISCV_IOMMU_DDTP_MODE_OFF);
> +err_queue_disable:
> +       riscv_iommu_queue_disable(&iommu->fltq);
> +       riscv_iommu_queue_disable(&iommu->cmdq);
>         return rc;
>  }
> diff --git a/drivers/iommu/riscv/iommu.h b/drivers/iommu/riscv/iommu.h
> index f1696926582c..b1c4664542b4 100644
> --- a/drivers/iommu/riscv/iommu.h
> +++ b/drivers/iommu/riscv/iommu.h
> @@ -17,6 +17,22 @@
>
>  #include "iommu-bits.h"
>
> +struct riscv_iommu_device;
> +
> +struct riscv_iommu_queue {
> +       atomic_t prod;                          /* unbounded producer allocation index */
> +       atomic_t head;                          /* unbounded shadow ring buffer consumer index */
> +       atomic_t tail;                          /* unbounded shadow ring buffer producer index */
> +       unsigned int mask;                      /* index mask, queue length - 1 */
> +       unsigned int irq;                       /* allocated interrupt number */
> +       struct riscv_iommu_device *iommu;       /* iommu device handling the queue when active */
> +       void *base;                             /* ring buffer kernel pointer */
> +       dma_addr_t phys;                        /* ring buffer physical address */
> +       u16 qbr;                                /* base register offset, head and tail reference */
> +       u16 qcr;                                /* control and status register offset */
> +       u8 qid;                                 /* queue identifier, same as RISCV_IOMMU_INTR_XX */
> +};
> +
>  struct riscv_iommu_device {
>         /* iommu core interface */
>         struct iommu_device iommu;
> @@ -34,6 +50,11 @@ struct riscv_iommu_device {
>         /* available interrupt numbers, MSI or WSI */
>         unsigned int irqs[RISCV_IOMMU_INTR_COUNT];
>         unsigned int irqs_count;
> +       unsigned int icvec;
> +
> +       /* hardware queues */
> +       struct riscv_iommu_queue cmdq;
> +       struct riscv_iommu_queue fltq;
>
>         /* device directory */
>         unsigned int ddt_mode;
> --
> 2.34.1
>

Reviewed-by: Zong Li <zong.li@...ive.com>

Thanks

>
> _______________________________________________
> linux-riscv mailing list
> linux-riscv@...ts.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-riscv

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ