lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAH2o1u5wHZr-BHU0zbvq_DHzXY6xshyF1EkDDffGuAqZ670bXQ@mail.gmail.com>
Date: Wed, 8 May 2024 09:03:22 -0700
From: Tomasz Jeznach <tjeznach@...osinc.com>
To: Zong Li <zong.li@...ive.com>
Cc: Joerg Roedel <joro@...tes.org>, Will Deacon <will@...nel.org>, 
	Robin Murphy <robin.murphy@....com>, Paul Walmsley <paul.walmsley@...ive.com>, 
	Anup Patel <apatel@...tanamicro.com>, devicetree@...r.kernel.org, 
	Conor Dooley <conor+dt@...nel.org>, Albert Ou <aou@...s.berkeley.edu>, linux@...osinc.com, 
	linux-kernel@...r.kernel.org, Rob Herring <robh+dt@...nel.org>, 
	Sebastien Boeuf <seb@...osinc.com>, iommu@...ts.linux.dev, 
	Palmer Dabbelt <palmer@...belt.com>, Nick Kossifidis <mick@....forth.gr>, 
	Krzysztof Kozlowski <krzk+dt@...nel.org>, linux-riscv@...ts.infradead.org, 
	Lu Baolu <baolu.lu@...ux.intel.com>
Subject: Re: [PATCH v4 6/7] iommu/riscv: Command and fault queue support

On Wed, May 8, 2024 at 8:39 AM Zong Li <zong.li@...ive.com> wrote:
>
> On Sat, May 4, 2024 at 12:13 AM Tomasz Jeznach <tjeznach@...osinccom> wrote:
> >
> > Introduce device command submission and fault reporting queues,
> > as described in Chapter 3.1 and 3.2 of the RISC-V IOMMU Architecture
> > Specification.
> >
> > Command and fault queues are instantiated in contiguous system memory
> > local to IOMMU device domain, or mapped from fixed I/O space provided
> > by the hardware implementation. Detection of the location and maximum
> > allowed size of the queue utilize WARL properties of queue base control
> > register. Driver implementation will try to allocate up to 128KB of
> > system memory, while respecting hardware supported maximum queue size.
> >
> > Interrupts allocation is based on interrupt vectors availability and
> > distributed to all queues in simple round-robin fashion. For hardware
> > Implementation with fixed event type to interrupt vector assignment
> > IVEC WARL property is used to discover such mappings.
> >
> > Address translation, command and queue fault handling in this change
> > is limited to simple fault reporting without taking any action.
> >
> > Reviewed-by: Lu Baolu <baolu.lu@...ux.intel.com>
> > Signed-off-by: Tomasz Jeznach <tjeznach@...osinc.com>
> > ---
> >  drivers/iommu/riscv/iommu-bits.h |  75 +++++
> >  drivers/iommu/riscv/iommu.c      | 496 ++++++++++++++++++++++++++++++-
> >  drivers/iommu/riscv/iommu.h      |  21 ++
> >  3 files changed, 590 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/iommu/riscv/iommu-bits.h b/drivers/iommu/riscv/iommu-bits.h
> > index ba093c29de9f..40c379222821 100644
> > --- a/drivers/iommu/riscv/iommu-bits.h
> > +++ b/drivers/iommu/riscv/iommu-bits.h
> > @@ -704,4 +704,79 @@ struct riscv_iommu_msi_pte {
> >  #define RISCV_IOMMU_MSI_MRIF_NPPN      RISCV_IOMMU_PPN_FIELD
> >  #define RISCV_IOMMU_MSI_MRIF_NID_MSB   BIT_ULL(60)
> >
> > +/* Helper functions: command structure builders. */
> > +
> > +static inline void riscv_iommu_cmd_inval_vma(struct riscv_iommu_command *cmd)
> > +{
> > +       cmd->dword0 = FIELD_PREP(RISCV_IOMMU_CMD_OPCODE, RISCV_IOMMU_CMD_IOTINVAL_OPCODE) |
> > +                     FIELD_PREP(RISCV_IOMMU_CMD_FUNC, RISCV_IOMMU_CMD_IOTINVAL_FUNC_VMA);
> > +       cmd->dword1 = 0;
> > +}
> > +
> > +static inline void riscv_iommu_cmd_inval_set_addr(struct riscv_iommu_command *cmd,
> > +                                                 u64 addr)
> > +{
> > +       cmd->dword1 = FIELD_PREP(RISCV_IOMMU_CMD_IOTINVAL_ADDR, phys_to_pfn(addr));
> > +       cmd->dword0 |= RISCV_IOMMU_CMD_IOTINVAL_AV;
> > +}
> > +
> > +static inline void riscv_iommu_cmd_inval_set_pscid(struct riscv_iommu_command *cmd,
> > +                                                  int pscid)
> > +{
> > +       cmd->dword0 |= FIELD_PREP(RISCV_IOMMU_CMD_IOTINVAL_PSCID, pscid) |
> > +                      RISCV_IOMMU_CMD_IOTINVAL_PSCV;
> > +}
> > +
> > +static inline void riscv_iommu_cmd_inval_set_gscid(struct riscv_iommu_command *cmd,
> > +                                                  int gscid)
> > +{
> > +       cmd->dword0 |= FIELD_PREP(RISCV_IOMMU_CMD_IOTINVAL_GSCID, gscid) |
> > +                      RISCV_IOMMU_CMD_IOTINVAL_GV;
> > +}
> > +
> > +static inline void riscv_iommu_cmd_iofence(struct riscv_iommu_command *cmd)
> > +{
> > +       cmd->dword0 = FIELD_PREP(RISCV_IOMMU_CMD_OPCODE, RISCV_IOMMU_CMD_IOFENCE_OPCODE) |
> > +                     FIELD_PREP(RISCV_IOMMU_CMD_FUNC, RISCV_IOMMU_CMD_IOFENCE_FUNC_C) |
> > +                     RISCV_IOMMU_CMD_IOFENCE_PR | RISCV_IOMMU_CMD_IOFENCE_PW;
> > +       cmd->dword1 = 0;
> > +}
> > +
> > +static inline void riscv_iommu_cmd_iofence_set_av(struct riscv_iommu_command *cmd,
> > +                                                 u64 addr, u32 data)
> > +{
> > +       cmd->dword0 = FIELD_PREP(RISCV_IOMMU_CMD_OPCODE, RISCV_IOMMU_CMD_IOFENCE_OPCODE) |
> > +                     FIELD_PREP(RISCV_IOMMU_CMD_FUNC, RISCV_IOMMU_CMD_IOFENCE_FUNC_C) |
> > +                     FIELD_PREP(RISCV_IOMMU_CMD_IOFENCE_DATA, data) |
> > +                     RISCV_IOMMU_CMD_IOFENCE_AV;
> > +       cmd->dword1 = addr >> 2;
> > +}
> > +
> > +static inline void riscv_iommu_cmd_iodir_inval_ddt(struct riscv_iommu_command *cmd)
> > +{
> > +       cmd->dword0 = FIELD_PREP(RISCV_IOMMU_CMD_OPCODE, RISCV_IOMMU_CMD_IODIR_OPCODE) |
> > +                     FIELD_PREP(RISCV_IOMMU_CMD_FUNC, RISCV_IOMMU_CMD_IODIR_FUNC_INVAL_DDT);
> > +       cmd->dword1 = 0;
> > +}
> > +
> > +static inline void riscv_iommu_cmd_iodir_inval_pdt(struct riscv_iommu_command *cmd)
> > +{
> > +       cmd->dword0 = FIELD_PREP(RISCV_IOMMU_CMD_OPCODE, RISCV_IOMMU_CMD_IODIR_OPCODE) |
> > +                     FIELD_PREP(RISCV_IOMMU_CMD_FUNC, RISCV_IOMMU_CMD_IODIR_FUNC_INVAL_PDT);
> > +       cmd->dword1 = 0;
> > +}
> > +
> > +static inline void riscv_iommu_cmd_iodir_set_did(struct riscv_iommu_command *cmd,
> > +                                                unsigned int devid)
> > +{
> > +       cmd->dword0 |= FIELD_PREP(RISCV_IOMMU_CMD_IODIR_DID, devid) |
> > +                      RISCV_IOMMU_CMD_IODIR_DV;
> > +}
> > +
> > +static inline void riscv_iommu_cmd_iodir_set_pid(struct riscv_iommu_command *cmd,
> > +                                                unsigned int pasid)
> > +{
> > +       cmd->dword0 |= FIELD_PREP(RISCV_IOMMU_CMD_IODIR_PID, pasid);
> > +}
> > +
> >  #endif /* _RISCV_IOMMU_BITS_H_ */
> > diff --git a/drivers/iommu/riscv/iommu.c b/drivers/iommu/riscv/iommu.c
> > index 71b7903d83d4..4349ac8a3990 100644
> > --- a/drivers/iommu/riscv/iommu.c
> > +++ b/drivers/iommu/riscv/iommu.c
> > @@ -25,7 +25,14 @@
> >  #include "iommu.h"
> >
> >  /* Timeouts in [us] */
> > -#define RISCV_IOMMU_DDTP_TIMEOUT       50000
> > +#define RISCV_IOMMU_QCSR_TIMEOUT       150000
> > +#define RISCV_IOMMU_QUEUE_TIMEOUT      150000
> > +#define RISCV_IOMMU_DDTP_TIMEOUT       10000000
> > +#define RISCV_IOMMU_IOTINVAL_TIMEOUT   90000000
> > +
> > +/* Number of entries per CMD/FLT queue, should be <= INT_MAX */
> > +#define RISCV_IOMMU_DEF_CQ_COUNT       8192
> > +#define RISCV_IOMMU_DEF_FQ_COUNT       4096
> >
> >  /* RISC-V IOMMU PPN <> PHYS address conversions, PHYS <=> PPN[53:10] */
> >  #define phys_to_ppn(va)  (((va) >> 2) & (((1ULL << 44) - 1) << 10))
> > @@ -89,6 +96,432 @@ static void riscv_iommu_free_pages(struct riscv_iommu_device *iommu, void *addr)
> >                        riscv_iommu_devres_pages_match, &devres);
> >  }
> >
> > +/*
> > + * Hardware queue allocation and management.
> > + */
> > +
> > +/* Setup queue base, control registers and default queue length */
> > +#define RISCV_IOMMU_QUEUE_INIT(q, name) do {                           \
> > +       struct riscv_iommu_queue *_q = q;                               \
> > +       _q->qid = RISCV_IOMMU_INTR_ ## name;                            \
> > +       _q->qbr = RISCV_IOMMU_REG_ ## name ## B;                        \
> > +       _q->qcr = RISCV_IOMMU_REG_ ## name ## CSR;                      \
> > +       _q->mask = _q->mask ?: (RISCV_IOMMU_DEF_ ## name ## _COUNT) - 1;\
> > +} while (0)
> > +
> > +/* Note: offsets are the same for all queues */
> > +#define Q_HEAD(q) ((q)->qbr + (RISCV_IOMMU_REG_CQH - RISCV_IOMMU_REG_CQB))
> > +#define Q_TAIL(q) ((q)->qbr + (RISCV_IOMMU_REG_CQT - RISCV_IOMMU_REG_CQB))
> > +#define Q_ITEM(q, index) ((q)->mask & (index))
> > +#define Q_IPSR(q) BIT((q)->qid)
> > +
> > +/*
> > + * Discover queue ring buffer hardware configuration, allocate in-memory
> > + * ring buffer or use fixed I/O memory location, configure queue base register.
> > + * Must be called before hardware queue is enabled.
> > + *
> > + * @queue - data structure, configured with RISCV_IOMMU_QUEUE_INIT()
> > + * @entry_size - queue single element size in bytes.
> > + */
> > +static int riscv_iommu_queue_alloc(struct riscv_iommu_device *iommu,
> > +                                  struct riscv_iommu_queue *queue,
> > +                                  size_t entry_size)
> > +{
> > +       unsigned int logsz;
> > +       u64 qb, rb;
> > +
> > +       /*
> > +        * Use WARL base register property to discover maximum allowed
> > +        * number of entries and optional fixed IO address for queue location.
> > +        */
> > +       riscv_iommu_writeq(iommu, queue->qbr, RISCV_IOMMU_QUEUE_LOGSZ_FIELD);
> > +       qb = riscv_iommu_readq(iommu, queue->qbr);
> > +
> > +       /*
> > +        * Calculate and verify hardware supported queue length, as reported
> > +        * by the field LOGSZ, where max queue length is equal to 2^(LOGSZ + 1).
> > +        * Update queue size based on hardware supported value.
> > +        */
> > +       logsz = ilog2(queue->mask);
> > +       if (logsz > FIELD_GET(RISCV_IOMMU_QUEUE_LOGSZ_FIELD, qb))
> > +               logsz = FIELD_GET(RISCV_IOMMU_QUEUE_LOGSZ_FIELD, qb);
> > +
> > +       /*
> > +        * Use WARL base register property to discover an optional fixed IO
> > +        * address for queue ring buffer location. Otherwise allocate contigus
> > +        * system memory.
> > +        */
> > +       if (FIELD_GET(RISCV_IOMMU_PPN_FIELD, qb)) {
> > +               const size_t queue_size = entry_size << (logsz + 1);
> > +
> > +               queue->phys = ppn_to_phys(FIELD_GET(RISCV_IOMMU_PPN_FIELD, qb));
> > +               queue->base = devm_ioremap(iommu->dev, queue->phys, queue_size);
> > +       } else {
> > +               do {
> > +                       const size_t queue_size = entry_size << (logsz + 1);
> > +                       const int order = get_order(queue_size);
> > +
> > +                       queue->base = riscv_iommu_get_pages(iommu, order);
> > +                       queue->phys = __pa(queue->base);
> > +               } while (!queue->base && logsz-- > 0);
> > +       }
> > +
> > +       if (!queue->base)
> > +               return -ENOMEM;
> > +
> > +       qb = phys_to_ppn(queue->phys) |
> > +            FIELD_PREP(RISCV_IOMMU_QUEUE_LOGSZ_FIELD, logsz);
> > +
> > +       /* Update base register and read back to verify hw accepted our write */
> > +       riscv_iommu_writeq(iommu, queue->qbr, qb);
> > +       rb = riscv_iommu_readq(iommu, queue->qbr);
> > +       if (rb != qb) {
> > +               dev_err(iommu->dev, "queue #%u allocation failed\n", queue->qid);
> > +               return -ENODEV;
> > +       }
> > +
> > +       /* Update actual queue mask */
> > +       queue->mask = (2U << logsz) - 1;
> > +
> > +       dev_dbg(iommu->dev, "queue #%u allocated 2^%u entries",
> > +               queue->qid, logsz + 1);
> > +
> > +       return 0;
> > +}
> > +
> > +/* Check interrupt queue status, IPSR */
> > +static irqreturn_t riscv_iommu_queue_ipsr(int irq, void *data)
> > +{
> > +       struct riscv_iommu_queue *queue = (struct riscv_iommu_queue *)data;
> > +
> > +       if (riscv_iommu_readl(queue->iommu, RISCV_IOMMU_REG_IPSR) & Q_IPSR(queue))
> > +               return IRQ_WAKE_THREAD;
> > +
> > +       return IRQ_NONE;
> > +}
> > +
> > +static int riscv_iommu_queue_vec(struct riscv_iommu_device *iommu, int n)
> > +{
> > +       /* Reuse IVEC.CIV mask for all interrupt vectors mapping. */
> > +       return (iommu->ivec >> (n * 4)) & RISCV_IOMMU_IVEC_CIV;
> > +}
> > +
> > +/*
> > + * Enable queue processing in the hardware, register interrupt handler.
> > + *
> > + * @queue - data structure, already allocated with riscv_iommu_queue_alloc()
> > + * @irq_handler - threaded interrupt handler.
> > + */
> > +static int riscv_iommu_queue_enable(struct riscv_iommu_device *iommu,
> > +                                   struct riscv_iommu_queue *queue,
> > +                                   irq_handler_t irq_handler)
> > +{
> > +       const unsigned int irq = iommu->irqs[riscv_iommu_queue_vec(iommu, queue->qid)];
> > +       u32 csr;
> > +       int rc;
> > +
> > +       if (queue->iommu)
> > +               return -EBUSY;
> > +
> > +       /* Polling not implemented */
> > +       if (!irq)
> > +               return -ENODEV;
> > +
> > +       queue->iommu = iommu;
> > +       rc = request_threaded_irq(irq, riscv_iommu_queue_ipsr, irq_handler,
> > +                                 IRQF_ONESHOT | IRQF_SHARED,
> > +                                 dev_name(iommu->dev), queue);
> > +       if (rc) {
> > +               queue->iommu = NULL;
> > +               return rc;
> > +       }
> > +
> > +       /*
> > +        * Enable queue with interrupts, clear any memory fault if any.
> > +        * Wait for the hardware to acknowledge request and activate queue
> > +        * processing.
> > +        * Note: All CSR bitfields are in the same offsets for all queues.
> > +        */
> > +       riscv_iommu_writel(iommu, queue->qcr,
> > +                          RISCV_IOMMU_QUEUE_ENABLE |
> > +                          RISCV_IOMMU_QUEUE_INTR_ENABLE |
> > +                          RISCV_IOMMU_QUEUE_MEM_FAULT);
> > +
> > +       riscv_iommu_readl_timeout(iommu, queue->qcr,
> > +                                 csr, !(csr & RISCV_IOMMU_QUEUE_BUSY),
> > +                                 10, RISCV_IOMMU_QCSR_TIMEOUT);
> > +
> > +       if (RISCV_IOMMU_QUEUE_ACTIVE != (csr & (RISCV_IOMMU_QUEUE_ACTIVE |
> > +                                               RISCV_IOMMU_QUEUE_BUSY |
> > +                                               RISCV_IOMMU_QUEUE_MEM_FAULT))) {
> > +               /* Best effort to stop and disable failing hardware queue. */
> > +               riscv_iommu_writel(iommu, queue->qcr, 0);
> > +               free_irq(irq, queue);
> > +               queue->iommu = NULL;
> > +               dev_err(iommu->dev, "queue #%u failed to start\n", queue->qid);
> > +               return -EBUSY;
> > +       }
> > +
> > +       /* Clear any pending interrupt flag. */
> > +       riscv_iommu_writel(iommu, RISCV_IOMMU_REG_IPSR, Q_IPSR(queue));
> > +
> > +       return 0;
> > +}
> > +
> > +/*
> > + * Disable queue. Wait for the hardware to acknowledge request and
> > + * stop processing enqueued requests. Report errors but continue.
> > + */
> > +static void riscv_iommu_queue_disable(struct riscv_iommu_queue *queue)
> > +{
> > +       struct riscv_iommu_device *iommu = queue->iommu;
> > +       u32 csr;
> > +
> > +       if (!iommu)
> > +               return;
> > +
> > +       free_irq(iommu->irqs[riscv_iommu_queue_vec(iommu, queue->qid)], queue);
> > +       riscv_iommu_writel(iommu, queue->qcr, 0);
> > +       riscv_iommu_readl_timeout(iommu, queue->qcr,
> > +                                 csr, !(csr & RISCV_IOMMU_QUEUE_BUSY),
> > +                                 10, RISCV_IOMMU_QCSR_TIMEOUT);
> > +
> > +       if (csr & (RISCV_IOMMU_QUEUE_ACTIVE | RISCV_IOMMU_QUEUE_BUSY))
> > +               dev_err(iommu->dev, "fail to disable hardware queue #%u, csr 0x%x\n",
> > +                       queue->qid, csr);
> > +
> > +       queue->iommu = NULL;
> > +}
> > +
> > +/*
> > + * Returns number of available valid queue entries and the first item index.
> > + * Update shadow producer index if necessary.
> > + */
> > +static int riscv_iommu_queue_consume(struct riscv_iommu_queue *queue,
> > +                                    unsigned int *index)
> > +{
> > +       unsigned int head = atomic_read(&queue->head);
> > +       unsigned int tail = atomic_read(&queue->tail);
> > +       unsigned int last = Q_ITEM(queue, tail);
> > +       int available = (int)(tail - head);
> > +
> > +       *index = head;
> > +
> > +       if (available > 0)
> > +               return available;
> > +
> > +       /* read hardware producer index, check reserved register bits are not set. */
> > +       if (riscv_iommu_readl_timeout(queue->iommu, Q_TAIL(queue),
> > +                                     tail, (tail & ~queue->mask) == 0,
> > +                                     0, RISCV_IOMMU_QUEUE_TIMEOUT)) {
> > +               dev_err_once(queue->iommu->dev,
> > +                            "Hardware error: queue access timeout\n");
> > +               return 0;
> > +       }
> > +
> > +       if (tail == last)
> > +               return 0;
> > +
> > +       /* update shadow producer index */
> > +       return (int)(atomic_add_return((tail - last) & queue->mask, &queue->tail) - head);
> > +}
> > +
> > +/*
> > + * Release processed queue entries, should match riscv_iommu_queue_consume() calls.
> > + */
> > +static void riscv_iommu_queue_release(struct riscv_iommu_queue *queue, int count)
> > +{
> > +       const unsigned int head = atomic_add_return(count, &queue->head);
> > +
> > +       riscv_iommu_writel(queue->iommu, Q_HEAD(queue), Q_ITEM(queue, head));
> > +}
> > +
> > +/* Return actual consumer index based on hardware reported queue head index. */
> > +static unsigned int riscv_iommu_queue_cons(struct riscv_iommu_queue *queue)
> > +{
> > +       const unsigned int cons = atomic_read(&queue->head);
> > +       const unsigned int last = Q_ITEM(queue, cons);
> > +       unsigned int head;
> > +
> > +       if (riscv_iommu_readl_timeout(queue->iommu, Q_HEAD(queue), head,
> > +                                     !(head & ~queue->mask),
> > +                                     0, RISCV_IOMMU_QUEUE_TIMEOUT))
> > +               return cons;
> > +
> > +       return cons + ((head - last) & queue->mask);
> > +}
> > +
> > +/* Wait for submitted item to be processed. */
> > +static int riscv_iommu_queue_wait(struct riscv_iommu_queue *queue,
> > +                                 unsigned int index,
> > +                                 unsigned int timeout_us)
> > +{
> > +       unsigned int cons = atomic_read(&queue->head);
> > +
> > +       /* Already processed by the consumer */
> > +       if ((int)(cons - index) > 0)
> > +               return 0;
> > +
> > +       /* Monitor consumer index */
> > +       return readx_poll_timeout(riscv_iommu_queue_cons, queue, cons,
> > +                                (int)(cons - index) > 0, 0, timeout_us);
> > +}
>
> Apart from iofence command, it seems to me that we might not be able
> to use the head pointer to determine whether the command is executed,
> because spec mentioned that the IOMMU advancing cqh is not a guarantee
> that the commands fetched by the IOMMU have been executed or
> committed. For iofence completion, perhaps using ADD and AV to write
> memory would be better as well.
>

I'll update the comment to the function that this is *only* allowed to
track IOFENCE.C completion. Call to riscv_iommu_queue_wait() is used
only for this purpose (unless specification will be extended to track
other commands with head/tail updates).

Previous version of the command tracking used address/AV notifier, but
after several iterations and testing we've considered tracking
head/tail as best approach.

> > +
> > +/* Enqueue an entry and wait to be processed if timeout_us > 0
> > + *
> > + * Error handling for IOMMU hardware not responding in reasonable time
> > + * will be added as separate patch series along with other RAS features.
> > + * For now, only report hardware failure and continue.
> > + */
> > +static void riscv_iommu_queue_send(struct riscv_iommu_queue *queue,
> > +                                  void *entry, size_t entry_size,
> > +                                  unsigned int timeout_us)
> > +{
> > +       unsigned int prod;
> > +       unsigned int head;
> > +       unsigned int tail;
> > +       unsigned long flags;
> > +
> > +       /* Do not preempt submission flow. */
> > +       local_irq_save(flags);
> > +
> > +       /* 1. Allocate some space in the queue */
> > +       prod = atomic_inc_return(&queue->prod) - 1;
> > +       head = atomic_read(&queue->head);
> > +
> > +       /* 2. Wait for space availability. */
> > +       if ((prod - head) > queue->mask) {
> > +               if (readx_poll_timeout(atomic_read, &queue->head,
> > +                                      head, (prod - head) < queue->mask,
> > +                                      0, RISCV_IOMMU_QUEUE_TIMEOUT))
> > +                       goto err_busy;
> > +       } else if ((prod - head) == queue->mask) {
> > +               const unsigned int last = Q_ITEM(queue, head);
> > +
> > +               if (riscv_iommu_readl_timeout(queue->iommu, Q_HEAD(queue), head,
> > +                                             !(head & ~queue->mask) && head != last,
> > +                                             0, RISCV_IOMMU_QUEUE_TIMEOUT))
> > +                       goto err_busy;
> > +               atomic_add((head - last) & queue->mask, &queue->head);
> > +       }
> > +
> > +       /* 3. Store entry in the ring buffer. */
> > +       memcpy(queue->base + Q_ITEM(queue, prod) * entry_size, entry, entry_size);
> > +
> > +       /* 4. Wait for all previous entries to be ready */
> > +       if (readx_poll_timeout(atomic_read, &queue->tail, tail, prod == tail,
> > +                              0, RISCV_IOMMU_QUEUE_TIMEOUT))
> > +               goto err_busy;
> > +
> > +       /* 5. Complete submission and restore local interrupts */
> > +       dma_wmb();
> > +       riscv_iommu_writel(queue->iommu, Q_TAIL(queue), Q_ITEM(queue, prod + 1));
> > +       atomic_inc(&queue->tail);
> > +       local_irq_restore(flags);
> > +
> > +       if (timeout_us && riscv_iommu_queue_wait(queue, prod, timeout_us))
> > +               dev_err_once(queue->iommu->dev,
> > +                            "Hardware error: command execution timeout\n");
> > +
> > +       return;
> > +
> > +err_busy:
> > +       local_irq_restore(flags);
> > +       dev_err_once(queue->iommu->dev, "Hardware error: command enqueue failed\n");
> > +}
> > +
> > +/*
> > + * IOMMU Command queue chapter 3.1
> > + */
> > +
> > +/* Command queue interrupt handler thread function */
> > +static irqreturn_t riscv_iommu_cmdq_process(int irq, void *data)
> > +{
> > +       const struct riscv_iommu_queue *queue = (struct riscv_iommu_queue *)data;
> > +       unsigned int ctrl;
> > +
> > +       /* Clear MF/CQ errors, complete error recovery to be implemented. */
> > +       ctrl = riscv_iommu_readl(queue->iommu, queue->qcr);
> > +       if (ctrl & (RISCV_IOMMU_CQCSR_CQMF | RISCV_IOMMU_CQCSR_CMD_TO |
> > +                   RISCV_IOMMU_CQCSR_CMD_ILL | RISCV_IOMMU_CQCSR_FENCE_W_IP)) {
> > +               riscv_iommu_writel(queue->iommu, queue->qcr, ctrl);
>
> For RISCV_IOMMU_CQCSR_CMD_ILL and RISCV_IOMMU_CQCSR_CMD_TO, I think we
> need to adjust the head/tail pointer, otherwise, IOMMU might keep
> trying to execute the problematic command.
>
> > +               dev_warn(queue->iommu->dev,
> > +                        "Queue #%u error; fault:%d timeout:%d illegal:%d fence_w_ip:%d\n",
>
> It might be a bit weird if we view fence_w_ip as queue error and print
> the message as error?

Driver does not set WSI bit in IOFENCE.C command structure, so it's
unexpected to see fence_w_ip set. This message is warning only about
unexpected command queue interrupt, listing state of possible
triggers.

>
> > +                        queue->qid,
> > +                        !!(ctrl & RISCV_IOMMU_CQCSR_CQMF),
> > +                        !!(ctrl & RISCV_IOMMU_CQCSR_CMD_TO),
> > +                        !!(ctrl & RISCV_IOMMU_CQCSR_CMD_ILL),
> > +                        !!(ctrl & RISCV_IOMMU_CQCSR_FENCE_W_IP));
> > +       }
> > +
> > +       /* Placeholder for command queue interrupt notifiers */
> > +
> > +       /* Clear command interrupt pending. */
> > +       riscv_iommu_writel(queue->iommu, RISCV_IOMMU_REG_IPSR, Q_IPSR(queue));
> > +
> > +       return IRQ_HANDLED;
> > +}
> > +
> > +/* Send command to the IOMMU command queue */
> > +static void riscv_iommu_cmd_send(struct riscv_iommu_device *iommu,
> > +                                struct riscv_iommu_command *cmd,
> > +                                unsigned int timeout_us)
> > +{
> > +       riscv_iommu_queue_send(&iommu->cmdq, cmd, sizeof(*cmd), timeout_us);
> > +}
> > +
> > +/*
> > + * IOMMU Fault/Event queue chapter 3.2
> > + */
> > +
> > +static void riscv_iommu_fault(struct riscv_iommu_device *iommu,
> > +                             struct riscv_iommu_fq_record *event)
> > +{
> > +       unsigned int err = FIELD_GET(RISCV_IOMMU_FQ_HDR_CAUSE, event->hdr);
> > +       unsigned int devid = FIELD_GET(RISCV_IOMMU_FQ_HDR_DID, event->hdr);
> > +
> > +       /* Placeholder for future fault handling implementation, report only. */
> > +       if (err)
> > +               dev_warn_ratelimited(iommu->dev,
> > +                                    "Fault %d devid: 0x%x iotval: %llx iotval2: %llx\n",
> > +                                    err, devid, event->iotval, event->iotval2);
> > +}
> > +
> > +/* Fault queue interrupt handler thread function */
> > +static irqreturn_t riscv_iommu_fltq_process(int irq, void *data)
> > +{
> > +       struct riscv_iommu_queue *queue = (struct riscv_iommu_queue *)data;
> > +       struct riscv_iommu_device *iommu = queue->iommu;
> > +       struct riscv_iommu_fq_record *events;
> > +       unsigned int ctrl, idx;
> > +       int cnt, len;
> > +
> > +       events = (struct riscv_iommu_fq_record *)queue->base;
> > +
> > +       /* Clear fault interrupt pending and process all received fault events. */
> > +       riscv_iommu_writel(iommu, RISCV_IOMMU_REG_IPSR, Q_IPSR(queue));
> > +
> > +       do {
> > +               cnt = riscv_iommu_queue_consume(queue, &idx);
> > +               for (len = 0; len < cnt; idx++, len++)
> > +                       riscv_iommu_fault(iommu, &events[Q_ITEM(queue, idx)]);
> > +               riscv_iommu_queue_release(queue, cnt);
> > +       } while (cnt > 0);
> > +
> > +       /* Clear MF/OF errors, complete error recovery to be implemented. */
> > +       ctrl = riscv_iommu_readl(iommu, queue->qcr);
> > +       if (ctrl & (RISCV_IOMMU_FQCSR_FQMF | RISCV_IOMMU_FQCSR_FQOF)) {
> > +               riscv_iommu_writel(iommu, queue->qcr, ctrl);
> > +               dev_warn(iommu->dev,
> > +                        "Queue #%u error; memory fault:%d overflow:%d\n",
> > +                        queue->qid,
> > +                        !!(ctrl & RISCV_IOMMU_FQCSR_FQMF),
> > +                        !!(ctrl & RISCV_IOMMU_FQCSR_FQOF));
> > +       }
> > +
> > +       return IRQ_HANDLED;
> > +}
> > +
> >  /* Lookup and initialize device context info structure. */
> >  static struct riscv_iommu_dc *riscv_iommu_get_dc(struct riscv_iommu_device *iommu,
> >                                                  unsigned int devid)
> > @@ -250,6 +683,7 @@ static int riscv_iommu_iodir_set_mode(struct riscv_iommu_device *iommu,
> >         struct device *dev = iommu->dev;
> >         u64 ddtp, rq_ddtp;
> >         unsigned int mode, rq_mode = ddtp_mode;
> > +       struct riscv_iommu_command cmd;
> >
> >         ddtp = riscv_iommu_read_ddtp(iommu);
> >         if (ddtp & RISCV_IOMMU_DDTP_BUSY)
> > @@ -317,6 +751,18 @@ static int riscv_iommu_iodir_set_mode(struct riscv_iommu_device *iommu,
> >         if (mode != ddtp_mode)
> >                 dev_dbg(dev, "DDTP hw mode %u, requested %u\n", mode, ddtp_mode);
> >
> > +       /* Invalidate device context cache */
> > +       riscv_iommu_cmd_iodir_inval_ddt(&cmd);
> > +       riscv_iommu_cmd_send(iommu, &cmd, 0);
> > +
> > +       /* Invalidate address translation cache */
> > +       riscv_iommu_cmd_inval_vma(&cmd);
> > +       riscv_iommu_cmd_send(iommu, &cmd, 0);
> > +
> > +       /* IOFENCE.C */
> > +       riscv_iommu_cmd_iofence(&cmd);
> > +       riscv_iommu_cmd_send(iommu, &cmd, RISCV_IOMMU_IOTINVAL_TIMEOUT);
> > +
> >         return 0;
> >  }
> >
> > @@ -492,6 +938,26 @@ static int riscv_iommu_init_check(struct riscv_iommu_device *iommu)
> >                         return -EINVAL;
> >         }
> >
> > +       /*
> > +        * Distribute interrupt vectors, always use first vector for CIV.
> > +        * At least one interrupt is required. Read back and verify.
> > +        */
> > +       if (!iommu->irqs_count)
> > +               return -EINVAL;
> > +
> > +       iommu->ivec = 0;
> > +       iommu->ivec |= FIELD_PREP(RISCV_IOMMU_IVEC_FIV, 1 % iommu->irqs_count);
> > +       iommu->ivec |= FIELD_PREP(RISCV_IOMMU_IVEC_PIV, 2 % iommu->irqs_count);
> > +       iommu->ivec |= FIELD_PREP(RISCV_IOMMU_IVEC_PMIV, 3 % iommu->irqs_count);
> > +       riscv_iommu_writeq(iommu, RISCV_IOMMU_REG_IVEC, iommu->ivec);
> > +
> > +       iommu->ivec = riscv_iommu_readq(iommu, RISCV_IOMMU_REG_IVEC);
> > +       if (riscv_iommu_queue_vec(iommu, RISCV_IOMMU_IVEC_CIV) >= RISCV_IOMMU_INTR_COUNT ||
> > +           riscv_iommu_queue_vec(iommu, RISCV_IOMMU_IVEC_FIV) >= RISCV_IOMMU_INTR_COUNT ||
> > +           riscv_iommu_queue_vec(iommu, RISCV_IOMMU_IVEC_PIV) >= RISCV_IOMMU_INTR_COUNT ||
> > +           riscv_iommu_queue_vec(iommu, RISCV_IOMMU_IVEC_PMIV) >= RISCV_IOMMU_INTR_COUNT)
> > +               return -EINVAL;
> > +
> >         return 0;
> >  }
> >
> > @@ -500,12 +966,17 @@ void riscv_iommu_remove(struct riscv_iommu_device *iommu)
> >         iommu_device_unregister(&iommu->iommu);
> >         iommu_device_sysfs_remove(&iommu->iommu);
> >         riscv_iommu_iodir_set_mode(iommu, RISCV_IOMMU_DDTP_MODE_OFF);
> > +       riscv_iommu_queue_disable(&iommu->cmdq);
> > +       riscv_iommu_queue_disable(&iommu->fltq);
> >  }
> >
> >  int riscv_iommu_init(struct riscv_iommu_device *iommu)
> >  {
> >         int rc;
> >
> > +       RISCV_IOMMU_QUEUE_INIT(&iommu->cmdq, CQ);
> > +       RISCV_IOMMU_QUEUE_INIT(&iommu->fltq, FQ);
> > +
> >         rc = riscv_iommu_init_check(iommu);
> >         if (rc)
> >                 return dev_err_probe(iommu->dev, rc, "unexpected device state\n");
> > @@ -514,10 +985,28 @@ int riscv_iommu_init(struct riscv_iommu_device *iommu)
> >         if (rc)
> >                 return rc;
> >
> > -       rc = riscv_iommu_iodir_set_mode(iommu, RISCV_IOMMU_DDTP_MODE_MAX);
> > +       rc = riscv_iommu_queue_alloc(iommu, &iommu->cmdq,
> > +                                    sizeof(struct riscv_iommu_command));
> > +       if (rc)
> > +               return rc;
> > +
> > +       rc = riscv_iommu_queue_alloc(iommu, &iommu->fltq,
> > +                                    sizeof(struct riscv_iommu_fq_record));
> > +       if (rc)
> > +               return rc;
> > +
> > +       rc = riscv_iommu_queue_enable(iommu, &iommu->cmdq, riscv_iommu_cmdq_process);
> >         if (rc)
> >                 return rc;
> >
> > +       rc = riscv_iommu_queue_enable(iommu, &iommu->fltq, riscv_iommu_fltq_process);
> > +       if (rc)
> > +               goto err_queue_disable;
> > +
> > +       rc = riscv_iommu_iodir_set_mode(iommu, RISCV_IOMMU_DDTP_MODE_MAX);
> > +       if (rc)
> > +               goto err_queue_disable;
> > +
> >         rc = iommu_device_sysfs_add(&iommu->iommu, NULL, NULL, "riscv-iommu@%s",
> >                                     dev_name(iommu->dev));
> >         if (rc) {
> > @@ -537,5 +1026,8 @@ int riscv_iommu_init(struct riscv_iommu_device *iommu)
> >         iommu_device_sysfs_remove(&iommu->iommu);
> >  err_iodir_off:
> >         riscv_iommu_iodir_set_mode(iommu, RISCV_IOMMU_DDTP_MODE_OFF);
> > +err_queue_disable:
> > +       riscv_iommu_queue_disable(&iommu->fltq);
> > +       riscv_iommu_queue_disable(&iommu->cmdq);
> >         return rc;
> >  }
> > diff --git a/drivers/iommu/riscv/iommu.h b/drivers/iommu/riscv/iommu.h
> > index f1696926582c..03e0c45bc7e1 100644
> > --- a/drivers/iommu/riscv/iommu.h
> > +++ b/drivers/iommu/riscv/iommu.h
> > @@ -17,6 +17,22 @@
> >
> >  #include "iommu-bits.h"
> >
> > +struct riscv_iommu_device;
> > +
> > +struct riscv_iommu_queue {
> > +       atomic_t prod;                          /* unbounded producer allocation index */
> > +       atomic_t head;                          /* unbounded shadow ring buffer consumer index */
> > +       atomic_t tail;                          /* unbounded shadow ring buffer producer index */
> > +       unsigned int mask;                      /* index mask, queue length - 1 */
> > +       unsigned int irq;                       /* allocated interrupt number */
> > +       struct riscv_iommu_device *iommu;       /* iommu device handling the queue when active */
> > +       void *base;                             /* ring buffer kernel pointer */
> > +       dma_addr_t phys;                        /* ring buffer physical address */
> > +       u16 qbr;                                /* base register offset, head and tail reference */
> > +       u16 qcr;                                /* control and status register offset */
> > +       u8 qid;                                 /* queue identifier, same as RISCV_IOMMU_INTR_XX */
> > +};
> > +
> >  struct riscv_iommu_device {
> >         /* iommu core interface */
> >         struct iommu_device iommu;
> > @@ -34,6 +50,11 @@ struct riscv_iommu_device {
> >         /* available interrupt numbers, MSI or WSI */
> >         unsigned int irqs[RISCV_IOMMU_INTR_COUNT];
> >         unsigned int irqs_count;
> > +       unsigned int ivec;
> > +
> > +       /* hardware queues */
> > +       struct riscv_iommu_queue cmdq;
> > +       struct riscv_iommu_queue fltq;
> >
> >         /* device directory */
> >         unsigned int ddt_mode;
> > --
> > 2.34.1
> >

Best,
- Tomasz

> >
> > _______________________________________________
> > linux-riscv mailing list
> > linux-riscv@...ts.infradead.org
> > http://lists.infradead.org/mailman/listinfo/linux-riscv

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ