lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <CANk1AXQVs2CBRAZn-SO-wa9phktC0kg_Ouf9im6sswDtEJ0Kiw@mail.gmail.com>
Date:   Fri, 10 Feb 2017 10:37:40 -0600
From:   Alan Tull <atull@...nel.org>
To:     Jason Gunthorpe <jgunthorpe@...idianresearch.com>
Cc:     Greg Kroah-Hartman <gregkh@...uxfoundation.org>,
        atull <atull@...nsource.altera.com>,
        Moritz Fischer <moritz.fischer@...us.com>,
        linux-kernel <linux-kernel@...r.kernel.org>,
        linux-fpga@...r.kernel.org
Subject: Re: [PATCH v4 fpga 4/4] fpga zynq: Use the scatterlist interface

On Wed, Feb 1, 2017 at 1:48 PM, Jason Gunthorpe
<jgunthorpe@...idianresearch.com> wrote:
> This allows the driver to avoid a high order coherent DMA allocation
> and memory copy. With this patch it can DMA directly from the kernel
> pages that the bitfile is stored in.
>
> Since this is now a gather DMA operation the driver uses the ISR
> to feed the chips DMA queue with each entry from the SGL.
>
> Signed-off-by: Jason Gunthorpe <jgunthorpe@...idianresearch.com>
> Acked-by: Moritz Fischer <moritz.fischer@...us.com>

Acked-by: Alan Tull <atull@...nel.org>

> ---
>  drivers/fpga/zynq-fpga.c | 174 ++++++++++++++++++++++++++++++++++++-----------
>  1 file changed, 135 insertions(+), 39 deletions(-)
>
> diff --git a/drivers/fpga/zynq-fpga.c b/drivers/fpga/zynq-fpga.c
> index c3fc2a231e2810..34cb98139442df 100644
> --- a/drivers/fpga/zynq-fpga.c
> +++ b/drivers/fpga/zynq-fpga.c
> @@ -30,6 +30,7 @@
>  #include <linux/pm.h>
>  #include <linux/regmap.h>
>  #include <linux/string.h>
> +#include <linux/scatterlist.h>
>
>  /* Offsets into SLCR regmap */
>
> @@ -80,6 +81,7 @@
>
>  /* FPGA init status */
>  #define STATUS_DMA_Q_F                 BIT(31)
> +#define STATUS_DMA_Q_E                 BIT(30)
>  #define STATUS_PCFG_INIT_MASK          BIT(4)
>
>  /* Interrupt Status/Mask Register Bit definitions */
> @@ -98,12 +100,16 @@
>  #define DMA_INVALID_ADDRESS            GENMASK(31, 0)
>  /* Used to unlock the dev */
>  #define UNLOCK_MASK                    0x757bdf0d
> -/* Timeout for DMA to complete */
> -#define DMA_DONE_TIMEOUT               msecs_to_jiffies(1000)
>  /* Timeout for polling reset bits */
>  #define INIT_POLL_TIMEOUT              2500000
>  /* Delay for polling reset bits */
>  #define INIT_POLL_DELAY                        20
> +/* Signal this is the last DMA transfer, wait for the AXI and PCAP before
> + * interrupting
> + */
> +#define DMA_SRC_LAST_TRANSFER          1
> +/* Timeout for DMA completion */
> +#define DMA_TIMEOUT_MS                 5000
>
>  /* Masks for controlling stuff in SLCR */
>  /* Disable all Level shifters */
> @@ -124,6 +130,11 @@ struct zynq_fpga_priv {
>         void __iomem *io_base;
>         struct regmap *slcr;
>
> +       spinlock_t dma_lock;
> +       unsigned int dma_elm;
> +       unsigned int dma_nelms;
> +       struct scatterlist *cur_sg;
> +
>         struct completion dma_done;
>  };
>
> @@ -149,13 +160,80 @@ static inline void zynq_fpga_set_irq(struct zynq_fpga_priv *priv, u32 enable)
>         zynq_fpga_write(priv, INT_MASK_OFFSET, ~enable);
>  }
>
> +/* Must be called with dma_lock held */
> +static void zynq_step_dma(struct zynq_fpga_priv *priv)
> +{
> +       u32 addr;
> +       u32 len;
> +       bool first;
> +
> +       first = priv->dma_elm == 0;
> +       while (priv->cur_sg) {
> +               /* Feed the DMA queue until it is full. */
> +               if (zynq_fpga_read(priv, STATUS_OFFSET) & STATUS_DMA_Q_F)
> +                       break;
> +
> +               addr = sg_dma_address(priv->cur_sg);
> +               len = sg_dma_len(priv->cur_sg);
> +               if (priv->dma_elm + 1 == priv->dma_nelms) {
> +                       /* The last transfer waits for the PCAP to finish too,
> +                        * notice this also changes the irq_mask to ignore
> +                        * IXR_DMA_DONE_MASK which ensures we do not trigger
> +                        * the completion too early.
> +                        */
> +                       addr |= DMA_SRC_LAST_TRANSFER;
> +                       priv->cur_sg = NULL;
> +               } else {
> +                       priv->cur_sg = sg_next(priv->cur_sg);
> +                       priv->dma_elm++;
> +               }
> +
> +               zynq_fpga_write(priv, DMA_SRC_ADDR_OFFSET, addr);
> +               zynq_fpga_write(priv, DMA_DST_ADDR_OFFSET, DMA_INVALID_ADDRESS);
> +               zynq_fpga_write(priv, DMA_SRC_LEN_OFFSET, len / 4);
> +               zynq_fpga_write(priv, DMA_DEST_LEN_OFFSET, 0);
> +       }
> +
> +       /* Once the first transfer is queued we can turn on the ISR, future
> +        * calls to zynq_step_dma will happen from the ISR context. The
> +        * dma_lock spinlock guarentees this handover is done coherently, the
> +        * ISR enable is put at the end to avoid another CPU spinning in the
> +        * ISR on this lock.
> +        */
> +       if (first && priv->cur_sg) {
> +               zynq_fpga_set_irq(priv,
> +                                 IXR_DMA_DONE_MASK | IXR_ERROR_FLAGS_MASK);
> +       } else if (!priv->cur_sg) {
> +               /* The last transfer changes to DMA & PCAP mode since we do
> +                * not want to continue until everything has been flushed into
> +                * the PCAP.
> +                */
> +               zynq_fpga_set_irq(priv,
> +                                 IXR_D_P_DONE_MASK | IXR_ERROR_FLAGS_MASK);
> +       }
> +}
> +
>  static irqreturn_t zynq_fpga_isr(int irq, void *data)
>  {
>         struct zynq_fpga_priv *priv = data;
> +       u32 intr_status;
>
> -       /* disable DMA and error IRQs */
> -       zynq_fpga_set_irq(priv, 0);
> +       /* If anything other than DMA completion is reported stop and hand
> +        * control back to zynq_fpga_ops_write, something went wrong,
> +        * otherwise progress the DMA.
> +        */
> +       spin_lock(&priv->dma_lock);
> +       intr_status = zynq_fpga_read(priv, INT_STS_OFFSET);
> +       if (!(intr_status & IXR_ERROR_FLAGS_MASK) &&
> +           (intr_status & IXR_DMA_DONE_MASK) && priv->cur_sg) {
> +               zynq_fpga_write(priv, INT_STS_OFFSET, IXR_DMA_DONE_MASK);
> +               zynq_step_dma(priv);
> +               spin_unlock(&priv->dma_lock);
> +               return IRQ_HANDLED;
> +       }
> +       spin_unlock(&priv->dma_lock);
>
> +       zynq_fpga_set_irq(priv, 0);
>         complete(&priv->dma_done);
>
>         return IRQ_HANDLED;
> @@ -266,10 +344,11 @@ static int zynq_fpga_ops_write_init(struct fpga_manager *mgr,
>         zynq_fpga_write(priv, CTRL_OFFSET,
>                         (CTRL_PCAP_PR_MASK | CTRL_PCAP_MODE_MASK | ctrl));
>
> -       /* check that we have room in the command queue */
> +       /* We expect that the command queue is empty right now. */
>         status = zynq_fpga_read(priv, STATUS_OFFSET);
> -       if (status & STATUS_DMA_Q_F) {
> -               dev_err(&mgr->dev, "DMA command queue full\n");
> +       if ((status & STATUS_DMA_Q_F) ||
> +           (status & STATUS_DMA_Q_E) != STATUS_DMA_Q_E) {
> +               dev_err(&mgr->dev, "DMA command queue not right\n");
>                 err = -EBUSY;
>                 goto out_err;
>         }
> @@ -288,27 +367,36 @@ static int zynq_fpga_ops_write_init(struct fpga_manager *mgr,
>         return err;
>  }
>
> -static int zynq_fpga_ops_write(struct fpga_manager *mgr,
> -                              const char *buf, size_t count)
> +static int zynq_fpga_ops_write(struct fpga_manager *mgr, struct sg_table *sgt)
>  {
>         struct zynq_fpga_priv *priv;
>         const char *why;
>         int err;
> -       char *kbuf;
> -       size_t in_count;
> -       dma_addr_t dma_addr;
> -       u32 transfer_length;
>         u32 intr_status;
> +       unsigned long timeout;
> +       unsigned long flags;
> +       struct scatterlist *sg;
> +       int i;
>
> -       in_count = count;
>         priv = mgr->priv;
>
> -       kbuf =
> -           dma_alloc_coherent(mgr->dev.parent, count, &dma_addr, GFP_KERNEL);
> -       if (!kbuf)
> -               return -ENOMEM;
> +       /* The hardware can only DMA multiples of 4 bytes, and it requires the
> +        * starting addresses to be aligned to 64 bits (UG585 pg 212).
> +        */
> +       for_each_sg(sgt->sgl, sg, sgt->nents, i) {
> +               if ((sg->offset % 8) || (sg->length % 4)) {
> +                       dev_err(&mgr->dev,
> +                           "Invalid bitstream, chunks must be aligned\n");
> +                       return -EINVAL;
> +               }
> +       }
>
> -       memcpy(kbuf, buf, count);
> +       priv->dma_nelms =
> +           dma_map_sg(mgr->dev.parent, sgt->sgl, sgt->nents, DMA_TO_DEVICE);
> +       if (priv->dma_nelms == 0) {
> +               dev_err(&mgr->dev, "Unable to DMA map (TO_DEVICE)\n");
> +               return -ENOMEM;
> +       }
>
>         /* enable clock */
>         err = clk_enable(priv->clk);
> @@ -316,28 +404,31 @@ static int zynq_fpga_ops_write(struct fpga_manager *mgr,
>                 goto out_free;
>
>         zynq_fpga_write(priv, INT_STS_OFFSET, IXR_ALL_MASK);
> -
>         reinit_completion(&priv->dma_done);
>
> -       /* enable DMA and error IRQs */
> -       zynq_fpga_set_irq(priv, IXR_D_P_DONE_MASK | IXR_ERROR_FLAGS_MASK);
> -
> -       /* the +1 in the src addr is used to hold off on DMA_DONE IRQ
> -        * until both AXI and PCAP are done ...
> -        */
> -       zynq_fpga_write(priv, DMA_SRC_ADDR_OFFSET, (u32)(dma_addr) + 1);
> -       zynq_fpga_write(priv, DMA_DST_ADDR_OFFSET, (u32)DMA_INVALID_ADDRESS);
> -
> -       /* convert #bytes to #words */
> -       transfer_length = (count + 3) / 4;
> +       /* zynq_step_dma will turn on interrupts */
> +       spin_lock_irqsave(&priv->dma_lock, flags);
> +       priv->dma_elm = 0;
> +       priv->cur_sg = sgt->sgl;
> +       zynq_step_dma(priv);
> +       spin_unlock_irqrestore(&priv->dma_lock, flags);
>
> -       zynq_fpga_write(priv, DMA_SRC_LEN_OFFSET, transfer_length);
> -       zynq_fpga_write(priv, DMA_DEST_LEN_OFFSET, 0);
> +       timeout = wait_for_completion_timeout(&priv->dma_done,
> +                                             msecs_to_jiffies(DMA_TIMEOUT_MS));
>
> -       wait_for_completion(&priv->dma_done);
> +       spin_lock_irqsave(&priv->dma_lock, flags);
> +       zynq_fpga_set_irq(priv, 0);
> +       priv->cur_sg = NULL;
> +       spin_unlock_irqrestore(&priv->dma_lock, flags);
>
>         intr_status = zynq_fpga_read(priv, INT_STS_OFFSET);
> -       zynq_fpga_write(priv, INT_STS_OFFSET, intr_status);
> +       zynq_fpga_write(priv, INT_STS_OFFSET, IXR_ALL_MASK);
> +
> +       /* There doesn't seem to be a way to force cancel any DMA, so if
> +        * something went wrong we are relying on the hardware to have halted
> +        * the DMA before we get here, if there was we could use
> +        * wait_for_completion_interruptible too.
> +        */
>
>         if (intr_status & IXR_ERROR_FLAGS_MASK) {
>                 why = "DMA reported error";
> @@ -345,8 +436,12 @@ static int zynq_fpga_ops_write(struct fpga_manager *mgr,
>                 goto out_report;
>         }
>
> -       if (!((intr_status & IXR_D_P_DONE_MASK) == IXR_D_P_DONE_MASK)) {
> -               why = "DMA did not complete";
> +       if (priv->cur_sg ||
> +           !((intr_status & IXR_D_P_DONE_MASK) == IXR_D_P_DONE_MASK)) {
> +               if (timeout == 0)
> +                       why = "DMA timed out";
> +               else
> +                       why = "DMA did not complete";
>                 err = -EIO;
>                 goto out_report;
>         }
> @@ -369,7 +464,7 @@ static int zynq_fpga_ops_write(struct fpga_manager *mgr,
>         clk_disable(priv->clk);
>
>  out_free:
> -       dma_free_coherent(mgr->dev.parent, count, kbuf, dma_addr);
> +       dma_unmap_sg(mgr->dev.parent, sgt->sgl, sgt->nents, DMA_TO_DEVICE);
>         return err;
>  }
>
> @@ -433,7 +528,7 @@ static const struct fpga_manager_ops zynq_fpga_ops = {
>         .initial_header_size = 128,
>         .state = zynq_fpga_ops_state,
>         .write_init = zynq_fpga_ops_write_init,
> -       .write = zynq_fpga_ops_write,
> +       .write_sg = zynq_fpga_ops_write,
>         .write_complete = zynq_fpga_ops_write_complete,
>  };
>
> @@ -447,6 +542,7 @@ static int zynq_fpga_probe(struct platform_device *pdev)
>         priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
>         if (!priv)
>                 return -ENOMEM;
> +       spin_lock_init(&priv->dma_lock);
>
>         res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
>         priv->io_base = devm_ioremap_resource(dev, res);
> --
> 2.7.4
>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ