Message ID | 1485978525-13676-5-git-send-email-jgunthorpe@obsidianresearch.com (mailing list archive) |
---|---|
State | Accepted, archived |
Headers | show |
On Wed, Feb 1, 2017 at 1:48 PM, Jason Gunthorpe <jgunthorpe@obsidianresearch.com> wrote: > This allows the driver to avoid a high order coherent DMA allocation > and memory copy. With this patch it can DMA directly from the kernel > pages that the bitfile is stored in. > > Since this is now a gather DMA operation the driver uses the ISR > to feed the chips DMA queue with each entry from the SGL. > > Signed-off-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com> > Acked-by: Moritz Fischer <moritz.fischer@ettus.com> Acked-by: Alan Tull <atull@kernel.org> > --- > drivers/fpga/zynq-fpga.c | 174 ++++++++++++++++++++++++++++++++++++----------- > 1 file changed, 135 insertions(+), 39 deletions(-) > > diff --git a/drivers/fpga/zynq-fpga.c b/drivers/fpga/zynq-fpga.c > index c3fc2a231e2810..34cb98139442df 100644 > --- a/drivers/fpga/zynq-fpga.c > +++ b/drivers/fpga/zynq-fpga.c > @@ -30,6 +30,7 @@ > #include <linux/pm.h> > #include <linux/regmap.h> > #include <linux/string.h> > +#include <linux/scatterlist.h> > > /* Offsets into SLCR regmap */ > > @@ -80,6 +81,7 @@ > > /* FPGA init status */ > #define STATUS_DMA_Q_F BIT(31) > +#define STATUS_DMA_Q_E BIT(30) > #define STATUS_PCFG_INIT_MASK BIT(4) > > /* Interrupt Status/Mask Register Bit definitions */ > @@ -98,12 +100,16 @@ > #define DMA_INVALID_ADDRESS GENMASK(31, 0) > /* Used to unlock the dev */ > #define UNLOCK_MASK 0x757bdf0d > -/* Timeout for DMA to complete */ > -#define DMA_DONE_TIMEOUT msecs_to_jiffies(1000) > /* Timeout for polling reset bits */ > #define INIT_POLL_TIMEOUT 2500000 > /* Delay for polling reset bits */ > #define INIT_POLL_DELAY 20 > +/* Signal this is the last DMA transfer, wait for the AXI and PCAP before > + * interrupting > + */ > +#define DMA_SRC_LAST_TRANSFER 1 > +/* Timeout for DMA completion */ > +#define DMA_TIMEOUT_MS 5000 > > /* Masks for controlling stuff in SLCR */ > /* Disable all Level shifters */ > @@ -124,6 +130,11 @@ struct zynq_fpga_priv { > void __iomem *io_base; > struct regmap *slcr; > > + spinlock_t dma_lock; > + unsigned int dma_elm; > + unsigned int dma_nelms; > + struct scatterlist *cur_sg; > + > struct completion dma_done; > }; > > @@ -149,13 +160,80 @@ static inline void zynq_fpga_set_irq(struct zynq_fpga_priv *priv, u32 enable) > zynq_fpga_write(priv, INT_MASK_OFFSET, ~enable); > } > > +/* Must be called with dma_lock held */ > +static void zynq_step_dma(struct zynq_fpga_priv *priv) > +{ > + u32 addr; > + u32 len; > + bool first; > + > + first = priv->dma_elm == 0; > + while (priv->cur_sg) { > + /* Feed the DMA queue until it is full. */ > + if (zynq_fpga_read(priv, STATUS_OFFSET) & STATUS_DMA_Q_F) > + break; > + > + addr = sg_dma_address(priv->cur_sg); > + len = sg_dma_len(priv->cur_sg); > + if (priv->dma_elm + 1 == priv->dma_nelms) { > + /* The last transfer waits for the PCAP to finish too, > + * notice this also changes the irq_mask to ignore > + * IXR_DMA_DONE_MASK which ensures we do not trigger > + * the completion too early. > + */ > + addr |= DMA_SRC_LAST_TRANSFER; > + priv->cur_sg = NULL; > + } else { > + priv->cur_sg = sg_next(priv->cur_sg); > + priv->dma_elm++; > + } > + > + zynq_fpga_write(priv, DMA_SRC_ADDR_OFFSET, addr); > + zynq_fpga_write(priv, DMA_DST_ADDR_OFFSET, DMA_INVALID_ADDRESS); > + zynq_fpga_write(priv, DMA_SRC_LEN_OFFSET, len / 4); > + zynq_fpga_write(priv, DMA_DEST_LEN_OFFSET, 0); > + } > + > + /* Once the first transfer is queued we can turn on the ISR, future > + * calls to zynq_step_dma will happen from the ISR context. The > + * dma_lock spinlock guarentees this handover is done coherently, the > + * ISR enable is put at the end to avoid another CPU spinning in the > + * ISR on this lock. > + */ > + if (first && priv->cur_sg) { > + zynq_fpga_set_irq(priv, > + IXR_DMA_DONE_MASK | IXR_ERROR_FLAGS_MASK); > + } else if (!priv->cur_sg) { > + /* The last transfer changes to DMA & PCAP mode since we do > + * not want to continue until everything has been flushed into > + * the PCAP. > + */ > + zynq_fpga_set_irq(priv, > + IXR_D_P_DONE_MASK | IXR_ERROR_FLAGS_MASK); > + } > +} > + > static irqreturn_t zynq_fpga_isr(int irq, void *data) > { > struct zynq_fpga_priv *priv = data; > + u32 intr_status; > > - /* disable DMA and error IRQs */ > - zynq_fpga_set_irq(priv, 0); > + /* If anything other than DMA completion is reported stop and hand > + * control back to zynq_fpga_ops_write, something went wrong, > + * otherwise progress the DMA. > + */ > + spin_lock(&priv->dma_lock); > + intr_status = zynq_fpga_read(priv, INT_STS_OFFSET); > + if (!(intr_status & IXR_ERROR_FLAGS_MASK) && > + (intr_status & IXR_DMA_DONE_MASK) && priv->cur_sg) { > + zynq_fpga_write(priv, INT_STS_OFFSET, IXR_DMA_DONE_MASK); > + zynq_step_dma(priv); > + spin_unlock(&priv->dma_lock); > + return IRQ_HANDLED; > + } > + spin_unlock(&priv->dma_lock); > > + zynq_fpga_set_irq(priv, 0); > complete(&priv->dma_done); > > return IRQ_HANDLED; > @@ -266,10 +344,11 @@ static int zynq_fpga_ops_write_init(struct fpga_manager *mgr, > zynq_fpga_write(priv, CTRL_OFFSET, > (CTRL_PCAP_PR_MASK | CTRL_PCAP_MODE_MASK | ctrl)); > > - /* check that we have room in the command queue */ > + /* We expect that the command queue is empty right now. */ > status = zynq_fpga_read(priv, STATUS_OFFSET); > - if (status & STATUS_DMA_Q_F) { > - dev_err(&mgr->dev, "DMA command queue full\n"); > + if ((status & STATUS_DMA_Q_F) || > + (status & STATUS_DMA_Q_E) != STATUS_DMA_Q_E) { > + dev_err(&mgr->dev, "DMA command queue not right\n"); > err = -EBUSY; > goto out_err; > } > @@ -288,27 +367,36 @@ static int zynq_fpga_ops_write_init(struct fpga_manager *mgr, > return err; > } > > -static int zynq_fpga_ops_write(struct fpga_manager *mgr, > - const char *buf, size_t count) > +static int zynq_fpga_ops_write(struct fpga_manager *mgr, struct sg_table *sgt) > { > struct zynq_fpga_priv *priv; > const char *why; > int err; > - char *kbuf; > - size_t in_count; > - dma_addr_t dma_addr; > - u32 transfer_length; > u32 intr_status; > + unsigned long timeout; > + unsigned long flags; > + struct scatterlist *sg; > + int i; > > - in_count = count; > priv = mgr->priv; > > - kbuf = > - dma_alloc_coherent(mgr->dev.parent, count, &dma_addr, GFP_KERNEL); > - if (!kbuf) > - return -ENOMEM; > + /* The hardware can only DMA multiples of 4 bytes, and it requires the > + * starting addresses to be aligned to 64 bits (UG585 pg 212). > + */ > + for_each_sg(sgt->sgl, sg, sgt->nents, i) { > + if ((sg->offset % 8) || (sg->length % 4)) { > + dev_err(&mgr->dev, > + "Invalid bitstream, chunks must be aligned\n"); > + return -EINVAL; > + } > + } > > - memcpy(kbuf, buf, count); > + priv->dma_nelms = > + dma_map_sg(mgr->dev.parent, sgt->sgl, sgt->nents, DMA_TO_DEVICE); > + if (priv->dma_nelms == 0) { > + dev_err(&mgr->dev, "Unable to DMA map (TO_DEVICE)\n"); > + return -ENOMEM; > + } > > /* enable clock */ > err = clk_enable(priv->clk); > @@ -316,28 +404,31 @@ static int zynq_fpga_ops_write(struct fpga_manager *mgr, > goto out_free; > > zynq_fpga_write(priv, INT_STS_OFFSET, IXR_ALL_MASK); > - > reinit_completion(&priv->dma_done); > > - /* enable DMA and error IRQs */ > - zynq_fpga_set_irq(priv, IXR_D_P_DONE_MASK | IXR_ERROR_FLAGS_MASK); > - > - /* the +1 in the src addr is used to hold off on DMA_DONE IRQ > - * until both AXI and PCAP are done ... > - */ > - zynq_fpga_write(priv, DMA_SRC_ADDR_OFFSET, (u32)(dma_addr) + 1); > - zynq_fpga_write(priv, DMA_DST_ADDR_OFFSET, (u32)DMA_INVALID_ADDRESS); > - > - /* convert #bytes to #words */ > - transfer_length = (count + 3) / 4; > + /* zynq_step_dma will turn on interrupts */ > + spin_lock_irqsave(&priv->dma_lock, flags); > + priv->dma_elm = 0; > + priv->cur_sg = sgt->sgl; > + zynq_step_dma(priv); > + spin_unlock_irqrestore(&priv->dma_lock, flags); > > - zynq_fpga_write(priv, DMA_SRC_LEN_OFFSET, transfer_length); > - zynq_fpga_write(priv, DMA_DEST_LEN_OFFSET, 0); > + timeout = wait_for_completion_timeout(&priv->dma_done, > + msecs_to_jiffies(DMA_TIMEOUT_MS)); > > - wait_for_completion(&priv->dma_done); > + spin_lock_irqsave(&priv->dma_lock, flags); > + zynq_fpga_set_irq(priv, 0); > + priv->cur_sg = NULL; > + spin_unlock_irqrestore(&priv->dma_lock, flags); > > intr_status = zynq_fpga_read(priv, INT_STS_OFFSET); > - zynq_fpga_write(priv, INT_STS_OFFSET, intr_status); > + zynq_fpga_write(priv, INT_STS_OFFSET, IXR_ALL_MASK); > + > + /* There doesn't seem to be a way to force cancel any DMA, so if > + * something went wrong we are relying on the hardware to have halted > + * the DMA before we get here, if there was we could use > + * wait_for_completion_interruptible too. > + */ > > if (intr_status & IXR_ERROR_FLAGS_MASK) { > why = "DMA reported error"; > @@ -345,8 +436,12 @@ static int zynq_fpga_ops_write(struct fpga_manager *mgr, > goto out_report; > } > > - if (!((intr_status & IXR_D_P_DONE_MASK) == IXR_D_P_DONE_MASK)) { > - why = "DMA did not complete"; > + if (priv->cur_sg || > + !((intr_status & IXR_D_P_DONE_MASK) == IXR_D_P_DONE_MASK)) { > + if (timeout == 0) > + why = "DMA timed out"; > + else > + why = "DMA did not complete"; > err = -EIO; > goto out_report; > } > @@ -369,7 +464,7 @@ static int zynq_fpga_ops_write(struct fpga_manager *mgr, > clk_disable(priv->clk); > > out_free: > - dma_free_coherent(mgr->dev.parent, count, kbuf, dma_addr); > + dma_unmap_sg(mgr->dev.parent, sgt->sgl, sgt->nents, DMA_TO_DEVICE); > return err; > } > > @@ -433,7 +528,7 @@ static const struct fpga_manager_ops zynq_fpga_ops = { > .initial_header_size = 128, > .state = zynq_fpga_ops_state, > .write_init = zynq_fpga_ops_write_init, > - .write = zynq_fpga_ops_write, > + .write_sg = zynq_fpga_ops_write, > .write_complete = zynq_fpga_ops_write_complete, > }; > > @@ -447,6 +542,7 @@ static int zynq_fpga_probe(struct platform_device *pdev) > priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); > if (!priv) > return -ENOMEM; > + spin_lock_init(&priv->dma_lock); > > res = platform_get_resource(pdev, IORESOURCE_MEM, 0); > priv->io_base = devm_ioremap_resource(dev, res); > -- > 2.7.4 > -- To unsubscribe from this list: send the line "unsubscribe linux-fpga" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/drivers/fpga/zynq-fpga.c b/drivers/fpga/zynq-fpga.c index c3fc2a231e2810..34cb98139442df 100644 --- a/drivers/fpga/zynq-fpga.c +++ b/drivers/fpga/zynq-fpga.c @@ -30,6 +30,7 @@ #include <linux/pm.h> #include <linux/regmap.h> #include <linux/string.h> +#include <linux/scatterlist.h> /* Offsets into SLCR regmap */ @@ -80,6 +81,7 @@ /* FPGA init status */ #define STATUS_DMA_Q_F BIT(31) +#define STATUS_DMA_Q_E BIT(30) #define STATUS_PCFG_INIT_MASK BIT(4) /* Interrupt Status/Mask Register Bit definitions */ @@ -98,12 +100,16 @@ #define DMA_INVALID_ADDRESS GENMASK(31, 0) /* Used to unlock the dev */ #define UNLOCK_MASK 0x757bdf0d -/* Timeout for DMA to complete */ -#define DMA_DONE_TIMEOUT msecs_to_jiffies(1000) /* Timeout for polling reset bits */ #define INIT_POLL_TIMEOUT 2500000 /* Delay for polling reset bits */ #define INIT_POLL_DELAY 20 +/* Signal this is the last DMA transfer, wait for the AXI and PCAP before + * interrupting + */ +#define DMA_SRC_LAST_TRANSFER 1 +/* Timeout for DMA completion */ +#define DMA_TIMEOUT_MS 5000 /* Masks for controlling stuff in SLCR */ /* Disable all Level shifters */ @@ -124,6 +130,11 @@ struct zynq_fpga_priv { void __iomem *io_base; struct regmap *slcr; + spinlock_t dma_lock; + unsigned int dma_elm; + unsigned int dma_nelms; + struct scatterlist *cur_sg; + struct completion dma_done; }; @@ -149,13 +160,80 @@ static inline void zynq_fpga_set_irq(struct zynq_fpga_priv *priv, u32 enable) zynq_fpga_write(priv, INT_MASK_OFFSET, ~enable); } +/* Must be called with dma_lock held */ +static void zynq_step_dma(struct zynq_fpga_priv *priv) +{ + u32 addr; + u32 len; + bool first; + + first = priv->dma_elm == 0; + while (priv->cur_sg) { + /* Feed the DMA queue until it is full. */ + if (zynq_fpga_read(priv, STATUS_OFFSET) & STATUS_DMA_Q_F) + break; + + addr = sg_dma_address(priv->cur_sg); + len = sg_dma_len(priv->cur_sg); + if (priv->dma_elm + 1 == priv->dma_nelms) { + /* The last transfer waits for the PCAP to finish too, + * notice this also changes the irq_mask to ignore + * IXR_DMA_DONE_MASK which ensures we do not trigger + * the completion too early. + */ + addr |= DMA_SRC_LAST_TRANSFER; + priv->cur_sg = NULL; + } else { + priv->cur_sg = sg_next(priv->cur_sg); + priv->dma_elm++; + } + + zynq_fpga_write(priv, DMA_SRC_ADDR_OFFSET, addr); + zynq_fpga_write(priv, DMA_DST_ADDR_OFFSET, DMA_INVALID_ADDRESS); + zynq_fpga_write(priv, DMA_SRC_LEN_OFFSET, len / 4); + zynq_fpga_write(priv, DMA_DEST_LEN_OFFSET, 0); + } + + /* Once the first transfer is queued we can turn on the ISR, future + * calls to zynq_step_dma will happen from the ISR context. The + * dma_lock spinlock guarentees this handover is done coherently, the + * ISR enable is put at the end to avoid another CPU spinning in the + * ISR on this lock. + */ + if (first && priv->cur_sg) { + zynq_fpga_set_irq(priv, + IXR_DMA_DONE_MASK | IXR_ERROR_FLAGS_MASK); + } else if (!priv->cur_sg) { + /* The last transfer changes to DMA & PCAP mode since we do + * not want to continue until everything has been flushed into + * the PCAP. + */ + zynq_fpga_set_irq(priv, + IXR_D_P_DONE_MASK | IXR_ERROR_FLAGS_MASK); + } +} + static irqreturn_t zynq_fpga_isr(int irq, void *data) { struct zynq_fpga_priv *priv = data; + u32 intr_status; - /* disable DMA and error IRQs */ - zynq_fpga_set_irq(priv, 0); + /* If anything other than DMA completion is reported stop and hand + * control back to zynq_fpga_ops_write, something went wrong, + * otherwise progress the DMA. + */ + spin_lock(&priv->dma_lock); + intr_status = zynq_fpga_read(priv, INT_STS_OFFSET); + if (!(intr_status & IXR_ERROR_FLAGS_MASK) && + (intr_status & IXR_DMA_DONE_MASK) && priv->cur_sg) { + zynq_fpga_write(priv, INT_STS_OFFSET, IXR_DMA_DONE_MASK); + zynq_step_dma(priv); + spin_unlock(&priv->dma_lock); + return IRQ_HANDLED; + } + spin_unlock(&priv->dma_lock); + zynq_fpga_set_irq(priv, 0); complete(&priv->dma_done); return IRQ_HANDLED; @@ -266,10 +344,11 @@ static int zynq_fpga_ops_write_init(struct fpga_manager *mgr, zynq_fpga_write(priv, CTRL_OFFSET, (CTRL_PCAP_PR_MASK | CTRL_PCAP_MODE_MASK | ctrl)); - /* check that we have room in the command queue */ + /* We expect that the command queue is empty right now. */ status = zynq_fpga_read(priv, STATUS_OFFSET); - if (status & STATUS_DMA_Q_F) { - dev_err(&mgr->dev, "DMA command queue full\n"); + if ((status & STATUS_DMA_Q_F) || + (status & STATUS_DMA_Q_E) != STATUS_DMA_Q_E) { + dev_err(&mgr->dev, "DMA command queue not right\n"); err = -EBUSY; goto out_err; } @@ -288,27 +367,36 @@ static int zynq_fpga_ops_write_init(struct fpga_manager *mgr, return err; } -static int zynq_fpga_ops_write(struct fpga_manager *mgr, - const char *buf, size_t count) +static int zynq_fpga_ops_write(struct fpga_manager *mgr, struct sg_table *sgt) { struct zynq_fpga_priv *priv; const char *why; int err; - char *kbuf; - size_t in_count; - dma_addr_t dma_addr; - u32 transfer_length; u32 intr_status; + unsigned long timeout; + unsigned long flags; + struct scatterlist *sg; + int i; - in_count = count; priv = mgr->priv; - kbuf = - dma_alloc_coherent(mgr->dev.parent, count, &dma_addr, GFP_KERNEL); - if (!kbuf) - return -ENOMEM; + /* The hardware can only DMA multiples of 4 bytes, and it requires the + * starting addresses to be aligned to 64 bits (UG585 pg 212). + */ + for_each_sg(sgt->sgl, sg, sgt->nents, i) { + if ((sg->offset % 8) || (sg->length % 4)) { + dev_err(&mgr->dev, + "Invalid bitstream, chunks must be aligned\n"); + return -EINVAL; + } + } - memcpy(kbuf, buf, count); + priv->dma_nelms = + dma_map_sg(mgr->dev.parent, sgt->sgl, sgt->nents, DMA_TO_DEVICE); + if (priv->dma_nelms == 0) { + dev_err(&mgr->dev, "Unable to DMA map (TO_DEVICE)\n"); + return -ENOMEM; + } /* enable clock */ err = clk_enable(priv->clk); @@ -316,28 +404,31 @@ static int zynq_fpga_ops_write(struct fpga_manager *mgr, goto out_free; zynq_fpga_write(priv, INT_STS_OFFSET, IXR_ALL_MASK); - reinit_completion(&priv->dma_done); - /* enable DMA and error IRQs */ - zynq_fpga_set_irq(priv, IXR_D_P_DONE_MASK | IXR_ERROR_FLAGS_MASK); - - /* the +1 in the src addr is used to hold off on DMA_DONE IRQ - * until both AXI and PCAP are done ... - */ - zynq_fpga_write(priv, DMA_SRC_ADDR_OFFSET, (u32)(dma_addr) + 1); - zynq_fpga_write(priv, DMA_DST_ADDR_OFFSET, (u32)DMA_INVALID_ADDRESS); - - /* convert #bytes to #words */ - transfer_length = (count + 3) / 4; + /* zynq_step_dma will turn on interrupts */ + spin_lock_irqsave(&priv->dma_lock, flags); + priv->dma_elm = 0; + priv->cur_sg = sgt->sgl; + zynq_step_dma(priv); + spin_unlock_irqrestore(&priv->dma_lock, flags); - zynq_fpga_write(priv, DMA_SRC_LEN_OFFSET, transfer_length); - zynq_fpga_write(priv, DMA_DEST_LEN_OFFSET, 0); + timeout = wait_for_completion_timeout(&priv->dma_done, + msecs_to_jiffies(DMA_TIMEOUT_MS)); - wait_for_completion(&priv->dma_done); + spin_lock_irqsave(&priv->dma_lock, flags); + zynq_fpga_set_irq(priv, 0); + priv->cur_sg = NULL; + spin_unlock_irqrestore(&priv->dma_lock, flags); intr_status = zynq_fpga_read(priv, INT_STS_OFFSET); - zynq_fpga_write(priv, INT_STS_OFFSET, intr_status); + zynq_fpga_write(priv, INT_STS_OFFSET, IXR_ALL_MASK); + + /* There doesn't seem to be a way to force cancel any DMA, so if + * something went wrong we are relying on the hardware to have halted + * the DMA before we get here, if there was we could use + * wait_for_completion_interruptible too. + */ if (intr_status & IXR_ERROR_FLAGS_MASK) { why = "DMA reported error"; @@ -345,8 +436,12 @@ static int zynq_fpga_ops_write(struct fpga_manager *mgr, goto out_report; } - if (!((intr_status & IXR_D_P_DONE_MASK) == IXR_D_P_DONE_MASK)) { - why = "DMA did not complete"; + if (priv->cur_sg || + !((intr_status & IXR_D_P_DONE_MASK) == IXR_D_P_DONE_MASK)) { + if (timeout == 0) + why = "DMA timed out"; + else + why = "DMA did not complete"; err = -EIO; goto out_report; } @@ -369,7 +464,7 @@ static int zynq_fpga_ops_write(struct fpga_manager *mgr, clk_disable(priv->clk); out_free: - dma_free_coherent(mgr->dev.parent, count, kbuf, dma_addr); + dma_unmap_sg(mgr->dev.parent, sgt->sgl, sgt->nents, DMA_TO_DEVICE); return err; } @@ -433,7 +528,7 @@ static const struct fpga_manager_ops zynq_fpga_ops = { .initial_header_size = 128, .state = zynq_fpga_ops_state, .write_init = zynq_fpga_ops_write_init, - .write = zynq_fpga_ops_write, + .write_sg = zynq_fpga_ops_write, .write_complete = zynq_fpga_ops_write_complete, }; @@ -447,6 +542,7 @@ static int zynq_fpga_probe(struct platform_device *pdev) priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; + spin_lock_init(&priv->dma_lock); res = platform_get_resource(pdev, IORESOURCE_MEM, 0); priv->io_base = devm_ioremap_resource(dev, res);