Message ID | 20210427131344.139443-9-senozhatsky@chromium.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | videobuf2: support new noncontiguous DMA API | expand |
On Tue, Apr 27, 2021 at 10:13:43PM +0900, Sergey Senozhatsky wrote: > This adds support for new noncontiguous DMA API, which > requires allocators to have two execution branches: one > for the current API, and one for the new one. > > Signed-off-by: Sergey Senozhatsky <senozhatsky@chromium.org> > [hch: untested conversion to the ne API] > Signed-off-by: Christoph Hellwig <hch@lst.de> I think you can skip my signoff and the note here for the trivial conversion. The code looks fine to me from the DMA API perspective: Acked-by: Christoph Hellwig <hch@lst.de>
On 27/04/2021 15:13, Sergey Senozhatsky wrote: > This adds support for new noncontiguous DMA API, which > requires allocators to have two execution branches: one > for the current API, and one for the new one. > > Signed-off-by: Sergey Senozhatsky <senozhatsky@chromium.org> > [hch: untested conversion to the ne API] > Signed-off-by: Christoph Hellwig <hch@lst.de> > --- > .../common/videobuf2/videobuf2-dma-contig.c | 140 +++++++++++++++--- > 1 file changed, 116 insertions(+), 24 deletions(-) > > diff --git a/drivers/media/common/videobuf2/videobuf2-dma-contig.c b/drivers/media/common/videobuf2/videobuf2-dma-contig.c > index 1e218bc440c6..40eaaef1565b 100644 > --- a/drivers/media/common/videobuf2/videobuf2-dma-contig.c > +++ b/drivers/media/common/videobuf2/videobuf2-dma-contig.c > @@ -17,6 +17,7 @@ > #include <linux/sched.h> > #include <linux/slab.h> > #include <linux/dma-mapping.h> > +#include <linux/highmem.h> > > #include <media/videobuf2-v4l2.h> > #include <media/videobuf2-dma-contig.h> > @@ -42,6 +43,7 @@ struct vb2_dc_buf { > struct dma_buf_attachment *db_attach; > > struct vb2_buffer *vb; > + bool coherent_mem; > }; > > /*********************************************/ > @@ -78,14 +80,22 @@ static void *vb2_dc_cookie(struct vb2_buffer *vb, void *buf_priv) > static void *vb2_dc_vaddr(struct vb2_buffer *vb, void *buf_priv) > { > struct vb2_dc_buf *buf = buf_priv; > - struct dma_buf_map map; > - int ret; > > - if (!buf->vaddr && buf->db_attach) { > - ret = dma_buf_vmap(buf->db_attach->dmabuf, &map); > - buf->vaddr = ret ? NULL : map.vaddr; > + if (buf->vaddr) > + return buf->vaddr; > + > + if (buf->db_attach) { > + struct dma_buf_map map; > + > + if (!dma_buf_vmap(buf->db_attach->dmabuf, &map)) > + buf->vaddr = map.vaddr; > + > + return buf->vaddr; > } > > + /* Non-coherent memory */ > + buf->vaddr = dma_vmap_noncontiguous(buf->dev, buf->size, buf->dma_sgt); > + This function can use some comments. What is happening AFAICS is that buf->vaddr is either set in vb2_dc_alloc_coherent (unless DMA_ATTR_NO_KERNEL_MAPPING was set), it is obtained through dma_buf_vmap() if the buffer was attached to a dma_buf, or it is allocated via dma_vmap_noncontiguous() for non-coherent memory. But this leaves coherent memory with DMA_ATTR_NO_KERNEL_MAPPING set, what is vaddr in that case? I think it will call dma_vmap_noncontiguous() incorrectly in that case, shouldn't there be a check for !buf->coherent_mem before the call to dma_vmap_noncontiguous()? It's quite confusing since vaddr can be set in different functions. > return buf->vaddr; > } > > @@ -101,13 +111,26 @@ static void vb2_dc_prepare(void *buf_priv) > struct vb2_dc_buf *buf = buf_priv; > struct sg_table *sgt = buf->dma_sgt; > > + /* This takes care of DMABUF and user-enforced cache sync hint */ > if (buf->vb->skip_cache_sync_on_prepare) > return; > > + /* > + * Coherent MMAP buffers do not need to be synced, unlike USERPTR > + * and non-coherent MMAP buffers. > + */ > + if (buf->vb->memory == V4L2_MEMORY_MMAP && buf->coherent_mem) > + return; > + > if (!sgt) > return; > > + /* For both USERPTR and non-coherent MMAP */ > dma_sync_sgtable_for_device(buf->dev, sgt, buf->dma_dir); > + > + /* Non-coherent MMAP only */ > + if (!buf->coherent_mem && buf->vaddr) > + flush_kernel_vmap_range(buf->vaddr, buf->size); > } > > static void vb2_dc_finish(void *buf_priv) > @@ -115,19 +138,46 @@ static void vb2_dc_finish(void *buf_priv) > struct vb2_dc_buf *buf = buf_priv; > struct sg_table *sgt = buf->dma_sgt; > > + /* This takes care of DMABUF and user-enforced cache sync hint */ > if (buf->vb->skip_cache_sync_on_finish) > return; > > + /* > + * Coherent MMAP buffers do not need to be synced, unlike USERPTR > + * and non-coherent MMAP buffers. > + */ > + if (buf->vb->memory == V4L2_MEMORY_MMAP && buf->coherent_mem) > + return; > + > if (!sgt) > return; > > + /* For both USERPTR and non-coherent MMAP */ > dma_sync_sgtable_for_cpu(buf->dev, sgt, buf->dma_dir); > + > + /* Non-coherent MMAP only */ > + if (!buf->coherent_mem && buf->vaddr) > + invalidate_kernel_vmap_range(buf->vaddr, buf->size); > } > > /*********************************************/ > /* callbacks for MMAP buffers */ > /*********************************************/ > > +static void __vb2_dc_put(struct vb2_dc_buf *buf) > +{ > + if (buf->coherent_mem) { > + dma_free_attrs(buf->dev, buf->size, buf->cookie, > + buf->dma_addr, buf->attrs); > + return; > + } > + > + if (buf->vaddr) > + dma_vunmap_noncontiguous(buf->dev, buf->vaddr); > + dma_free_noncontiguous(buf->dev, buf->size, > + buf->dma_sgt, buf->dma_addr); > +} Is there a reason for creating __vb2_dc_put()? I found it more a hindrance when reviewing than an advantage. I prefer to have it moved to vb2_dc_put, that way all the clean up happens in that single function. > + > static void vb2_dc_put(void *buf_priv) > { > struct vb2_dc_buf *buf = buf_priv; > @@ -139,17 +189,52 @@ static void vb2_dc_put(void *buf_priv) > sg_free_table(buf->sgt_base); > kfree(buf->sgt_base); > } > - dma_free_attrs(buf->dev, buf->size, buf->cookie, buf->dma_addr, > - buf->attrs); > + __vb2_dc_put(buf); > put_device(buf->dev); > kfree(buf); > } > > +static int vb2_dc_alloc_coherent(struct vb2_dc_buf *buf) > +{ > + struct vb2_queue *q = buf->vb->vb2_queue; > + > + buf->cookie = dma_alloc_attrs(buf->dev, > + buf->size, > + &buf->dma_addr, > + GFP_KERNEL | q->gfp_flags, > + buf->attrs); > + if (!buf->cookie) > + return -ENOMEM; > + if ((q->dma_attrs & DMA_ATTR_NO_KERNEL_MAPPING) == 0) > + buf->vaddr = buf->cookie; > + return 0; > +} > + > +static int vb2_dc_alloc_non_coherent(struct vb2_dc_buf *buf) > +{ > + struct vb2_queue *q = buf->vb->vb2_queue; > + > + buf->dma_sgt = dma_alloc_noncontiguous(buf->dev, > + buf->size, > + buf->dma_dir, > + GFP_KERNEL | q->gfp_flags, > + buf->attrs); > + if (!buf->dma_sgt) > + return -ENOMEM; > + /* > + * For requests that need kernel mapping (DMA_ATTR_NO_KERNEL_MAPPING I'm confused about this comment: DMA_ATTR_NO_KERNEL_MAPPING is only checked in vb2_dc_alloc_coherent(), so is this comment valid? > + * bit is cleared) we perform dma_vmap_noncontiguous() later, in > + * vb2_dc_vadd(). Typo: vb2_dc_vadd -> vb2_dc_vaddr > + */ So this leaves buf->vaddr to NULL. Is it possible that after allocating the buffer, the buffer is exported as a dma_buf and another device calls dma_buf_ops vb2_dc_dmabuf_ops_vmap, which in turn calls dma_buf_map_set_vaddr(map, buf->vaddr); with a NULL buf->vaddr? I may be barking up the wrong tree here, to really understand it I would have to spend more time. > + return 0; > +} > + > static void *vb2_dc_alloc(struct vb2_buffer *vb, > struct device *dev, > unsigned long size) > { > struct vb2_dc_buf *buf; > + int ret; > > if (WARN_ON(!dev)) > return ERR_PTR(-EINVAL); > @@ -159,27 +244,28 @@ static void *vb2_dc_alloc(struct vb2_buffer *vb, > return ERR_PTR(-ENOMEM); > > buf->attrs = vb->vb2_queue->dma_attrs; > - buf->cookie = dma_alloc_attrs(dev, size, &buf->dma_addr, > - GFP_KERNEL | vb->vb2_queue->gfp_flags, > - buf->attrs); > - if (!buf->cookie) { > - dev_err(dev, "dma_alloc_coherent of size %ld failed\n", size); > - kfree(buf); > - return ERR_PTR(-ENOMEM); > - } > - > - if ((buf->attrs & DMA_ATTR_NO_KERNEL_MAPPING) == 0) > - buf->vaddr = buf->cookie; > + buf->dma_dir = vb->vb2_queue->dma_dir; > + buf->vb = vb; > + buf->coherent_mem = vb->vb2_queue->coherent_mem; > > + buf->size = size; > /* Prevent the device from being released while the buffer is used */ > buf->dev = get_device(dev); > - buf->size = size; > - buf->dma_dir = vb->vb2_queue->dma_dir; > + > + if (buf->coherent_mem) > + ret = vb2_dc_alloc_coherent(buf); > + else > + ret = vb2_dc_alloc_non_coherent(buf); > + > + if (ret) { > + dev_err(dev, "dma alloc of size %ld failed\n", size); > + kfree(buf); > + return ERR_PTR(-ENOMEM); > + } > > buf->handler.refcount = &buf->refcount; > buf->handler.put = vb2_dc_put; > buf->handler.arg = buf; > - buf->vb = vb; > > refcount_set(&buf->refcount, 1); > > @@ -196,9 +282,12 @@ static int vb2_dc_mmap(void *buf_priv, struct vm_area_struct *vma) > return -EINVAL; > } > > - ret = dma_mmap_attrs(buf->dev, vma, buf->cookie, > - buf->dma_addr, buf->size, buf->attrs); > - > + if (buf->coherent_mem) > + ret = dma_mmap_attrs(buf->dev, vma, buf->cookie, buf->dma_addr, > + buf->size, buf->attrs); > + else > + ret = dma_mmap_noncontiguous(buf->dev, vma, buf->size, > + buf->dma_sgt); > if (ret) { > pr_err("Remapping memory failed, error: %d\n", ret); > return ret; > @@ -390,6 +479,9 @@ static struct sg_table *vb2_dc_get_base_sgt(struct vb2_dc_buf *buf) > int ret; > struct sg_table *sgt; > > + if (!buf->coherent_mem) > + return buf->dma_sgt; > + > sgt = kmalloc(sizeof(*sgt), GFP_KERNEL); > if (!sgt) { > dev_err(buf->dev, "failed to alloc sg table\n"); > Regards, Hans
On (21/06/03 13:59), Hans Verkuil wrote: [[.] > > static void *vb2_dc_vaddr(struct vb2_buffer *vb, void *buf_priv) > > { > > struct vb2_dc_buf *buf = buf_priv; > > - struct dma_buf_map map; > > - int ret; > > > > - if (!buf->vaddr && buf->db_attach) { > > - ret = dma_buf_vmap(buf->db_attach->dmabuf, &map); > > - buf->vaddr = ret ? NULL : map.vaddr; > > + if (buf->vaddr) > > + return buf->vaddr; > > + > > + if (buf->db_attach) { > > + struct dma_buf_map map; > > + > > + if (!dma_buf_vmap(buf->db_attach->dmabuf, &map)) > > + buf->vaddr = map.vaddr; > > + > > + return buf->vaddr; > > } > > > > + /* Non-coherent memory */ > > + buf->vaddr = dma_vmap_noncontiguous(buf->dev, buf->size, buf->dma_sgt); > > + > > This function can use some comments. What is happening AFAICS is that > buf->vaddr is either set in vb2_dc_alloc_coherent (unless > DMA_ATTR_NO_KERNEL_MAPPING was set), it is obtained through dma_buf_vmap() > if the buffer was attached to a dma_buf, or it is allocated via > dma_vmap_noncontiguous() for non-coherent memory. Yeah, it's complicated. Maybe we can make things more symmetrical. > But this leaves coherent memory with DMA_ATTR_NO_KERNEL_MAPPING set, what > is vaddr in that case? I think it will call dma_vmap_noncontiguous() > incorrectly in that case, shouldn't there be a check for !buf->coherent_mem > before the call to dma_vmap_noncontiguous()? Thanks a lot for looking into it. So vb2_dc_vaddr() can look like this: static void *vb2_dc_vaddr(struct vb2_buffer *vb, void *buf_priv) { struct vb2_dc_buf *buf = buf_priv; if (buf->vaddr) return buf->vaddr; if (buf->db_attach) { struct dma_buf_map map; if (!dma_buf_vmap(buf->db_attach->dmabuf, &map)) buf->vaddr = map.vaddr; return buf->vaddr; } if (!buf->coherent_mem) buf->vaddr = dma_vmap_noncontiguous(buf->dev, buf->size, buf->dma_sgt); return buf->vaddr; } And in vb2_dc_alloc functions set vaddr for !DMA_ATTR_NO_KERNEL_MAPPING in both coherent and non-coherent. So that we probably can have less branches when ->vaddr is NULL for one type of allocations, and is not NULL for another. static int vb2_dc_alloc_coherent(struct vb2_dc_buf *buf) { struct vb2_queue *q = buf->vb->vb2_queue; buf->cookie = dma_alloc_attrs(buf->dev, buf->size, &buf->dma_addr, GFP_KERNEL | q->gfp_flags, buf->attrs); if (!buf->cookie) return -ENOMEM; if (q->dma_attrs & DMA_ATTR_NO_KERNEL_MAPPING) return 0; buf->vaddr = buf->cookie; return 0; } static int vb2_dc_alloc_non_coherent(struct vb2_dc_buf *buf) { struct vb2_queue *q = buf->vb->vb2_queue; buf->dma_sgt = dma_alloc_noncontiguous(buf->dev, buf->size, buf->dma_dir, GFP_KERNEL | q->gfp_flags, buf->attrs); if (!buf->dma_sgt) return -ENOMEM; if (q->dma_attrs & DMA_ATTR_NO_KERNEL_MAPPING) return 0; buf->vaddr = dma_vmap_noncontiguous(buf->dev, buf->size, buf->dma_sgt); if (!buf->vaddr) { dma_free_noncontiguous(buf->dev, buf->size, buf->dma_sgt, buf->dma_addr); return -ENOMEM; } return 0; }
On (21/06/03 13:59), Hans Verkuil wrote: > > > > +static void __vb2_dc_put(struct vb2_dc_buf *buf) > > +{ > > + if (buf->coherent_mem) { > > + dma_free_attrs(buf->dev, buf->size, buf->cookie, > > + buf->dma_addr, buf->attrs); > > + return; > > + } > > + > > + if (buf->vaddr) > > + dma_vunmap_noncontiguous(buf->dev, buf->vaddr); > > + dma_free_noncontiguous(buf->dev, buf->size, > > + buf->dma_sgt, buf->dma_addr); > > +} > > Is there a reason for creating __vb2_dc_put()? I found it more > a hindrance when reviewing than an advantage. I prefer to have > it moved to vb2_dc_put, that way all the clean up happens in that > single function. Done.
On Thu, Jun 17, 2021 at 04:56:17PM +0900, Sergey Senozhatsky wrote: > > This function can use some comments. What is happening AFAICS is that > > buf->vaddr is either set in vb2_dc_alloc_coherent (unless > > DMA_ATTR_NO_KERNEL_MAPPING was set), it is obtained through dma_buf_vmap() > > if the buffer was attached to a dma_buf, or it is allocated via > > dma_vmap_noncontiguous() for non-coherent memory. > > Yeah, it's complicated. Maybe we can make things more symmetrical. > > > But this leaves coherent memory with DMA_ATTR_NO_KERNEL_MAPPING set, what > > is vaddr in that case? I think it will call dma_vmap_noncontiguous() > > incorrectly in that case, shouldn't there be a check for !buf->coherent_mem > > before the call to dma_vmap_noncontiguous()? > > Thanks a lot for looking into it. Can we just kill off DMA_ATTR_NO_KERNEL_MAPPING in v4l now? There really is no good reason to use that with dma_alloc_noncoherent/noncontiguous available, and I plan to eventually remove that interface entirely.
On Thu, Jun 17, 2021 at 5:01 PM Christoph Hellwig <hch@lst.de> wrote: > > On Thu, Jun 17, 2021 at 04:56:17PM +0900, Sergey Senozhatsky wrote: > > > This function can use some comments. What is happening AFAICS is that > > > buf->vaddr is either set in vb2_dc_alloc_coherent (unless > > > DMA_ATTR_NO_KERNEL_MAPPING was set), it is obtained through dma_buf_vmap() > > > if the buffer was attached to a dma_buf, or it is allocated via > > > dma_vmap_noncontiguous() for non-coherent memory. > > > > Yeah, it's complicated. Maybe we can make things more symmetrical. > > > > > But this leaves coherent memory with DMA_ATTR_NO_KERNEL_MAPPING set, what > > > is vaddr in that case? I think it will call dma_vmap_noncontiguous() > > > incorrectly in that case, shouldn't there be a check for !buf->coherent_mem > > > before the call to dma_vmap_noncontiguous()? > > > > Thanks a lot for looking into it. > > Can we just kill off DMA_ATTR_NO_KERNEL_MAPPING in v4l now? There really is > no good reason to use that with dma_alloc_noncoherent/noncontiguous > available, and I plan to eventually remove that interface entirely. We still have use cases for dma_alloc_coherent() and DMA_ATTR_NO_KERNEL_MAPPING. Perhaps the way to handle this would be to make the dma_alloc_coherent() behave the same way as dma_alloc_noncontiguous(), where it just allocates the memory without handling the kernel mapping? Best regards, Tomasz
On Thu, Jun 17, 2021 at 05:30:11PM +0900, Tomasz Figa wrote:
> We still have use cases for dma_alloc_coherent() and DMA_ATTR_NO_KERNEL_MAPPING.
dma_alloc_coherent does not take a flags argument, so you can't use
DMA_ATTR_NO_KERNEL_MAPPING with it. What would your use case be here
anyway? In general DMA_ATTR_NO_KERNEL_MAPPING is rather misnamed, as
usually there is a kernel mapping, just not one that is coherent and
should be used.
On Thu, Jun 17, 2021 at 5:52 PM Christoph Hellwig <hch@lst.de> wrote: > > On Thu, Jun 17, 2021 at 05:30:11PM +0900, Tomasz Figa wrote: > > We still have use cases for dma_alloc_coherent() and DMA_ATTR_NO_KERNEL_MAPPING. > > dma_alloc_coherent does not take a flags argument, so you can't use > DMA_ATTR_NO_KERNEL_MAPPING with it. What would your use case be here > anyway? In general DMA_ATTR_NO_KERNEL_MAPPING is rather misnamed, as > usually there is a kernel mapping, just not one that is coherent and > should be used. Sorry, I meant dma_alloc_attrs() and yes, it's indeed a misnomer. Our use case basically has no need for the additional coherent mapping, so creation of it can be skipped to save some vmalloc space. (Yes, it probably only matters for 32-bit architectures.)
On Thu, Jun 17, 2021 at 06:40:58PM +0900, Tomasz Figa wrote: > Sorry, I meant dma_alloc_attrs() and yes, it's indeed a misnomer. Our > use case basically has no need for the additional coherent mapping, so > creation of it can be skipped to save some vmalloc space. (Yes, it > probably only matters for 32-bit architectures.) Yes, that is the normal use case, and it is solved by using dma_alloc_noncoherent or dma_alloc_noncontigous without the vmap step.
On Thu, Jun 17, 2021 at 7:07 PM Christoph Hellwig <hch@lst.de> wrote: > > On Thu, Jun 17, 2021 at 06:40:58PM +0900, Tomasz Figa wrote: > > Sorry, I meant dma_alloc_attrs() and yes, it's indeed a misnomer. Our > > use case basically has no need for the additional coherent mapping, so > > creation of it can be skipped to save some vmalloc space. (Yes, it > > probably only matters for 32-bit architectures.) > > Yes, that is the normal use case, and it is solved by using > dma_alloc_noncoherent or dma_alloc_noncontigous without the vmap > step. True, silly me. Probably not enough coffee at the time I was looking at it. With that, wouldn't it be possible to completely get rid of dma_alloc_{coherent,attrs}() and use dma_alloc_noncontiguous() + optional kernel and/or userspace mapping helper everywhere? (Possibly renaming it to something as simple as dma_alloc().
On Fri, Jun 18, 2021 at 12:21:33PM +0900, Tomasz Figa wrote: > > On Thu, Jun 17, 2021 at 06:40:58PM +0900, Tomasz Figa wrote: > > > Sorry, I meant dma_alloc_attrs() and yes, it's indeed a misnomer. Our > > > use case basically has no need for the additional coherent mapping, so > > > creation of it can be skipped to save some vmalloc space. (Yes, it > > > probably only matters for 32-bit architectures.) > > > > Yes, that is the normal use case, and it is solved by using > > dma_alloc_noncoherent or dma_alloc_noncontigous without the vmap > > step. > > True, silly me. Probably not enough coffee at the time I was looking at it. > > With that, wouldn't it be possible to completely get rid of > dma_alloc_{coherent,attrs}() and use dma_alloc_noncontiguous() + > optional kernel and/or userspace mapping helper everywhere? (Possibly > renaming it to something as simple as dma_alloc(). Well, dma_alloc_coherent users want a non-cached mapping. And while some architectures provide that using a vmap with "uncached" bits in the PTE to provide that, this: a) is not possibly everywhere b) even where possible is not always the best idea as it creates mappings with differnet cachability bets And even without that dma_alloc_noncoherent causes less overhead than dma_alloc_noncontigious if you only need a single contiguous range. So while I'm happy we have something useful for more complex drivers like v4l I think the simple dma_alloc_coherent API, including some of the less crazy flags for dma_alloc_attrs is the right thing to use for more than 90% of the use cases.
On Fri, Jun 18, 2021 at 1:25 PM Christoph Hellwig <hch@lst.de> wrote: > > On Fri, Jun 18, 2021 at 12:21:33PM +0900, Tomasz Figa wrote: > > > On Thu, Jun 17, 2021 at 06:40:58PM +0900, Tomasz Figa wrote: > > > > Sorry, I meant dma_alloc_attrs() and yes, it's indeed a misnomer. Our > > > > use case basically has no need for the additional coherent mapping, so > > > > creation of it can be skipped to save some vmalloc space. (Yes, it > > > > probably only matters for 32-bit architectures.) > > > > > > Yes, that is the normal use case, and it is solved by using > > > dma_alloc_noncoherent or dma_alloc_noncontigous without the vmap > > > step. > > > > True, silly me. Probably not enough coffee at the time I was looking at it. > > > > With that, wouldn't it be possible to completely get rid of > > dma_alloc_{coherent,attrs}() and use dma_alloc_noncontiguous() + > > optional kernel and/or userspace mapping helper everywhere? (Possibly > > renaming it to something as simple as dma_alloc(). > > Well, dma_alloc_coherent users want a non-cached mapping. And while > some architectures provide that using a vmap with "uncached" bits in the > PTE to provide that, this: > > a) is not possibly everywhere > b) even where possible is not always the best idea as it creates mappings > with differnet cachability bets I think this could be addressed by having a dma_vmap() helper that does the right thing, whether it's vmap() or dma_common_pages_remap() as appropriate. Or would be this still insufficient for some architectures? > > And even without that dma_alloc_noncoherent causes less overhead than > dma_alloc_noncontigious if you only need a single contiguous range. > Given that behind the scenes dma_alloc_noncontiguous() would also just call __dma_alloc_pages() for devices that need contiguous pages, would the overhead be basically the creation of a single-entry sgtable? > So while I'm happy we have something useful for more complex drivers like > v4l I think the simple dma_alloc_coherent API, including some of the less > crazy flags for dma_alloc_attrs is the right thing to use for more than > 90% of the use cases. One thing to take into account here is that many drivers use the existing "simple" way, just because there wasn't a viable alternative to do something better. Agreed, though, that we shouldn't optimize for the rare cases. Best regards, Tomasz
On Fri, Jun 18, 2021 at 01:44:08PM +0900, Tomasz Figa wrote: > > Well, dma_alloc_coherent users want a non-cached mapping. And while > > some architectures provide that using a vmap with "uncached" bits in the > > PTE to provide that, this: > > > > a) is not possibly everywhere > > b) even where possible is not always the best idea as it creates mappings > > with differnet cachability bets > > I think this could be addressed by having a dma_vmap() helper that > does the right thing, whether it's vmap() or dma_common_pages_remap() > as appropriate. Or would be this still insufficient for some > architectures? It can't always do the right thing. E.g. for the case where uncached memory needs to be allocated from a special boot time fixed pool. > > And even without that dma_alloc_noncoherent causes less overhead than > > dma_alloc_noncontigious if you only need a single contiguous range. > > > > Given that behind the scenes dma_alloc_noncontiguous() would also just > call __dma_alloc_pages() for devices that need contiguous pages, would > the overhead be basically the creation of a single-entry sgtable? In the best case: yes. > > So while I'm happy we have something useful for more complex drivers like > > v4l I think the simple dma_alloc_coherent API, including some of the less > > crazy flags for dma_alloc_attrs is the right thing to use for more than > > 90% of the use cases. > > One thing to take into account here is that many drivers use the > existing "simple" way, just because there wasn't a viable alternative > to do something better. Agreed, though, that we shouldn't optimize for > the rare cases. While that might be true for a few drivers, it is absolutely not true for the wide majority. I think you media people are a little special, with only the GPU folks contending for "specialness" :) (although media handles it way better, gpu folks just create local hacks that can't work portably).
Hi Hans, On (21/06/17 16:56), Sergey Senozhatsky wrote: [..] > static void *vb2_dc_vaddr(struct vb2_buffer *vb, void *buf_priv) > { > struct vb2_dc_buf *buf = buf_priv; > > if (buf->vaddr) > return buf->vaddr; > > if (buf->db_attach) { > struct dma_buf_map map; > > if (!dma_buf_vmap(buf->db_attach->dmabuf, &map)) > buf->vaddr = map.vaddr; > > return buf->vaddr; > } > > if (!buf->coherent_mem) > buf->vaddr = dma_vmap_noncontiguous(buf->dev, buf->size, > buf->dma_sgt); > return buf->vaddr; > } > > And in vb2_dc_alloc functions set vaddr for !DMA_ATTR_NO_KERNEL_MAPPING > in both coherent and non-coherent. So that we probably can have less > branches when ->vaddr is NULL for one type of allocations, and is not > NULL for another. > > static int vb2_dc_alloc_coherent(struct vb2_dc_buf *buf) > { > struct vb2_queue *q = buf->vb->vb2_queue; > > buf->cookie = dma_alloc_attrs(buf->dev, > buf->size, > &buf->dma_addr, > GFP_KERNEL | q->gfp_flags, > buf->attrs); > if (!buf->cookie) > return -ENOMEM; > > if (q->dma_attrs & DMA_ATTR_NO_KERNEL_MAPPING) > return 0; > > buf->vaddr = buf->cookie; > return 0; > } > > static int vb2_dc_alloc_non_coherent(struct vb2_dc_buf *buf) > { > struct vb2_queue *q = buf->vb->vb2_queue; > > buf->dma_sgt = dma_alloc_noncontiguous(buf->dev, > buf->size, > buf->dma_dir, > GFP_KERNEL | q->gfp_flags, > buf->attrs); > if (!buf->dma_sgt) > return -ENOMEM; > > if (q->dma_attrs & DMA_ATTR_NO_KERNEL_MAPPING) > return 0; > > buf->vaddr = dma_vmap_noncontiguous(buf->dev, buf->size, buf->dma_sgt); > if (!buf->vaddr) { > dma_free_noncontiguous(buf->dev, buf->size, > buf->dma_sgt, buf->dma_addr); > return -ENOMEM; > } > return 0; > } I guess this should address the case when "after allocating the buffer, the buffer is exported as a dma_buf and another device calls dma_buf_ops vb2_dc_dmabuf_ops_vmap, which in turn calls dma_buf_map_set_vaddr(map, buf->vaddr); with a NULL buf->vaddr" Because ->vaddr will not be NULL now after allocation for both coherent and non-coherent buffers (modulo DMA_ATTR_NO_KERNEL_MAPPING requests). What do you think?
On Tue, Jun 22, 2021 at 4:33 PM Christoph Hellwig <hch@lst.de> wrote: > > On Fri, Jun 18, 2021 at 01:44:08PM +0900, Tomasz Figa wrote: > > > Well, dma_alloc_coherent users want a non-cached mapping. And while > > > some architectures provide that using a vmap with "uncached" bits in the > > > PTE to provide that, this: > > > > > > a) is not possibly everywhere > > > b) even where possible is not always the best idea as it creates mappings > > > with differnet cachability bets > > > > I think this could be addressed by having a dma_vmap() helper that > > does the right thing, whether it's vmap() or dma_common_pages_remap() > > as appropriate. Or would be this still insufficient for some > > architectures? > > It can't always do the right thing. E.g. for the case where uncached > memory needs to be allocated from a special boot time fixed pool. > Fair enough. Thanks for elaborating. > > > And even without that dma_alloc_noncoherent causes less overhead than > > > dma_alloc_noncontigious if you only need a single contiguous range. > > > > > > > Given that behind the scenes dma_alloc_noncontiguous() would also just > > call __dma_alloc_pages() for devices that need contiguous pages, would > > the overhead be basically the creation of a single-entry sgtable? > > In the best case: yes. > > > > So while I'm happy we have something useful for more complex drivers like > > > v4l I think the simple dma_alloc_coherent API, including some of the less > > > crazy flags for dma_alloc_attrs is the right thing to use for more than > > > 90% of the use cases. > > > > One thing to take into account here is that many drivers use the > > existing "simple" way, just because there wasn't a viable alternative > > to do something better. Agreed, though, that we shouldn't optimize for > > the rare cases. > > While that might be true for a few drivers, it is absolutely not true > for the wide majority. I think you media people are a little special, > with only the GPU folks contending for "specialness" :) (although > media handles it way better, gpu folks just create local hacks that > can't work portably). I don't have the evidence to argue, so let's just leave it at "time will tell". I think it's great that we have the possibility to do the more special things and we can see where it goes from now on. :) Best regards, Tomasz
On Fri, Jun 25, 2021 at 12:10 PM Sergey Senozhatsky <senozhatsky@chromium.org> wrote: > > Hi Hans, > > On (21/06/17 16:56), Sergey Senozhatsky wrote: > [..] > > static void *vb2_dc_vaddr(struct vb2_buffer *vb, void *buf_priv) > > { > > struct vb2_dc_buf *buf = buf_priv; > > > > if (buf->vaddr) > > return buf->vaddr; > > > > if (buf->db_attach) { > > struct dma_buf_map map; > > > > if (!dma_buf_vmap(buf->db_attach->dmabuf, &map)) > > buf->vaddr = map.vaddr; > > > > return buf->vaddr; > > } > > > > if (!buf->coherent_mem) > > buf->vaddr = dma_vmap_noncontiguous(buf->dev, buf->size, > > buf->dma_sgt); > > return buf->vaddr; > > } > > > > And in vb2_dc_alloc functions set vaddr for !DMA_ATTR_NO_KERNEL_MAPPING > > in both coherent and non-coherent. So that we probably can have less > > branches when ->vaddr is NULL for one type of allocations, and is not > > NULL for another. I'd prefer if it stayed as is. This opportunistic mapping as in the current revision is quite nice, because most of the drivers don't bother to set DMA_ATTR_NO_KERNEL_MAPPING even if they don't need the kernel mapping. Also, even if the driver itself doesn't need the kernel mapping, we can still create one on demand if the DMA-buf importer demands it from us. > > > > static int vb2_dc_alloc_coherent(struct vb2_dc_buf *buf) > > { > > struct vb2_queue *q = buf->vb->vb2_queue; > > > > buf->cookie = dma_alloc_attrs(buf->dev, > > buf->size, > > &buf->dma_addr, > > GFP_KERNEL | q->gfp_flags, > > buf->attrs); > > if (!buf->cookie) > > return -ENOMEM; > > > > if (q->dma_attrs & DMA_ATTR_NO_KERNEL_MAPPING) > > return 0; > > > > buf->vaddr = buf->cookie; > > return 0; > > } > > > > static int vb2_dc_alloc_non_coherent(struct vb2_dc_buf *buf) > > { > > struct vb2_queue *q = buf->vb->vb2_queue; > > > > buf->dma_sgt = dma_alloc_noncontiguous(buf->dev, > > buf->size, > > buf->dma_dir, > > GFP_KERNEL | q->gfp_flags, > > buf->attrs); > > if (!buf->dma_sgt) > > return -ENOMEM; > > > > if (q->dma_attrs & DMA_ATTR_NO_KERNEL_MAPPING) > > return 0; > > > > buf->vaddr = dma_vmap_noncontiguous(buf->dev, buf->size, buf->dma_sgt); > > if (!buf->vaddr) { > > dma_free_noncontiguous(buf->dev, buf->size, > > buf->dma_sgt, buf->dma_addr); > > return -ENOMEM; > > } > > return 0; > > } > > I guess this should address the case when > > "after allocating the buffer, the buffer is exported as a dma_buf and > another device calls dma_buf_ops vb2_dc_dmabuf_ops_vmap, which in turn > calls dma_buf_map_set_vaddr(map, buf->vaddr); with a NULL buf->vaddr" Sorry, I fail to get what this is about. Where does this quote come from? > > Because ->vaddr will not be NULL now after allocation for both coherent > and non-coherent buffers (modulo DMA_ATTR_NO_KERNEL_MAPPING requests). > > What do you think? Hans, any feedback on this? Thanks. Best regards, Tomasz
On (21/07/07 21:48), Tomasz Figa wrote: > > [..] > > > static void *vb2_dc_vaddr(struct vb2_buffer *vb, void *buf_priv) > > > { > > > struct vb2_dc_buf *buf = buf_priv; > > > > > > if (buf->vaddr) > > > return buf->vaddr; > > > > > > if (buf->db_attach) { > > > struct dma_buf_map map; > > > > > > if (!dma_buf_vmap(buf->db_attach->dmabuf, &map)) > > > buf->vaddr = map.vaddr; > > > > > > return buf->vaddr; > > > } > > > > > > if (!buf->coherent_mem) > > > buf->vaddr = dma_vmap_noncontiguous(buf->dev, buf->size, > > > buf->dma_sgt); > > > return buf->vaddr; > > > } > > > > > > And in vb2_dc_alloc functions set vaddr for !DMA_ATTR_NO_KERNEL_MAPPING > > > in both coherent and non-coherent. So that we probably can have less > > > branches when ->vaddr is NULL for one type of allocations, and is not > > > NULL for another. > > I'd prefer if it stayed as is. This opportunistic mapping as in the > current revision is quite nice, because most of the drivers don't > bother to set DMA_ATTR_NO_KERNEL_MAPPING even if they don't need the > kernel mapping. Also, even if the driver itself doesn't need the > kernel mapping, we can still create one on demand if the DMA-buf > importer demands it from us. [..] > > > static int vb2_dc_alloc_coherent(struct vb2_dc_buf *buf) > > > { > > > struct vb2_queue *q = buf->vb->vb2_queue; > > > > > > buf->cookie = dma_alloc_attrs(buf->dev, > > > buf->size, > > > &buf->dma_addr, > > > GFP_KERNEL | q->gfp_flags, > > > buf->attrs); > > > if (!buf->cookie) > > > return -ENOMEM; > > > > > > if (q->dma_attrs & DMA_ATTR_NO_KERNEL_MAPPING) > > > return 0; > > > > > > buf->vaddr = buf->cookie; > > > return 0; > > > } > > > > > > static int vb2_dc_alloc_non_coherent(struct vb2_dc_buf *buf) > > > { > > > struct vb2_queue *q = buf->vb->vb2_queue; > > > > > > buf->dma_sgt = dma_alloc_noncontiguous(buf->dev, > > > buf->size, > > > buf->dma_dir, > > > GFP_KERNEL | q->gfp_flags, > > > buf->attrs); > > > if (!buf->dma_sgt) > > > return -ENOMEM; > > > > > > if (q->dma_attrs & DMA_ATTR_NO_KERNEL_MAPPING) > > > return 0; > > > > > > buf->vaddr = dma_vmap_noncontiguous(buf->dev, buf->size, buf->dma_sgt); > > > if (!buf->vaddr) { > > > dma_free_noncontiguous(buf->dev, buf->size, > > > buf->dma_sgt, buf->dma_addr); > > > return -ENOMEM; > > > } > > > return 0; > > > } > > > > I guess this should address the case when > > > > "after allocating the buffer, the buffer is exported as a dma_buf and > > another device calls dma_buf_ops vb2_dc_dmabuf_ops_vmap, which in turn > > calls dma_buf_map_set_vaddr(map, buf->vaddr); with a NULL buf->vaddr" > > Sorry, I fail to get what this is about. Where does this quote come from? Bottom half of https://lore.kernel.org/lkml/10a0903a-e295-5cba-683a-1eb89a0804ed@xs4all.nl/
On Wed, Jul 7, 2021 at 10:29 PM Sergey Senozhatsky <senozhatsky@chromium.org> wrote: > > On (21/07/07 21:48), Tomasz Figa wrote: > > > [..] > > > > static void *vb2_dc_vaddr(struct vb2_buffer *vb, void *buf_priv) > > > > { > > > > struct vb2_dc_buf *buf = buf_priv; > > > > > > > > if (buf->vaddr) > > > > return buf->vaddr; > > > > > > > > if (buf->db_attach) { > > > > struct dma_buf_map map; > > > > > > > > if (!dma_buf_vmap(buf->db_attach->dmabuf, &map)) > > > > buf->vaddr = map.vaddr; > > > > > > > > return buf->vaddr; > > > > } > > > > > > > > if (!buf->coherent_mem) > > > > buf->vaddr = dma_vmap_noncontiguous(buf->dev, buf->size, > > > > buf->dma_sgt); > > > > return buf->vaddr; > > > > } > > > > > > > > And in vb2_dc_alloc functions set vaddr for !DMA_ATTR_NO_KERNEL_MAPPING > > > > in both coherent and non-coherent. So that we probably can have less > > > > branches when ->vaddr is NULL for one type of allocations, and is not > > > > NULL for another. > > > > I'd prefer if it stayed as is. This opportunistic mapping as in the > > current revision is quite nice, because most of the drivers don't > > bother to set DMA_ATTR_NO_KERNEL_MAPPING even if they don't need the > > kernel mapping. Also, even if the driver itself doesn't need the > > kernel mapping, we can still create one on demand if the DMA-buf > > importer demands it from us. > > [..] > > > > > static int vb2_dc_alloc_coherent(struct vb2_dc_buf *buf) > > > > { > > > > struct vb2_queue *q = buf->vb->vb2_queue; > > > > > > > > buf->cookie = dma_alloc_attrs(buf->dev, > > > > buf->size, > > > > &buf->dma_addr, > > > > GFP_KERNEL | q->gfp_flags, > > > > buf->attrs); > > > > if (!buf->cookie) > > > > return -ENOMEM; > > > > > > > > if (q->dma_attrs & DMA_ATTR_NO_KERNEL_MAPPING) > > > > return 0; > > > > > > > > buf->vaddr = buf->cookie; > > > > return 0; > > > > } > > > > > > > > static int vb2_dc_alloc_non_coherent(struct vb2_dc_buf *buf) > > > > { > > > > struct vb2_queue *q = buf->vb->vb2_queue; > > > > > > > > buf->dma_sgt = dma_alloc_noncontiguous(buf->dev, > > > > buf->size, > > > > buf->dma_dir, > > > > GFP_KERNEL | q->gfp_flags, > > > > buf->attrs); > > > > if (!buf->dma_sgt) > > > > return -ENOMEM; > > > > > > > > if (q->dma_attrs & DMA_ATTR_NO_KERNEL_MAPPING) > > > > return 0; > > > > > > > > buf->vaddr = dma_vmap_noncontiguous(buf->dev, buf->size, buf->dma_sgt); > > > > if (!buf->vaddr) { > > > > dma_free_noncontiguous(buf->dev, buf->size, > > > > buf->dma_sgt, buf->dma_addr); > > > > return -ENOMEM; > > > > } > > > > return 0; > > > > } > > > > > > I guess this should address the case when > > > > > > "after allocating the buffer, the buffer is exported as a dma_buf and > > > another device calls dma_buf_ops vb2_dc_dmabuf_ops_vmap, which in turn > > > calls dma_buf_map_set_vaddr(map, buf->vaddr); with a NULL buf->vaddr" > > > > Sorry, I fail to get what this is about. Where does this quote come from? > > Bottom half of https://lore.kernel.org/lkml/10a0903a-e295-5cba-683a-1eb89a0804ed@xs4all.nl/ I see, thanks for the pointer. Yes, vb2_dc_dmabuf_ops_vmap() needs to be changed so that it calls vb2_dc_vaddr() internally instead of relying on buf->vaddr directly.
On (21/07/07 23:10), Tomasz Figa wrote: > > > > > > > > I guess this should address the case when > > > > > > > > "after allocating the buffer, the buffer is exported as a dma_buf and > > > > another device calls dma_buf_ops vb2_dc_dmabuf_ops_vmap, which in turn > > > > calls dma_buf_map_set_vaddr(map, buf->vaddr); with a NULL buf->vaddr" > > > > > > Sorry, I fail to get what this is about. Where does this quote come from? > > > > Bottom half of https://lore.kernel.org/lkml/10a0903a-e295-5cba-683a-1eb89a0804ed@xs4all.nl/ > > I see, thanks for the pointer. Yes, vb2_dc_dmabuf_ops_vmap() needs to > be changed so that it calls vb2_dc_vaddr() internally instead of > relying on buf->vaddr directly. Done.
diff --git a/drivers/media/common/videobuf2/videobuf2-dma-contig.c b/drivers/media/common/videobuf2/videobuf2-dma-contig.c index 1e218bc440c6..40eaaef1565b 100644 --- a/drivers/media/common/videobuf2/videobuf2-dma-contig.c +++ b/drivers/media/common/videobuf2/videobuf2-dma-contig.c @@ -17,6 +17,7 @@ #include <linux/sched.h> #include <linux/slab.h> #include <linux/dma-mapping.h> +#include <linux/highmem.h> #include <media/videobuf2-v4l2.h> #include <media/videobuf2-dma-contig.h> @@ -42,6 +43,7 @@ struct vb2_dc_buf { struct dma_buf_attachment *db_attach; struct vb2_buffer *vb; + bool coherent_mem; }; /*********************************************/ @@ -78,14 +80,22 @@ static void *vb2_dc_cookie(struct vb2_buffer *vb, void *buf_priv) static void *vb2_dc_vaddr(struct vb2_buffer *vb, void *buf_priv) { struct vb2_dc_buf *buf = buf_priv; - struct dma_buf_map map; - int ret; - if (!buf->vaddr && buf->db_attach) { - ret = dma_buf_vmap(buf->db_attach->dmabuf, &map); - buf->vaddr = ret ? NULL : map.vaddr; + if (buf->vaddr) + return buf->vaddr; + + if (buf->db_attach) { + struct dma_buf_map map; + + if (!dma_buf_vmap(buf->db_attach->dmabuf, &map)) + buf->vaddr = map.vaddr; + + return buf->vaddr; } + /* Non-coherent memory */ + buf->vaddr = dma_vmap_noncontiguous(buf->dev, buf->size, buf->dma_sgt); + return buf->vaddr; } @@ -101,13 +111,26 @@ static void vb2_dc_prepare(void *buf_priv) struct vb2_dc_buf *buf = buf_priv; struct sg_table *sgt = buf->dma_sgt; + /* This takes care of DMABUF and user-enforced cache sync hint */ if (buf->vb->skip_cache_sync_on_prepare) return; + /* + * Coherent MMAP buffers do not need to be synced, unlike USERPTR + * and non-coherent MMAP buffers. + */ + if (buf->vb->memory == V4L2_MEMORY_MMAP && buf->coherent_mem) + return; + if (!sgt) return; + /* For both USERPTR and non-coherent MMAP */ dma_sync_sgtable_for_device(buf->dev, sgt, buf->dma_dir); + + /* Non-coherent MMAP only */ + if (!buf->coherent_mem && buf->vaddr) + flush_kernel_vmap_range(buf->vaddr, buf->size); } static void vb2_dc_finish(void *buf_priv) @@ -115,19 +138,46 @@ static void vb2_dc_finish(void *buf_priv) struct vb2_dc_buf *buf = buf_priv; struct sg_table *sgt = buf->dma_sgt; + /* This takes care of DMABUF and user-enforced cache sync hint */ if (buf->vb->skip_cache_sync_on_finish) return; + /* + * Coherent MMAP buffers do not need to be synced, unlike USERPTR + * and non-coherent MMAP buffers. + */ + if (buf->vb->memory == V4L2_MEMORY_MMAP && buf->coherent_mem) + return; + if (!sgt) return; + /* For both USERPTR and non-coherent MMAP */ dma_sync_sgtable_for_cpu(buf->dev, sgt, buf->dma_dir); + + /* Non-coherent MMAP only */ + if (!buf->coherent_mem && buf->vaddr) + invalidate_kernel_vmap_range(buf->vaddr, buf->size); } /*********************************************/ /* callbacks for MMAP buffers */ /*********************************************/ +static void __vb2_dc_put(struct vb2_dc_buf *buf) +{ + if (buf->coherent_mem) { + dma_free_attrs(buf->dev, buf->size, buf->cookie, + buf->dma_addr, buf->attrs); + return; + } + + if (buf->vaddr) + dma_vunmap_noncontiguous(buf->dev, buf->vaddr); + dma_free_noncontiguous(buf->dev, buf->size, + buf->dma_sgt, buf->dma_addr); +} + static void vb2_dc_put(void *buf_priv) { struct vb2_dc_buf *buf = buf_priv; @@ -139,17 +189,52 @@ static void vb2_dc_put(void *buf_priv) sg_free_table(buf->sgt_base); kfree(buf->sgt_base); } - dma_free_attrs(buf->dev, buf->size, buf->cookie, buf->dma_addr, - buf->attrs); + __vb2_dc_put(buf); put_device(buf->dev); kfree(buf); } +static int vb2_dc_alloc_coherent(struct vb2_dc_buf *buf) +{ + struct vb2_queue *q = buf->vb->vb2_queue; + + buf->cookie = dma_alloc_attrs(buf->dev, + buf->size, + &buf->dma_addr, + GFP_KERNEL | q->gfp_flags, + buf->attrs); + if (!buf->cookie) + return -ENOMEM; + if ((q->dma_attrs & DMA_ATTR_NO_KERNEL_MAPPING) == 0) + buf->vaddr = buf->cookie; + return 0; +} + +static int vb2_dc_alloc_non_coherent(struct vb2_dc_buf *buf) +{ + struct vb2_queue *q = buf->vb->vb2_queue; + + buf->dma_sgt = dma_alloc_noncontiguous(buf->dev, + buf->size, + buf->dma_dir, + GFP_KERNEL | q->gfp_flags, + buf->attrs); + if (!buf->dma_sgt) + return -ENOMEM; + /* + * For requests that need kernel mapping (DMA_ATTR_NO_KERNEL_MAPPING + * bit is cleared) we perform dma_vmap_noncontiguous() later, in + * vb2_dc_vadd(). + */ + return 0; +} + static void *vb2_dc_alloc(struct vb2_buffer *vb, struct device *dev, unsigned long size) { struct vb2_dc_buf *buf; + int ret; if (WARN_ON(!dev)) return ERR_PTR(-EINVAL); @@ -159,27 +244,28 @@ static void *vb2_dc_alloc(struct vb2_buffer *vb, return ERR_PTR(-ENOMEM); buf->attrs = vb->vb2_queue->dma_attrs; - buf->cookie = dma_alloc_attrs(dev, size, &buf->dma_addr, - GFP_KERNEL | vb->vb2_queue->gfp_flags, - buf->attrs); - if (!buf->cookie) { - dev_err(dev, "dma_alloc_coherent of size %ld failed\n", size); - kfree(buf); - return ERR_PTR(-ENOMEM); - } - - if ((buf->attrs & DMA_ATTR_NO_KERNEL_MAPPING) == 0) - buf->vaddr = buf->cookie; + buf->dma_dir = vb->vb2_queue->dma_dir; + buf->vb = vb; + buf->coherent_mem = vb->vb2_queue->coherent_mem; + buf->size = size; /* Prevent the device from being released while the buffer is used */ buf->dev = get_device(dev); - buf->size = size; - buf->dma_dir = vb->vb2_queue->dma_dir; + + if (buf->coherent_mem) + ret = vb2_dc_alloc_coherent(buf); + else + ret = vb2_dc_alloc_non_coherent(buf); + + if (ret) { + dev_err(dev, "dma alloc of size %ld failed\n", size); + kfree(buf); + return ERR_PTR(-ENOMEM); + } buf->handler.refcount = &buf->refcount; buf->handler.put = vb2_dc_put; buf->handler.arg = buf; - buf->vb = vb; refcount_set(&buf->refcount, 1); @@ -196,9 +282,12 @@ static int vb2_dc_mmap(void *buf_priv, struct vm_area_struct *vma) return -EINVAL; } - ret = dma_mmap_attrs(buf->dev, vma, buf->cookie, - buf->dma_addr, buf->size, buf->attrs); - + if (buf->coherent_mem) + ret = dma_mmap_attrs(buf->dev, vma, buf->cookie, buf->dma_addr, + buf->size, buf->attrs); + else + ret = dma_mmap_noncontiguous(buf->dev, vma, buf->size, + buf->dma_sgt); if (ret) { pr_err("Remapping memory failed, error: %d\n", ret); return ret; @@ -390,6 +479,9 @@ static struct sg_table *vb2_dc_get_base_sgt(struct vb2_dc_buf *buf) int ret; struct sg_table *sgt; + if (!buf->coherent_mem) + return buf->dma_sgt; + sgt = kmalloc(sizeof(*sgt), GFP_KERNEL); if (!sgt) { dev_err(buf->dev, "failed to alloc sg table\n");