diff mbox

[1/4] pxa_camera: Remove YUV planar formats hole

Message ID 1236282351-28471-2-git-send-email-robert.jarzmik@free.fr (mailing list archive)
State RFC
Headers show

Commit Message

Robert Jarzmik March 5, 2009, 7:45 p.m. UTC
All planes were PAGE aligned (ie. 4096 bytes aligned). This
is not consistent with YUV422 format, which requires Y, U
and V planes glued together.  The new implementation forces
the alignement on 8 bytes (DMA requirement), which is almost
always the case (granted by width x height being a multiple
of 8).

The test cases include tests in both YUV422 and RGB565 :
 - a picture of size 111 x 111 (cross RAM pages example)
 - a picture of size 1023 x 4 in (under 1 RAM page)
 - a picture of size 1024 x 4 in (exactly 1 RAM page)
 - a picture of size 1025 x 4 in (over 1 RAM page)
 - a picture of size 1280 x 1024 (many RAM pages)

Signed-off-by: Robert Jarzmik <robert.jarzmik@free.fr>
---
 drivers/media/video/pxa_camera.c |  165 ++++++++++++++++++++++++++------------
 1 files changed, 114 insertions(+), 51 deletions(-)

Comments

Guennadi Liakhovetski March 5, 2009, 8:29 p.m. UTC | #1
On Thu, 5 Mar 2009, Robert Jarzmik wrote:

> All planes were PAGE aligned (ie. 4096 bytes aligned). This
> is not consistent with YUV422 format, which requires Y, U
> and V planes glued together.  The new implementation forces
> the alignement on 8 bytes (DMA requirement), which is almost
> always the case (granted by width x height being a multiple
> of 8).

This is not a review yet - just an explanation why I was suggesting to 
adjust height and width - you say yourself, that YUV422P (I think, this is 
wat you meant, not just YUV422) requires planes to immediately follow one 
another. But you have to align them on 8 byte boundary for DMA, so, you 
violate the standard, right? If so, I would rather suggest to adjust width 
and height for planar formats to comply to the standard. Or have I 
misunderstood you?

Thanks
Guennadi

> 
> The test cases include tests in both YUV422 and RGB565 :
>  - a picture of size 111 x 111 (cross RAM pages example)
>  - a picture of size 1023 x 4 in (under 1 RAM page)
>  - a picture of size 1024 x 4 in (exactly 1 RAM page)
>  - a picture of size 1025 x 4 in (over 1 RAM page)
>  - a picture of size 1280 x 1024 (many RAM pages)
> 
> Signed-off-by: Robert Jarzmik <robert.jarzmik@free.fr>
> ---
>  drivers/media/video/pxa_camera.c |  165 ++++++++++++++++++++++++++------------
>  1 files changed, 114 insertions(+), 51 deletions(-)
> 
> diff --git a/drivers/media/video/pxa_camera.c b/drivers/media/video/pxa_camera.c
> index e3e6b29..54df071 100644
> --- a/drivers/media/video/pxa_camera.c
> +++ b/drivers/media/video/pxa_camera.c
> @@ -242,14 +242,13 @@ static int pxa_videobuf_setup(struct videobuf_queue *vq, unsigned int *count,
>  	dev_dbg(&icd->dev, "count=%d, size=%d\n", *count, *size);
>  
>  	/* planar capture requires Y, U and V buffers to be page aligned */
> -	if (pcdev->channels == 3) {
> -		*size = PAGE_ALIGN(icd->width * icd->height); /* Y pages */
> -		*size += PAGE_ALIGN(icd->width * icd->height / 2); /* U pages */
> -		*size += PAGE_ALIGN(icd->width * icd->height / 2); /* V pages */
> -	} else {
> -		*size = icd->width * icd->height *
> -			((icd->current_fmt->depth + 7) >> 3);
> -	}
> +	if (pcdev->channels == 3)
> +		*size = roundup(icd->width * icd->height, 8) /* Y pages */
> +			+ roundup(icd->width * icd->height / 2, 8) /* U pages */
> +			+ roundup(icd->width * icd->height / 2, 8); /* V pages */
> +	else
> +		*size = roundup(icd->width * icd->height *
> +				((icd->current_fmt->depth + 7) >> 3), 8);
>  
>  	if (0 == *count)
>  		*count = 32;
> @@ -289,19 +288,63 @@ static void free_buffer(struct videobuf_queue *vq, struct pxa_buffer *buf)
>  	buf->vb.state = VIDEOBUF_NEEDS_INIT;
>  }
>  
> +static int calculate_dma_sglen(struct scatterlist *sglist, int sglen,
> +			       int sg_first_ofs, int size)
> +{
> +	int i, offset, dma_len, xfer_len;
> +	struct scatterlist *sg;
> +
> +	offset = sg_first_ofs;
> +	for_each_sg(sglist, sg, sglen, i) {
> +		dma_len = sg_dma_len(sg);
> +
> +		/* PXA27x Developer's Manual 27.4.4.1: round up to 8 bytes */
> +		xfer_len = roundup(min(dma_len - offset, size), 8);
> +
> +		size = max(0, size - xfer_len);
> +		offset = 0;
> +		if (size == 0)
> +			break;
> +	}
> +
> +	BUG_ON(size != 0);
> +	return i + 1;
> +}
> +
> +/**
> + * pxa_init_dma_channel - init dma descriptors
> + * @pcdev: pxa camera device
> + * @buf: pxa buffer to find pxa dma channel
> + * @dma: dma video buffer
> + * @channel: dma channel (0 => 'Y', 1 => 'U', 2 => 'V')
> + * @cibr: camera read fifo
> + * @size: bytes to transfer
> + * @sg_first: index of first element of sg_list
> + * @sg_first_ofs: offset in first element of sg_list
> + *
> + * Prepares the pxa dma descriptors to transfer one camera channel.
> + * Beware sg_first and sg_first_ofs are both input and output parameters.
> + *
> + * Returns 0
> + */
>  static int pxa_init_dma_channel(struct pxa_camera_dev *pcdev,
>  				struct pxa_buffer *buf,
>  				struct videobuf_dmabuf *dma, int channel,
> -				int sglen, int sg_start, int cibr,
> -				unsigned int size)
> +				int cibr, int size,
> +				struct scatterlist **sg_first, int *sg_first_ofs)
>  {
>  	struct pxa_cam_dma *pxa_dma = &buf->dmas[channel];
> -	int i;
> +	struct scatterlist *sg;
> +	int i, offset, sglen;
> +	int dma_len = 0, xfer_len = 0;
>  
>  	if (pxa_dma->sg_cpu)
>  		dma_free_coherent(pcdev->dev, pxa_dma->sg_size,
>  				  pxa_dma->sg_cpu, pxa_dma->sg_dma);
>  
> +	sglen = calculate_dma_sglen(*sg_first, dma->sglen,
> +				    *sg_first_ofs, size);
> +
>  	pxa_dma->sg_size = (sglen + 1) * sizeof(struct pxa_dma_desc);
>  	pxa_dma->sg_cpu = dma_alloc_coherent(pcdev->dev, pxa_dma->sg_size,
>  					     &pxa_dma->sg_dma, GFP_KERNEL);
> @@ -309,27 +352,51 @@ static int pxa_init_dma_channel(struct pxa_camera_dev *pcdev,
>  		return -ENOMEM;
>  
>  	pxa_dma->sglen = sglen;
> +	offset = *sg_first_ofs;
>  
> -	for (i = 0; i < sglen; i++) {
> -		int sg_i = sg_start + i;
> -		struct scatterlist *sg = dma->sglist;
> -		unsigned int dma_len = sg_dma_len(&sg[sg_i]), xfer_len;
> +	dev_dbg(pcdev->dev, "DMA: sg_first=%p, sglen=%d, ofs=%d, dma.desc=%x\n",
> +		*sg_first, sglen, *sg_first_ofs, pxa_dma->sg_dma);
>  
> -		pxa_dma->sg_cpu[i].dsadr = pcdev->res->start + cibr;
> -		pxa_dma->sg_cpu[i].dtadr = sg_dma_address(&sg[sg_i]);
> +
> +	for_each_sg(*sg_first, sg, sglen, i) {
> +		dma_len = sg_dma_len(sg);
>  
>  		/* PXA27x Developer's Manual 27.4.4.1: round up to 8 bytes */
> -		xfer_len = (min(dma_len, size) + 7) & ~7;
> +		xfer_len = roundup(min(dma_len - offset, size), 8);
>  
> +		size = max(0, size - xfer_len);
> +
> +		pxa_dma->sg_cpu[i].dsadr = pcdev->res->start + cibr;
> +		pxa_dma->sg_cpu[i].dtadr = sg_dma_address(sg) + offset;
>  		pxa_dma->sg_cpu[i].dcmd =
>  			DCMD_FLOWSRC | DCMD_BURST8 | DCMD_INCTRGADDR | xfer_len;
> -		size -= dma_len;
>  		pxa_dma->sg_cpu[i].ddadr =
>  			pxa_dma->sg_dma + (i + 1) * sizeof(struct pxa_dma_desc);
> +
> +		dev_vdbg(pcdev->dev, "DMA: desc.%08x->@phys=0x%08x, len=%d\n",
> +			 pxa_dma->sg_dma + i * sizeof(struct pxa_dma_desc),
> +			 sg_dma_address(sg) + offset, xfer_len);
> +		offset = 0;
> +
> +		if (size == 0)
> +			break;
>  	}
>  
> -	pxa_dma->sg_cpu[sglen - 1].ddadr = DDADR_STOP;
> -	pxa_dma->sg_cpu[sglen - 1].dcmd |= DCMD_ENDIRQEN;
> +	pxa_dma->sg_cpu[sglen].ddadr = DDADR_STOP;
> +	pxa_dma->sg_cpu[sglen].dcmd  = DCMD_FLOWSRC | DCMD_BURST8 | DCMD_ENDIRQEN;
> +
> +	*sg_first_ofs = xfer_len;
> +	/*
> +	 * Handle 1 special case :
> +	 *  - if we finish the DMA transfer in the last 7 bytes of a RAM page
> +	 *    then we return the sg element pointing on the next page
> +	 */
> +	if (*sg_first_ofs >= dma_len) {
> +		*sg_first_ofs -= dma_len;
> +		*sg_first = sg_next(sg);
> +	} else {
> +		*sg_first = sg;
> +	}
>  
>  	return 0;
>  }
> @@ -342,9 +409,7 @@ static int pxa_videobuf_prepare(struct videobuf_queue *vq,
>  	struct pxa_camera_dev *pcdev = ici->priv;
>  	struct pxa_buffer *buf = container_of(vb, struct pxa_buffer, vb);
>  	int ret;
> -	int sglen_y,  sglen_yu = 0, sglen_u = 0, sglen_v = 0;
> -	int size_y, size_u = 0, size_v = 0;
> -
> +	int size_y = 0, size_u = 0, size_v = 0;
>  	dev_dbg(&icd->dev, "%s (vb=0x%p) 0x%08lx %d\n", __func__,
>  		vb, vb->baddr, vb->bsize);
>  
> @@ -381,53 +446,51 @@ static int pxa_videobuf_prepare(struct videobuf_queue *vq,
>  	}
>  
>  	if (vb->state == VIDEOBUF_NEEDS_INIT) {
> -		unsigned int size = vb->size;
> +		int size = vb->size;
> +		int next_ofs = 0;
>  		struct videobuf_dmabuf *dma = videobuf_to_dma(vb);
> +		struct scatterlist *sg;
>  
>  		ret = videobuf_iolock(vq, vb, NULL);
>  		if (ret)
>  			goto fail;
>  
>  		if (pcdev->channels == 3) {
> -			/* FIXME the calculations should be more precise */
> -			sglen_y = dma->sglen / 2;
> -			sglen_u = sglen_v = dma->sglen / 4 + 1;
> -			sglen_yu = sglen_y + sglen_u;
>  			size_y = size / 2;
>  			size_u = size_v = size / 4;
>  		} else {
> -			sglen_y = dma->sglen;
>  			size_y = size;
>  		}
>  
> -		/* init DMA for Y channel */
> -		ret = pxa_init_dma_channel(pcdev, buf, dma, 0, sglen_y,
> -					   0, 0x28, size_y);
> +		sg = dma->sglist;
>  
> +		/* init DMA for Y channel */
> +		ret = pxa_init_dma_channel(pcdev, buf, dma, 0, CIBR0, size_y,
> +					   &sg, &next_ofs);
>  		if (ret) {
>  			dev_err(pcdev->dev,
>  				"DMA initialization for Y/RGB failed\n");
>  			goto fail;
>  		}
>  
> -		if (pcdev->channels == 3) {
> -			/* init DMA for U channel */
> -			ret = pxa_init_dma_channel(pcdev, buf, dma, 1, sglen_u,
> -						   sglen_y, 0x30, size_u);
> -			if (ret) {
> -				dev_err(pcdev->dev,
> -					"DMA initialization for U failed\n");
> -				goto fail_u;
> -			}
> +		/* init DMA for U channel */
> +		if (size_u)
> +			ret = pxa_init_dma_channel(pcdev, buf, dma, 1, CIBR1,
> +						   size_u, &sg, &next_ofs);
> +		if (ret) {
> +			dev_err(pcdev->dev,
> +				"DMA initialization for U failed\n");
> +			goto fail_u;
> +		}
>  
> -			/* init DMA for V channel */
> -			ret = pxa_init_dma_channel(pcdev, buf, dma, 2, sglen_v,
> -						   sglen_yu, 0x38, size_v);
> -			if (ret) {
> -				dev_err(pcdev->dev,
> -					"DMA initialization for V failed\n");
> -				goto fail_v;
> -			}
> +		/* init DMA for V channel */
> +		if (size_v)
> +			ret = pxa_init_dma_channel(pcdev, buf, dma, 2, CIBR2,
> +						   size_u, &sg, &next_ofs);
> +		if (ret) {
> +			dev_err(pcdev->dev,
> +				"DMA initialization for V failed\n");
> +			goto fail_v;
>  		}
>  
>  		vb->state = VIDEOBUF_PREPARED;
> -- 
> 1.5.6.5
> 

---
Guennadi Liakhovetski, Ph.D.
Freelance Open-Source Software Developer
--
To unsubscribe from this list: send the line "unsubscribe linux-media" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Robert Jarzmik March 5, 2009, 9:10 p.m. UTC | #2
Guennadi Liakhovetski <g.liakhovetski@gmx.de> writes:

> This is not a review yet - just an explanation why I was suggesting to 
> adjust height and width - you say yourself, that YUV422P (I think, this is 
> wat you meant, not just YUV422) requires planes to immediately follow one 
> another. But you have to align them on 8 byte boundary for DMA, so, you 
> violate the standard, right? If so, I would rather suggest to adjust width 
> and height for planar formats to comply to the standard. Or have I 
> misunderstood you?
No, you understand perfectly.

And now, what do we do :
 - adjust height ?
 - adjust height ?
 - adjust both ?

I couldn't decide which one, any hint ?

--
Robert
--
To unsubscribe from this list: send the line "unsubscribe linux-media" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Trent Piepho March 5, 2009, 9:22 p.m. UTC | #3
On Thu, 5 Mar 2009, Robert Jarzmik wrote:
> Guennadi Liakhovetski <g.liakhovetski@gmx.de> writes:
>
> > This is not a review yet - just an explanation why I was suggesting to
> > adjust height and width - you say yourself, that YUV422P (I think, this is
> > wat you meant, not just YUV422) requires planes to immediately follow one
> > another. But you have to align them on 8 byte boundary for DMA, so, you
> > violate the standard, right? If so, I would rather suggest to adjust width
> > and height for planar formats to comply to the standard. Or have I
> > misunderstood you?
> No, you understand perfectly.
>
> And now, what do we do :
>  - adjust height ?
>  - adjust height ?
>  - adjust both ?
>
> I couldn't decide which one, any hint ?

Shame the planes have to be contiguous.  Software like ffmpeg doesn't
require this and could handle planes with gaps between them without
trouble.  Plans aligned on 8 bytes boundaries would probably be faster in
fact.  Be better if v4l2_buffer gave us offsets for each plane.

If you must adjust, probably better to adjust both.
--
To unsubscribe from this list: send the line "unsubscribe linux-media" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Guennadi Liakhovetski March 5, 2009, 10:15 p.m. UTC | #4
On Thu, 5 Mar 2009, Trent Piepho wrote:

> On Thu, 5 Mar 2009, Robert Jarzmik wrote:
> > Guennadi Liakhovetski <g.liakhovetski@gmx.de> writes:
> >
> > > This is not a review yet - just an explanation why I was suggesting to
> > > adjust height and width - you say yourself, that YUV422P (I think, this is
> > > wat you meant, not just YUV422) requires planes to immediately follow one
> > > another. But you have to align them on 8 byte boundary for DMA, so, you
> > > violate the standard, right? If so, I would rather suggest to adjust width
> > > and height for planar formats to comply to the standard. Or have I
> > > misunderstood you?
> > No, you understand perfectly.
> >
> > And now, what do we do :
> >  - adjust height ?
> >  - adjust height ?
> >  - adjust both ?
> >
> > I couldn't decide which one, any hint ?
> 
> Shame the planes have to be contiguous.  Software like ffmpeg doesn't
> require this and could handle planes with gaps between them without
> trouble.  Plans aligned on 8 bytes boundaries would probably be faster in
> fact.  Be better if v4l2_buffer gave us offsets for each plane.
> 
> If you must adjust, probably better to adjust both.

Yes, adjusting both is also what I was suggesting in my original review. 
How about aligning the bigger of the two to 4 bytes and the smaller to 2? 

Thanks
Guennadi
---
Guennadi Liakhovetski, Ph.D.
Freelance Open-Source Software Developer
--
To unsubscribe from this list: send the line "unsubscribe linux-media" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Trent Piepho March 6, 2009, 9:30 a.m. UTC | #5
On Thu, 5 Mar 2009, Guennadi Liakhovetski wrote:
> On Thu, 5 Mar 2009, Trent Piepho wrote:
> > On Thu, 5 Mar 2009, Robert Jarzmik wrote:
> > > Guennadi Liakhovetski <g.liakhovetski@gmx.de> writes:
> > > > This is not a review yet - just an explanation why I was suggesting to
> > > > adjust height and width - you say yourself, that YUV422P (I think, this is
> > > > wat you meant, not just YUV422) requires planes to immediately follow one
> > > > another. But you have to align them on 8 byte boundary for DMA, so, you
> > > > violate the standard, right? If so, I would rather suggest to adjust width
> > > > and height for planar formats to comply to the standard. Or have I
> > > > misunderstood you?
> > > No, you understand perfectly.
> > >
> > > And now, what do we do :
> > >  - adjust height ?
> > >  - adjust height ?
> > >  - adjust both ?
> > >
> > > I couldn't decide which one, any hint ?
> >
> > Shame the planes have to be contiguous.  Software like ffmpeg doesn't
> > require this and could handle planes with gaps between them without
> > trouble.  Plans aligned on 8 bytes boundaries would probably be faster in
> > fact.  Be better if v4l2_buffer gave us offsets for each plane.
> >
> > If you must adjust, probably better to adjust both.
>
> Yes, adjusting both is also what I was suggesting in my original review.
> How about aligning the bigger of the two to 4 bytes and the smaller to 2?

In order to 8 byte align the end of the first chroma plane, doesn't the
size need to be a multiple of 32, to take into account the chroma
decimation, assuming yuv 4:2:0?

Here is some code I could put this code into v4l that solves this.

In this case one could say:
v4l_bound_align_image(*width, 48, 640, 2, *height, 32, 480, 0, 5);
That will give you width between 48 and 640 that's a multiple of 4, a
height between 32 and 480, and the image size will be a multiple of 32.  It
will try to adjust the image size as little as possible to get it to work.

For example, give it 175x241 and it comes back with 176x242.  As opposed to
something like 192x241 or 172x240, which are also valid but more different
than the requested size.

/* Clamp x to be between min and max, aligned to a multiple of 2^align.  min
 * and max don't have to be aligned, but there must be at least one valid
 * value.  E.g., min=17,max=31,align=4 is not allowed as there are no multiples
 * of 16 between 17 and 31.  */
static unsigned int clamp_align(unsigned int x, unsigned int min,
                                unsigned int max, unsigned int align)
{
        /* Bits that must be zero to be aligned */
        unsigned int mask = (1 << align) - 1;

        /* Round to nearest aligned value */
        if (x & (1 << (align - 1)))
                x += mask; /* make x&=~mask round up */
        x &= ~mask;

        /* Clamp to aligned value of min and max */
        if (x < min)
                x = (min + mask) & ~mask;
        else if (x > max)
                x = max & ~mask;

        return x;
}

/* Bound an image to have a width between wmin and wmax, and height between
 * hmin and hmax, inclusive.  Additionally, the width will be a multiple of
 * 2^walign, the height will be a multiple of 2^halign, and the overall size
 * (width*height) will be a multiple of 2^salign.  May shrink or enlarge the
 * image to fit the alignment constraints.  The width or height maximum must
 * not be smaller than the corresponding minimum.  The alignments must not be
 * so high there are no possible image sizes within the allowed bounds.
 */
void v4l_bound_align_image(unsigned int *width, unsigned int wmin,
                           unsigned int wmax, unsigned int walign,
                           unsigned int *height, unsigned int hmin,
                           unsigned int hmax, unsigned int halign,
                           unsigned int salign)
{
        *width = clamp_align(*width, wmin, wmax, walign);
        *height = clamp_align(*height, hmin, hmax, halign);

        /* How much alignment do we have? */
        walign = __ffs(*width);
        halign = __ffs(*height);
        /* Enough to satisfy the image alignment? */
        if (walign + halign < salign) {
                /* Max walign where there is still a valid width */
                unsigned int wmaxa = __fls(wmax ^ (wmin - 1));

                salign -= walign + halign;
                /* up the smaller alignment until we have enough */
                do {
                        if (walign <= halign && walign < wmaxa)
                                walign++;
                        else
                                halign++;
                } while(--salign);
                *width = clamp_align(*width, wmin, wmax, walign);
                *height = clamp_align(*height, hmin, hmax, halign);
        }
}
--
To unsubscribe from this list: send the line "unsubscribe linux-media" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Guennadi Liakhovetski March 9, 2009, 10:45 a.m. UTC | #6
On Thu, 5 Mar 2009, Robert Jarzmik wrote:

> All planes were PAGE aligned (ie. 4096 bytes aligned). This
> is not consistent with YUV422 format, which requires Y, U
> and V planes glued together.  The new implementation forces
> the alignement on 8 bytes (DMA requirement), which is almost
> always the case (granted by width x height being a multiple
> of 8).
> 
> The test cases include tests in both YUV422 and RGB565 :
>  - a picture of size 111 x 111 (cross RAM pages example)
>  - a picture of size 1023 x 4 in (under 1 RAM page)
>  - a picture of size 1024 x 4 in (exactly 1 RAM page)
>  - a picture of size 1025 x 4 in (over 1 RAM page)
>  - a picture of size 1280 x 1024 (many RAM pages)
> 
> Signed-off-by: Robert Jarzmik <robert.jarzmik@free.fr>
> ---
>  drivers/media/video/pxa_camera.c |  165 ++++++++++++++++++++++++++------------
>  1 files changed, 114 insertions(+), 51 deletions(-)
> 
> diff --git a/drivers/media/video/pxa_camera.c b/drivers/media/video/pxa_camera.c
> index e3e6b29..54df071 100644
> --- a/drivers/media/video/pxa_camera.c
> +++ b/drivers/media/video/pxa_camera.c
> @@ -242,14 +242,13 @@ static int pxa_videobuf_setup(struct videobuf_queue *vq, unsigned int *count,
>  	dev_dbg(&icd->dev, "count=%d, size=%d\n", *count, *size);
>  
>  	/* planar capture requires Y, U and V buffers to be page aligned */
> -	if (pcdev->channels == 3) {
> -		*size = PAGE_ALIGN(icd->width * icd->height); /* Y pages */
> -		*size += PAGE_ALIGN(icd->width * icd->height / 2); /* U pages */
> -		*size += PAGE_ALIGN(icd->width * icd->height / 2); /* V pages */
> -	} else {
> -		*size = icd->width * icd->height *
> -			((icd->current_fmt->depth + 7) >> 3);
> -	}
> +	if (pcdev->channels == 3)
> +		*size = roundup(icd->width * icd->height, 8) /* Y pages */
> +			+ roundup(icd->width * icd->height / 2, 8) /* U pages */
> +			+ roundup(icd->width * icd->height / 2, 8); /* V pages */
> +	else
> +		*size = roundup(icd->width * icd->height *
> +				((icd->current_fmt->depth + 7) >> 3), 8);
>  
>  	if (0 == *count)
>  		*count = 32;

Ok, this one will change I presume - new alignment calculations and 
line-breaking. In fact, if you adjust width and height earlier in set_fmt, 
maybe you'll just remove any rounding here completely.

> @@ -289,19 +288,63 @@ static void free_buffer(struct videobuf_queue *vq, struct pxa_buffer *buf)
>  	buf->vb.state = VIDEOBUF_NEEDS_INIT;
>  }
>  
> +static int calculate_dma_sglen(struct scatterlist *sglist, int sglen,
> +			       int sg_first_ofs, int size)
> +{
> +	int i, offset, dma_len, xfer_len;
> +	struct scatterlist *sg;
> +
> +	offset = sg_first_ofs;
> +	for_each_sg(sglist, sg, sglen, i) {
> +		dma_len = sg_dma_len(sg);
> +
> +		/* PXA27x Developer's Manual 27.4.4.1: round up to 8 bytes */
> +		xfer_len = roundup(min(dma_len - offset, size), 8);

Ok, let's see. dma_len is PAGE_SIZE. offset is for Y-plane 0, for further 
planes it will be aligned after we recalculate width and height. size will 
be aligned too, so, roundup will disappear, right? You might want to just 
just add a test for these. The calculation itself gives size >= xfer_len

> +
> +		size = max(0, size - xfer_len);

So, max is useless here, just "size -= xfer_len."

> +		offset = 0;
> +		if (size == 0)
> +			break;
> +	}
> +
> +	BUG_ON(size != 0);
> +	return i + 1;
> +}
> +
> +/**
> + * pxa_init_dma_channel - init dma descriptors
> + * @pcdev: pxa camera device
> + * @buf: pxa buffer to find pxa dma channel
> + * @dma: dma video buffer
> + * @channel: dma channel (0 => 'Y', 1 => 'U', 2 => 'V')
> + * @cibr: camera read fifo
> + * @size: bytes to transfer
> + * @sg_first: index of first element of sg_list
> + * @sg_first_ofs: offset in first element of sg_list
> + *
> + * Prepares the pxa dma descriptors to transfer one camera channel.
> + * Beware sg_first and sg_first_ofs are both input and output parameters.
> + *
> + * Returns 0
> + */
>  static int pxa_init_dma_channel(struct pxa_camera_dev *pcdev,
>  				struct pxa_buffer *buf,
>  				struct videobuf_dmabuf *dma, int channel,
> -				int sglen, int sg_start, int cibr,
> -				unsigned int size)
> +				int cibr, int size,
> +				struct scatterlist **sg_first, int *sg_first_ofs)
>  {
>  	struct pxa_cam_dma *pxa_dma = &buf->dmas[channel];
> -	int i;
> +	struct scatterlist *sg;
> +	int i, offset, sglen;
> +	int dma_len = 0, xfer_len = 0;
>  
>  	if (pxa_dma->sg_cpu)
>  		dma_free_coherent(pcdev->dev, pxa_dma->sg_size,
>  				  pxa_dma->sg_cpu, pxa_dma->sg_dma);
>  
> +	sglen = calculate_dma_sglen(*sg_first, dma->sglen,
> +				    *sg_first_ofs, size);
> +
>  	pxa_dma->sg_size = (sglen + 1) * sizeof(struct pxa_dma_desc);
>  	pxa_dma->sg_cpu = dma_alloc_coherent(pcdev->dev, pxa_dma->sg_size,
>  					     &pxa_dma->sg_dma, GFP_KERNEL);
> @@ -309,27 +352,51 @@ static int pxa_init_dma_channel(struct pxa_camera_dev *pcdev,
>  		return -ENOMEM;
>  
>  	pxa_dma->sglen = sglen;
> +	offset = *sg_first_ofs;
>  
> -	for (i = 0; i < sglen; i++) {
> -		int sg_i = sg_start + i;
> -		struct scatterlist *sg = dma->sglist;
> -		unsigned int dma_len = sg_dma_len(&sg[sg_i]), xfer_len;
> +	dev_dbg(pcdev->dev, "DMA: sg_first=%p, sglen=%d, ofs=%d, dma.desc=%x\n",
> +		*sg_first, sglen, *sg_first_ofs, pxa_dma->sg_dma);
>  
> -		pxa_dma->sg_cpu[i].dsadr = pcdev->res->start + cibr;
> -		pxa_dma->sg_cpu[i].dtadr = sg_dma_address(&sg[sg_i]);
> +
> +	for_each_sg(*sg_first, sg, sglen, i) {
> +		dma_len = sg_dma_len(sg);
>  
>  		/* PXA27x Developer's Manual 27.4.4.1: round up to 8 bytes */
> -		xfer_len = (min(dma_len, size) + 7) & ~7;
> +		xfer_len = roundup(min(dma_len - offset, size), 8);
>  
> +		size = max(0, size - xfer_len);

Same here for roundup() and max().

> +
> +		pxa_dma->sg_cpu[i].dsadr = pcdev->res->start + cibr;
> +		pxa_dma->sg_cpu[i].dtadr = sg_dma_address(sg) + offset;
>  		pxa_dma->sg_cpu[i].dcmd =
>  			DCMD_FLOWSRC | DCMD_BURST8 | DCMD_INCTRGADDR | xfer_len;
> -		size -= dma_len;
>  		pxa_dma->sg_cpu[i].ddadr =
>  			pxa_dma->sg_dma + (i + 1) * sizeof(struct pxa_dma_desc);
> +
> +		dev_vdbg(pcdev->dev, "DMA: desc.%08x->@phys=0x%08x, len=%d\n",
> +			 pxa_dma->sg_dma + i * sizeof(struct pxa_dma_desc),
> +			 sg_dma_address(sg) + offset, xfer_len);
> +		offset = 0;
> +
> +		if (size == 0)
> +			break;
>  	}
>  
> -	pxa_dma->sg_cpu[sglen - 1].ddadr = DDADR_STOP;
> -	pxa_dma->sg_cpu[sglen - 1].dcmd |= DCMD_ENDIRQEN;
> +	pxa_dma->sg_cpu[sglen].ddadr = DDADR_STOP;
> +	pxa_dma->sg_cpu[sglen].dcmd  = DCMD_FLOWSRC | DCMD_BURST8 | DCMD_ENDIRQEN;

Why are you now always using the n+1'th element? Even if it is right, it 
rather belongs to the patch "2/4," not "1/4," right? In your earlier email 
you wrote:

>  - in the former pxa_videobuf_queue(), when a buffer was queued while another
>  was already active, a dummy descriptor was added, and then the new buffer was
>  chained with the actively running buffer. See code below :
> 
> -                       } else {
> -                               buf_dma->sg_cpu[nents].ddadr =
> -                                       DDADR(pcdev->dma_chans[i]);
> -                       }
> -
> -                       /* The next descriptor is the dummy descriptor */
> -                       DDADR(pcdev->dma_chans[i]) = buf_dma->sg_dma + nents *
> -                               sizeof(struct pxa_dma_desc);
> 
>    The fix is in the code refactoring, as now the buffer is always added at the
>    tail of the queue through pxa_dma_add_tail_buf().

I don't understand, what this is fixing. It would make a nice 
simplification, if it worked, but see my review to patch "2/4."

> +
> +	*sg_first_ofs = xfer_len;
> +	/*
> +	 * Handle 1 special case :
> +	 *  - if we finish the DMA transfer in the last 7 bytes of a RAM page
> +	 *    then we return the sg element pointing on the next page
> +	 */
> +	if (*sg_first_ofs >= dma_len) {
> +		*sg_first_ofs -= dma_len;
> +		*sg_first = sg_next(sg);
> +	} else {
> +		*sg_first = sg;
> +	}

As we will not be rounding up any more, this special case shouldn't be 
needed either, right?

>  
>  	return 0;
>  }
> @@ -342,9 +409,7 @@ static int pxa_videobuf_prepare(struct videobuf_queue *vq,
>  	struct pxa_camera_dev *pcdev = ici->priv;
>  	struct pxa_buffer *buf = container_of(vb, struct pxa_buffer, vb);
>  	int ret;
> -	int sglen_y,  sglen_yu = 0, sglen_u = 0, sglen_v = 0;
> -	int size_y, size_u = 0, size_v = 0;
> -
> +	int size_y = 0, size_u = 0, size_v = 0;

Isn't size_y always initialised?

>  	dev_dbg(&icd->dev, "%s (vb=0x%p) 0x%08lx %d\n", __func__,
>  		vb, vb->baddr, vb->bsize);
>  
> @@ -381,53 +446,51 @@ static int pxa_videobuf_prepare(struct videobuf_queue *vq,
>  	}
>  
>  	if (vb->state == VIDEOBUF_NEEDS_INIT) {
> -		unsigned int size = vb->size;
> +		int size = vb->size;
> +		int next_ofs = 0;
>  		struct videobuf_dmabuf *dma = videobuf_to_dma(vb);
> +		struct scatterlist *sg;
>  
>  		ret = videobuf_iolock(vq, vb, NULL);
>  		if (ret)
>  			goto fail;
>  
>  		if (pcdev->channels == 3) {
> -			/* FIXME the calculations should be more precise */
> -			sglen_y = dma->sglen / 2;
> -			sglen_u = sglen_v = dma->sglen / 4 + 1;
> -			sglen_yu = sglen_y + sglen_u;
>  			size_y = size / 2;
>  			size_u = size_v = size / 4;
>  		} else {
> -			sglen_y = dma->sglen;
>  			size_y = size;
>  		}
>  
> -		/* init DMA for Y channel */
> -		ret = pxa_init_dma_channel(pcdev, buf, dma, 0, sglen_y,
> -					   0, 0x28, size_y);
> +		sg = dma->sglist;
>  
> +		/* init DMA for Y channel */
> +		ret = pxa_init_dma_channel(pcdev, buf, dma, 0, CIBR0, size_y,
> +					   &sg, &next_ofs);
>  		if (ret) {
>  			dev_err(pcdev->dev,
>  				"DMA initialization for Y/RGB failed\n");
>  			goto fail;
>  		}
>  
> -		if (pcdev->channels == 3) {
> -			/* init DMA for U channel */
> -			ret = pxa_init_dma_channel(pcdev, buf, dma, 1, sglen_u,
> -						   sglen_y, 0x30, size_u);
> -			if (ret) {
> -				dev_err(pcdev->dev,
> -					"DMA initialization for U failed\n");
> -				goto fail_u;
> -			}
> +		/* init DMA for U channel */
> +		if (size_u)
> +			ret = pxa_init_dma_channel(pcdev, buf, dma, 1, CIBR1,
> +						   size_u, &sg, &next_ofs);
> +		if (ret) {
> +			dev_err(pcdev->dev,
> +				"DMA initialization for U failed\n");
> +			goto fail_u;
> +		}
>  
> -			/* init DMA for V channel */
> -			ret = pxa_init_dma_channel(pcdev, buf, dma, 2, sglen_v,
> -						   sglen_yu, 0x38, size_v);
> -			if (ret) {
> -				dev_err(pcdev->dev,
> -					"DMA initialization for V failed\n");
> -				goto fail_v;
> -			}
> +		/* init DMA for V channel */
> +		if (size_v)
> +			ret = pxa_init_dma_channel(pcdev, buf, dma, 2, CIBR2,
> +						   size_u, &sg, &next_ofs);
> +		if (ret) {
> +			dev_err(pcdev->dev,
> +				"DMA initialization for V failed\n");
> +			goto fail_v;
>  		}
>  
>  		vb->state = VIDEOBUF_PREPARED;
> -- 
> 1.5.6.5
> 

Thanks
Guennadi
---
Guennadi Liakhovetski, Ph.D.
Freelance Open-Source Software Developer
--
To unsubscribe from this list: send the line "unsubscribe linux-media" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Robert Jarzmik March 9, 2009, 7:13 p.m. UTC | #7
Guennadi Liakhovetski <g.liakhovetski@gmx.de> writes:

>> diff --git a/drivers/media/video/pxa_camera.c b/drivers/media/video/pxa_camera.c
>> index e3e6b29..54df071 100644
>> --- a/drivers/media/video/pxa_camera.c
>> +++ b/drivers/media/video/pxa_camera.c
>> @@ -242,14 +242,13 @@ static int pxa_videobuf_setup(struct videobuf_queue *vq, unsigned int *count,
>>  	dev_dbg(&icd->dev, "count=%d, size=%d\n", *count, *size);
>>  
>>  	/* planar capture requires Y, U and V buffers to be page aligned */
>> -	if (pcdev->channels == 3) {
>> -		*size = PAGE_ALIGN(icd->width * icd->height); /* Y pages */
>> -		*size += PAGE_ALIGN(icd->width * icd->height / 2); /* U pages */
>> -		*size += PAGE_ALIGN(icd->width * icd->height / 2); /* V pages */
>> -	} else {
>> -		*size = icd->width * icd->height *
>> -			((icd->current_fmt->depth + 7) >> 3);
>> -	}
>> +	if (pcdev->channels == 3)
>> +		*size = roundup(icd->width * icd->height, 8) /* Y pages */
>> +			+ roundup(icd->width * icd->height / 2, 8) /* U pages */
>> +			+ roundup(icd->width * icd->height / 2, 8); /* V pages */
>> +	else
>> +		*size = roundup(icd->width * icd->height *
>> +				((icd->current_fmt->depth + 7) >> 3), 8);
>>  
>>  	if (0 == *count)
>>  		*count = 32;
>
> Ok, this one will change I presume - new alignment calculations and 
> line-breaking. In fact, if you adjust width and height earlier in set_fmt, 
> maybe you'll just remove any rounding here completely.
Helas, not fully.
The problem is with passthrough and rgb formats, where I don't enforce
width/height. In the newest form of the patch I have this :
	if (pcdev->channels == 3)
		*size = icd->width * icd->height * 2;
	else
		*size = roundup(icd->width * icd->height *
				((icd->current_fmt->depth + 7) >> 3), 8);

>
>> @@ -289,19 +288,63 @@ static void free_buffer(struct videobuf_queue *vq, struct pxa_buffer *buf)
>>  	buf->vb.state = VIDEOBUF_NEEDS_INIT;
>>  }
>>  
>> +static int calculate_dma_sglen(struct scatterlist *sglist, int sglen,
>> +			       int sg_first_ofs, int size)
>> +{
>> +	int i, offset, dma_len, xfer_len;
>> +	struct scatterlist *sg;
>> +
>> +	offset = sg_first_ofs;
>> +	for_each_sg(sglist, sg, sglen, i) {
>> +		dma_len = sg_dma_len(sg);
>> +
>> +		/* PXA27x Developer's Manual 27.4.4.1: round up to 8 bytes */
>> +		xfer_len = roundup(min(dma_len - offset, size), 8);
>
> Ok, let's see. dma_len is PAGE_SIZE. offset is for Y-plane 0, for further 
> planes it will be aligned after we recalculate width and height. size will 
> be aligned too, so, roundup will disappear, right?

No, I don't think so.

Consider the case of a RGB565 image which size is 223*33 = 7359 bytes. This
makes a transfer of 4096 bytes and another of 3263 bytes.

But the QIF fifo will give 4096 + 3264 bytes (the last one beeing 0), and this
last byte has to be read from the fifo. As I understand it, the QIF fifo works
with 8 bytes permutations, and that why it's giving always a multiple of 8
bytes. Please, cross-check PXA developper manual, paragraph 27.4.4.1 to see if
you understand the same as I did.

So the roundup() is to be kept :(

> You might want to just 
> just add a test for these. The calculation itself gives size >= xfer_len
>
>> +
>> +		size = max(0, size - xfer_len);
>
> So, max is useless here, just "size -= xfer_len."
And so the max() still hold I think.

>> +		size = max(0, size - xfer_len);
>
> Same here for roundup() and max().
Yes, same discussion.

>>  
>> -	pxa_dma->sg_cpu[sglen - 1].ddadr = DDADR_STOP;
>> -	pxa_dma->sg_cpu[sglen - 1].dcmd |= DCMD_ENDIRQEN;
>> +	pxa_dma->sg_cpu[sglen].ddadr = DDADR_STOP;
>> +	pxa_dma->sg_cpu[sglen].dcmd  = DCMD_FLOWSRC | DCMD_BURST8 | DCMD_ENDIRQEN;
>
> Why are you now always using the n+1'th element? Even if it is right, it 
> rather belongs to the patch "2/4," not "1/4," right? In your earlier email 
> you wrote:
>
>>  - in the former pxa_videobuf_queue(), when a buffer was queued while another
>>  was already active, a dummy descriptor was added, and then the new buffer was
>>  chained with the actively running buffer. See code below :
>> 
>> -                       } else {
>> -                               buf_dma->sg_cpu[nents].ddadr =
>> -                                       DDADR(pcdev->dma_chans[i]);
>> -                       }
>> -
>> -                       /* The next descriptor is the dummy descriptor */
>> -                       DDADR(pcdev->dma_chans[i]) = buf_dma->sg_dma + nents *
>> -                               sizeof(struct pxa_dma_desc);
>> 
>>    The fix is in the code refactoring, as now the buffer is always added at the
>>    tail of the queue through pxa_dma_add_tail_buf().
>
> I don't understand, what this is fixing. It would make a nice 
> simplification, if it worked, but see my review to patch "2/4."
Let's discuss it in patch 2/4 review, yes.

>> +
>> +	*sg_first_ofs = xfer_len;
>> +	/*
>> +	 * Handle 1 special case :
>> +	 *  - if we finish the DMA transfer in the last 7 bytes of a RAM page
>> +	 *    then we return the sg element pointing on the next page
>> +	 */
>> +	if (*sg_first_ofs >= dma_len) {
>> +		*sg_first_ofs -= dma_len;
>> +		*sg_first = sg_next(sg);
>> +	} else {
>> +		*sg_first = sg;
>> +	}
>
> As we will not be rounding up any more, this special case shouldn't be 
> needed either, right?
Same discussion as before. But here, as sg_first and sf_first_ofs only make
sense for 3 planes output, and the rounding takes care of the corner cases, this
part will be simplified, yes.

>
>>  
>>  	return 0;
>>  }
>> @@ -342,9 +409,7 @@ static int pxa_videobuf_prepare(struct videobuf_queue *vq,
>>  	struct pxa_camera_dev *pcdev = ici->priv;
>>  	struct pxa_buffer *buf = container_of(vb, struct pxa_buffer, vb);
>>  	int ret;
>> -	int sglen_y,  sglen_yu = 0, sglen_u = 0, sglen_v = 0;
>> -	int size_y, size_u = 0, size_v = 0;
>> -
>> +	int size_y = 0, size_u = 0, size_v = 0;
>
> Isn't size_y always initialised?
Yes, I'll remove that.

Thanks for the review, let's keep that going on :)

Cheers.

--
Robert
--
To unsubscribe from this list: send the line "unsubscribe linux-media" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Trent Piepho March 10, 2009, 6:33 p.m. UTC | #8
On Mon, 9 Mar 2009, Robert Jarzmik wrote:
> > Ok, this one will change I presume - new alignment calculations and
> > line-breaking. In fact, if you adjust width and height earlier in set_fmt,
> > maybe you'll just remove any rounding here completely.
> Helas, not fully.
> The problem is with passthrough and rgb formats, where I don't enforce
> width/height. In the newest form of the patch I have this :
> 	if (pcdev->channels == 3)
> 		*size = icd->width * icd->height * 2;
> 	else
> 		*size = roundup(icd->width * icd->height *
> 				((icd->current_fmt->depth + 7) >> 3), 8);

If icd->current_fmt->depth could be set to 16 for planar formats, then you
could get rid of the special case here.
--
To unsubscribe from this list: send the line "unsubscribe linux-media" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/media/video/pxa_camera.c b/drivers/media/video/pxa_camera.c
index e3e6b29..54df071 100644
--- a/drivers/media/video/pxa_camera.c
+++ b/drivers/media/video/pxa_camera.c
@@ -242,14 +242,13 @@  static int pxa_videobuf_setup(struct videobuf_queue *vq, unsigned int *count,
 	dev_dbg(&icd->dev, "count=%d, size=%d\n", *count, *size);
 
 	/* planar capture requires Y, U and V buffers to be page aligned */
-	if (pcdev->channels == 3) {
-		*size = PAGE_ALIGN(icd->width * icd->height); /* Y pages */
-		*size += PAGE_ALIGN(icd->width * icd->height / 2); /* U pages */
-		*size += PAGE_ALIGN(icd->width * icd->height / 2); /* V pages */
-	} else {
-		*size = icd->width * icd->height *
-			((icd->current_fmt->depth + 7) >> 3);
-	}
+	if (pcdev->channels == 3)
+		*size = roundup(icd->width * icd->height, 8) /* Y pages */
+			+ roundup(icd->width * icd->height / 2, 8) /* U pages */
+			+ roundup(icd->width * icd->height / 2, 8); /* V pages */
+	else
+		*size = roundup(icd->width * icd->height *
+				((icd->current_fmt->depth + 7) >> 3), 8);
 
 	if (0 == *count)
 		*count = 32;
@@ -289,19 +288,63 @@  static void free_buffer(struct videobuf_queue *vq, struct pxa_buffer *buf)
 	buf->vb.state = VIDEOBUF_NEEDS_INIT;
 }
 
+static int calculate_dma_sglen(struct scatterlist *sglist, int sglen,
+			       int sg_first_ofs, int size)
+{
+	int i, offset, dma_len, xfer_len;
+	struct scatterlist *sg;
+
+	offset = sg_first_ofs;
+	for_each_sg(sglist, sg, sglen, i) {
+		dma_len = sg_dma_len(sg);
+
+		/* PXA27x Developer's Manual 27.4.4.1: round up to 8 bytes */
+		xfer_len = roundup(min(dma_len - offset, size), 8);
+
+		size = max(0, size - xfer_len);
+		offset = 0;
+		if (size == 0)
+			break;
+	}
+
+	BUG_ON(size != 0);
+	return i + 1;
+}
+
+/**
+ * pxa_init_dma_channel - init dma descriptors
+ * @pcdev: pxa camera device
+ * @buf: pxa buffer to find pxa dma channel
+ * @dma: dma video buffer
+ * @channel: dma channel (0 => 'Y', 1 => 'U', 2 => 'V')
+ * @cibr: camera read fifo
+ * @size: bytes to transfer
+ * @sg_first: index of first element of sg_list
+ * @sg_first_ofs: offset in first element of sg_list
+ *
+ * Prepares the pxa dma descriptors to transfer one camera channel.
+ * Beware sg_first and sg_first_ofs are both input and output parameters.
+ *
+ * Returns 0
+ */
 static int pxa_init_dma_channel(struct pxa_camera_dev *pcdev,
 				struct pxa_buffer *buf,
 				struct videobuf_dmabuf *dma, int channel,
-				int sglen, int sg_start, int cibr,
-				unsigned int size)
+				int cibr, int size,
+				struct scatterlist **sg_first, int *sg_first_ofs)
 {
 	struct pxa_cam_dma *pxa_dma = &buf->dmas[channel];
-	int i;
+	struct scatterlist *sg;
+	int i, offset, sglen;
+	int dma_len = 0, xfer_len = 0;
 
 	if (pxa_dma->sg_cpu)
 		dma_free_coherent(pcdev->dev, pxa_dma->sg_size,
 				  pxa_dma->sg_cpu, pxa_dma->sg_dma);
 
+	sglen = calculate_dma_sglen(*sg_first, dma->sglen,
+				    *sg_first_ofs, size);
+
 	pxa_dma->sg_size = (sglen + 1) * sizeof(struct pxa_dma_desc);
 	pxa_dma->sg_cpu = dma_alloc_coherent(pcdev->dev, pxa_dma->sg_size,
 					     &pxa_dma->sg_dma, GFP_KERNEL);
@@ -309,27 +352,51 @@  static int pxa_init_dma_channel(struct pxa_camera_dev *pcdev,
 		return -ENOMEM;
 
 	pxa_dma->sglen = sglen;
+	offset = *sg_first_ofs;
 
-	for (i = 0; i < sglen; i++) {
-		int sg_i = sg_start + i;
-		struct scatterlist *sg = dma->sglist;
-		unsigned int dma_len = sg_dma_len(&sg[sg_i]), xfer_len;
+	dev_dbg(pcdev->dev, "DMA: sg_first=%p, sglen=%d, ofs=%d, dma.desc=%x\n",
+		*sg_first, sglen, *sg_first_ofs, pxa_dma->sg_dma);
 
-		pxa_dma->sg_cpu[i].dsadr = pcdev->res->start + cibr;
-		pxa_dma->sg_cpu[i].dtadr = sg_dma_address(&sg[sg_i]);
+
+	for_each_sg(*sg_first, sg, sglen, i) {
+		dma_len = sg_dma_len(sg);
 
 		/* PXA27x Developer's Manual 27.4.4.1: round up to 8 bytes */
-		xfer_len = (min(dma_len, size) + 7) & ~7;
+		xfer_len = roundup(min(dma_len - offset, size), 8);
 
+		size = max(0, size - xfer_len);
+
+		pxa_dma->sg_cpu[i].dsadr = pcdev->res->start + cibr;
+		pxa_dma->sg_cpu[i].dtadr = sg_dma_address(sg) + offset;
 		pxa_dma->sg_cpu[i].dcmd =
 			DCMD_FLOWSRC | DCMD_BURST8 | DCMD_INCTRGADDR | xfer_len;
-		size -= dma_len;
 		pxa_dma->sg_cpu[i].ddadr =
 			pxa_dma->sg_dma + (i + 1) * sizeof(struct pxa_dma_desc);
+
+		dev_vdbg(pcdev->dev, "DMA: desc.%08x->@phys=0x%08x, len=%d\n",
+			 pxa_dma->sg_dma + i * sizeof(struct pxa_dma_desc),
+			 sg_dma_address(sg) + offset, xfer_len);
+		offset = 0;
+
+		if (size == 0)
+			break;
 	}
 
-	pxa_dma->sg_cpu[sglen - 1].ddadr = DDADR_STOP;
-	pxa_dma->sg_cpu[sglen - 1].dcmd |= DCMD_ENDIRQEN;
+	pxa_dma->sg_cpu[sglen].ddadr = DDADR_STOP;
+	pxa_dma->sg_cpu[sglen].dcmd  = DCMD_FLOWSRC | DCMD_BURST8 | DCMD_ENDIRQEN;
+
+	*sg_first_ofs = xfer_len;
+	/*
+	 * Handle 1 special case :
+	 *  - if we finish the DMA transfer in the last 7 bytes of a RAM page
+	 *    then we return the sg element pointing on the next page
+	 */
+	if (*sg_first_ofs >= dma_len) {
+		*sg_first_ofs -= dma_len;
+		*sg_first = sg_next(sg);
+	} else {
+		*sg_first = sg;
+	}
 
 	return 0;
 }
@@ -342,9 +409,7 @@  static int pxa_videobuf_prepare(struct videobuf_queue *vq,
 	struct pxa_camera_dev *pcdev = ici->priv;
 	struct pxa_buffer *buf = container_of(vb, struct pxa_buffer, vb);
 	int ret;
-	int sglen_y,  sglen_yu = 0, sglen_u = 0, sglen_v = 0;
-	int size_y, size_u = 0, size_v = 0;
-
+	int size_y = 0, size_u = 0, size_v = 0;
 	dev_dbg(&icd->dev, "%s (vb=0x%p) 0x%08lx %d\n", __func__,
 		vb, vb->baddr, vb->bsize);
 
@@ -381,53 +446,51 @@  static int pxa_videobuf_prepare(struct videobuf_queue *vq,
 	}
 
 	if (vb->state == VIDEOBUF_NEEDS_INIT) {
-		unsigned int size = vb->size;
+		int size = vb->size;
+		int next_ofs = 0;
 		struct videobuf_dmabuf *dma = videobuf_to_dma(vb);
+		struct scatterlist *sg;
 
 		ret = videobuf_iolock(vq, vb, NULL);
 		if (ret)
 			goto fail;
 
 		if (pcdev->channels == 3) {
-			/* FIXME the calculations should be more precise */
-			sglen_y = dma->sglen / 2;
-			sglen_u = sglen_v = dma->sglen / 4 + 1;
-			sglen_yu = sglen_y + sglen_u;
 			size_y = size / 2;
 			size_u = size_v = size / 4;
 		} else {
-			sglen_y = dma->sglen;
 			size_y = size;
 		}
 
-		/* init DMA for Y channel */
-		ret = pxa_init_dma_channel(pcdev, buf, dma, 0, sglen_y,
-					   0, 0x28, size_y);
+		sg = dma->sglist;
 
+		/* init DMA for Y channel */
+		ret = pxa_init_dma_channel(pcdev, buf, dma, 0, CIBR0, size_y,
+					   &sg, &next_ofs);
 		if (ret) {
 			dev_err(pcdev->dev,
 				"DMA initialization for Y/RGB failed\n");
 			goto fail;
 		}
 
-		if (pcdev->channels == 3) {
-			/* init DMA for U channel */
-			ret = pxa_init_dma_channel(pcdev, buf, dma, 1, sglen_u,
-						   sglen_y, 0x30, size_u);
-			if (ret) {
-				dev_err(pcdev->dev,
-					"DMA initialization for U failed\n");
-				goto fail_u;
-			}
+		/* init DMA for U channel */
+		if (size_u)
+			ret = pxa_init_dma_channel(pcdev, buf, dma, 1, CIBR1,
+						   size_u, &sg, &next_ofs);
+		if (ret) {
+			dev_err(pcdev->dev,
+				"DMA initialization for U failed\n");
+			goto fail_u;
+		}
 
-			/* init DMA for V channel */
-			ret = pxa_init_dma_channel(pcdev, buf, dma, 2, sglen_v,
-						   sglen_yu, 0x38, size_v);
-			if (ret) {
-				dev_err(pcdev->dev,
-					"DMA initialization for V failed\n");
-				goto fail_v;
-			}
+		/* init DMA for V channel */
+		if (size_v)
+			ret = pxa_init_dma_channel(pcdev, buf, dma, 2, CIBR2,
+						   size_u, &sg, &next_ofs);
+		if (ret) {
+			dev_err(pcdev->dev,
+				"DMA initialization for V failed\n");
+			goto fail_v;
 		}
 
 		vb->state = VIDEOBUF_PREPARED;