diff mbox

add raw video support for Samsung SUR40 touchscreen

Message ID 1420626920-9357-1-git-send-email-floe@butterbrot.org (mailing list archive)
State New, archived
Headers show

Commit Message

Florian Echtler Jan. 7, 2015, 10:35 a.m. UTC
This patch add support for the raw video stream from the Samsung SUR40
touchscreen device. Existing input device support is not affected by this
patch and can be used concurrently. videobuf2-dma-contig is used for buffer
management. All tests from current v4l2-compliance -s run pass (see 
http://floe.butterbrot.org/external/results.txt for details).

Note: I'm intentionally using dma-contig instead of vmalloc, as the USB
core apparently _will_ try to use DMA for larger bulk transfers. 

Signed-off-by: Florian Echtler <floe@butterbrot.org>
---
 drivers/input/touchscreen/sur40.c | 423 ++++++++++++++++++++++++++++++++++++--
 1 file changed, 411 insertions(+), 12 deletions(-)

Comments

Hans Verkuil Jan. 19, 2015, 10:38 a.m. UTC | #1
Hi Florian,

Sorry for the delay.

Several comments below...

On 01/07/2015 11:35 AM, Florian Echtler wrote:
> This patch add support for the raw video stream from the Samsung SUR40
> touchscreen device. Existing input device support is not affected by this
> patch and can be used concurrently. videobuf2-dma-contig is used for buffer
> management. All tests from current v4l2-compliance -s run pass (see 
> http://floe.butterbrot.org/external/results.txt for details).
> 
> Note: I'm intentionally using dma-contig instead of vmalloc, as the USB
> core apparently _will_ try to use DMA for larger bulk transfers. 

As far as I can tell from looking through the usb core code it supports
scatter-gather DMA, so you should at least use dma-sg rather than dma-contig.
Physically contiguous memory should always be avoided.

I'm also missing a patch for the Kconfig that adds a dependency on MEDIA_USB_SUPPORT
and that selects VIDEOBUF2_DMA_SG.

> 
> Signed-off-by: Florian Echtler <floe@butterbrot.org>
> ---
>  drivers/input/touchscreen/sur40.c | 423 ++++++++++++++++++++++++++++++++++++--
>  1 file changed, 411 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/input/touchscreen/sur40.c b/drivers/input/touchscreen/sur40.c
> index f1cb051..bcd9ee2 100644
> --- a/drivers/input/touchscreen/sur40.c
> +++ b/drivers/input/touchscreen/sur40.c
> @@ -1,7 +1,7 @@
>  /*
>   * Surface2.0/SUR40/PixelSense input driver
>   *
> - * Copyright (c) 2013 by Florian 'floe' Echtler <floe@butterbrot.org>
> + * Copyright (c) 2014 by Florian 'floe' Echtler <floe@butterbrot.org>
>   *
>   * Derived from the USB Skeleton driver 1.1,
>   * Copyright (c) 2003 Greg Kroah-Hartman (greg@kroah.com)
> @@ -12,6 +12,9 @@
>   * and from the generic hid-multitouch driver,
>   * Copyright (c) 2010-2012 Stephane Chatty <chatty@enac.fr>
>   *
> + * and from the v4l2-pci-skeleton driver,
> + * Copyright (c) Copyright 2014 Cisco Systems, Inc.
> + *
>   * This program is free software; you can redistribute it and/or
>   * modify it under the terms of the GNU General Public License as
>   * published by the Free Software Foundation; either version 2 of
> @@ -31,6 +34,11 @@
>  #include <linux/input-polldev.h>
>  #include <linux/input/mt.h>
>  #include <linux/usb/input.h>
> +#include <linux/videodev2.h>
> +#include <media/v4l2-device.h>
> +#include <media/v4l2-dev.h>
> +#include <media/v4l2-ioctl.h>
> +#include <media/videobuf2-dma-contig.h>
>  
>  /* read 512 bytes from endpoint 0x86 -> get header + blobs */
>  struct sur40_header {
> @@ -82,9 +90,19 @@ struct sur40_data {
>  	struct sur40_blob   blobs[];
>  } __packed;
>  
> +/* read 512 bytes from endpoint 0x82 -> get header below
> + * continue reading 16k blocks until header.size bytes read */
> +struct sur40_image_header {
> +	__le32 magic;     /* "SUBF" */
> +	__le32 packet_id;
> +	__le32 size;      /* always 0x0007e900 = 960x540 */
> +	__le32 timestamp; /* milliseconds (increases by 16 or 17 each frame) */
> +	__le32 unknown;   /* "epoch?" always 02/03 00 00 00 */
> +} __packed;
>  
>  /* version information */
>  #define DRIVER_SHORT   "sur40"
> +#define DRIVER_LONG    "Samsung SUR40"
>  #define DRIVER_AUTHOR  "Florian 'floe' Echtler <floe@butterbrot.org>"
>  #define DRIVER_DESC    "Surface2.0/SUR40/PixelSense input driver"
>  
> @@ -99,6 +117,13 @@ struct sur40_data {
>  /* touch data endpoint */
>  #define TOUCH_ENDPOINT 0x86
>  
> +/* video data endpoint */
> +#define VIDEO_ENDPOINT 0x82
> +
> +/* video header fields */
> +#define VIDEO_HEADER_MAGIC 0x46425553
> +#define VIDEO_PACKET_SIZE  16384
> +
>  /* polling interval (ms) */
>  #define POLL_INTERVAL 10
>  
> @@ -113,21 +138,23 @@ struct sur40_data {
>  #define SUR40_GET_STATE   0xc5 /*  4 bytes state (?) */
>  #define SUR40_GET_SENSORS 0xb1 /*  8 bytes sensors   */
>  
> -/*
> - * Note: an earlier, non-public version of this driver used USB_RECIP_ENDPOINT
> - * here by mistake which is very likely to have corrupted the firmware EEPROM
> - * on two separate SUR40 devices. Thanks to Alan Stern who spotted this bug.
> - * Should you ever run into a similar problem, the background story to this
> - * incident and instructions on how to fix the corrupted EEPROM are available
> - * at https://floe.butterbrot.org/matrix/hacking/surface/brick.html
> -*/
> -
> +/* master device state */
>  struct sur40_state {
>  
>  	struct usb_device *usbdev;
>  	struct device *dev;
>  	struct input_polled_dev *input;
>  
> +	struct v4l2_device v4l2;
> +	struct video_device vdev;
> +	struct mutex lock;
> +
> +	struct vb2_queue queue;
> +	struct vb2_alloc_ctx *alloc_ctx;
> +	struct list_head buf_list;
> +	spinlock_t qlock;
> +	int sequence;
> +
>  	struct sur40_data *bulk_in_buffer;
>  	size_t bulk_in_size;
>  	u8 bulk_in_epaddr;
> @@ -135,6 +162,27 @@ struct sur40_state {
>  	char phys[64];
>  };
>  
> +struct sur40_buffer {
> +	struct vb2_buffer vb;
> +	struct list_head list;
> +};
> +
> +/* forward declarations */
> +static struct video_device sur40_video_device;
> +static struct v4l2_pix_format sur40_video_format;
> +static struct vb2_queue sur40_queue;
> +static void sur40_process_video(struct sur40_state *sur40);
> +
> +/*
> + * Note: an earlier, non-public version of this driver used USB_RECIP_ENDPOINT
> + * here by mistake which is very likely to have corrupted the firmware EEPROM
> + * on two separate SUR40 devices. Thanks to Alan Stern who spotted this bug.
> + * Should you ever run into a similar problem, the background story to this
> + * incident and instructions on how to fix the corrupted EEPROM are available
> + * at https://floe.butterbrot.org/matrix/hacking/surface/brick.html
> +*/
> +
> +/* command wrapper */
>  static int sur40_command(struct sur40_state *dev,
>  			 u8 command, u16 index, void *buffer, u16 size)
>  {
> @@ -247,7 +295,6 @@ static void sur40_report_blob(struct sur40_blob *blob, struct input_dev *input)
>  /* core function: poll for new input data */
>  static void sur40_poll(struct input_polled_dev *polldev)
>  {
> -
>  	struct sur40_state *sur40 = polldev->private;
>  	struct input_dev *input = polldev->input;
>  	int result, bulk_read, need_blobs, packet_blobs, i;
> @@ -314,6 +361,79 @@ static void sur40_poll(struct input_polled_dev *polldev)
>  
>  	input_mt_sync_frame(input);
>  	input_sync(input);
> +
> +	sur40_process_video(sur40);
> +}
> +
> +/* deal with video data */
> +static void sur40_process_video(struct sur40_state *sur40)
> +{
> +
> +	struct sur40_image_header *img = (void *)(sur40->bulk_in_buffer);
> +	int result, bulk_read, bufpos;
> +	struct sur40_buffer *new_buf;
> +	uint8_t *buffer;
> +
> +	if (list_empty(&sur40->buf_list))
> +		return;
> +
> +	/* get a new buffer from the list */
> +	spin_lock(&sur40->qlock);
> +	new_buf = list_entry(sur40->buf_list.next, struct sur40_buffer, list);
> +	list_del(&new_buf->list);
> +	spin_unlock(&sur40->qlock);
> +
> +	/* retrieve data via bulk read */
> +	bufpos = 0;
> +
> +	result = usb_bulk_msg(sur40->usbdev,
> +			usb_rcvbulkpipe(sur40->usbdev, VIDEO_ENDPOINT),
> +			sur40->bulk_in_buffer, sur40->bulk_in_size,
> +			&bulk_read, 1000);
> +
> +	if (result < 0) {
> +		dev_err(sur40->dev, "error in usb_bulk_read\n");
> +		goto err_poll;
> +	}
> +
> +	if (bulk_read != sizeof(struct sur40_image_header)) {
> +		dev_err(sur40->dev, "received %d bytes (%ld expected)\n",
> +			bulk_read, sizeof(struct sur40_image_header));
> +		goto err_poll;
> +	}
> +
> +	if (le32_to_cpu(img->magic) != VIDEO_HEADER_MAGIC) {
> +		dev_err(sur40->dev, "image magic mismatch\n");
> +		goto err_poll;
> +	}
> +
> +	if (le32_to_cpu(img->size) != sur40_video_format.sizeimage) {
> +		dev_err(sur40->dev, "image size mismatch\n");
> +		goto err_poll;
> +	}
> +
> +	buffer = vb2_plane_vaddr(&new_buf->vb, 0);
> +	while (bufpos < sur40_video_format.sizeimage) {
> +		result = usb_bulk_msg(sur40->usbdev,
> +			usb_rcvbulkpipe(sur40->usbdev, VIDEO_ENDPOINT),
> +			buffer+bufpos, VIDEO_PACKET_SIZE,
> +			&bulk_read, 1000);
> +		if (result < 0) {
> +			dev_err(sur40->dev, "error in usb_bulk_read\n");
> +			goto err_poll;
> +		}
> +		bufpos += bulk_read;
> +	}
> +
> +	/* mark as finished */
> +	v4l2_get_timestamp(&new_buf->vb.v4l2_buf.timestamp);
> +	new_buf->vb.v4l2_buf.sequence = sur40->sequence++;
> +	new_buf->vb.v4l2_buf.field = V4L2_FIELD_NONE;
> +	vb2_buffer_done(&new_buf->vb, VB2_BUF_STATE_DONE);
> +	return;
> +
> +err_poll:
> +	vb2_buffer_done(&new_buf->vb, VB2_BUF_STATE_ERROR);
>  }
>  
>  /* Initialize input device parameters. */
> @@ -377,6 +497,11 @@ static int sur40_probe(struct usb_interface *interface,
>  		goto err_free_dev;
>  	}
>  
> +	/* initialize locks/lists */
> +	INIT_LIST_HEAD(&sur40->buf_list);
> +	spin_lock_init(&sur40->qlock);
> +	mutex_init(&sur40->lock);
> +
>  	/* Set up polled input device control structure */
>  	poll_dev->private = sur40;
>  	poll_dev->poll_interval = POLL_INTERVAL;
> @@ -387,7 +512,7 @@ static int sur40_probe(struct usb_interface *interface,
>  	/* Set up regular input device structure */
>  	sur40_input_setup(poll_dev->input);
>  
> -	poll_dev->input->name = "Samsung SUR40";
> +	poll_dev->input->name = DRIVER_LONG;
>  	usb_to_input_id(usbdev, &poll_dev->input->id);
>  	usb_make_path(usbdev, sur40->phys, sizeof(sur40->phys));
>  	strlcat(sur40->phys, "/input0", sizeof(sur40->phys));
> @@ -408,6 +533,7 @@ static int sur40_probe(struct usb_interface *interface,
>  		goto err_free_polldev;
>  	}
>  
> +	/* register the polled input device */
>  	error = input_register_polled_device(poll_dev);
>  	if (error) {
>  		dev_err(&interface->dev,
> @@ -415,12 +541,54 @@ static int sur40_probe(struct usb_interface *interface,
>  		goto err_free_buffer;
>  	}
>  
> +	/* register the video master device */
> +	snprintf(sur40->v4l2.name, sizeof(sur40->v4l2.name), "%s", DRIVER_LONG);
> +	error = v4l2_device_register(sur40->dev, &sur40->v4l2);
> +	if (error) {
> +		dev_err(&interface->dev,
> +			"Unable to register video master device.");
> +		goto err_unreg_v4l2;
> +	}
> +
> +	/* initialize the lock and subdevice */
> +	sur40->queue = sur40_queue;
> +	sur40->queue.drv_priv = sur40;
> +	sur40->queue.lock = &sur40->lock;
> +
> +	/* initialize the queue */
> +	error = vb2_queue_init(&sur40->queue);
> +	if (error)
> +		goto err_unreg_v4l2;
> +
> +	sur40->alloc_ctx = vb2_dma_contig_init_ctx(sur40->dev);
> +	if (IS_ERR(sur40->alloc_ctx)) {
> +		dev_err(sur40->dev, "Can't allocate buffer context");
> +		goto err_unreg_v4l2;
> +	}
> +
> +	sur40->vdev = sur40_video_device;
> +	sur40->vdev.v4l2_dev = &sur40->v4l2;
> +	sur40->vdev.lock = &sur40->lock;
> +	sur40->vdev.queue = &sur40->queue;
> +	video_set_drvdata(&sur40->vdev, sur40);
> +
> +	error = video_register_device(&sur40->vdev, VFL_TYPE_GRABBER, -1);
> +	if (error) {
> +		dev_err(&interface->dev,
> +			"Unable to register video subdevice.");
> +		goto err_unreg_video;
> +	}
> +
>  	/* we can register the device now, as it is ready */
>  	usb_set_intfdata(interface, sur40);
>  	dev_dbg(&interface->dev, "%s is now attached\n", DRIVER_DESC);
>  
>  	return 0;
>  
> +err_unreg_video:
> +	video_unregister_device(&sur40->vdev);
> +err_unreg_v4l2:
> +	v4l2_device_unregister(&sur40->v4l2);
>  err_free_buffer:
>  	kfree(sur40->bulk_in_buffer);
>  err_free_polldev:
> @@ -436,6 +604,10 @@ static void sur40_disconnect(struct usb_interface *interface)

Is this a hardwired device or hotpluggable? If it is hardwired, then this code is
OK, but if it is hotpluggable, then this isn't good enough.

>  {
>  	struct sur40_state *sur40 = usb_get_intfdata(interface);
>  
> +	v4l2_device_unregister(&sur40->v4l2);
> +	video_unregister_device(&sur40->vdev);

Swap these two line: unregister the video device first, then the v4l2_device.

> +	vb2_dma_contig_cleanup_ctx(sur40->alloc_ctx);
> +
>  	input_unregister_polled_device(sur40->input);
>  	input_free_polled_device(sur40->input);
>  	kfree(sur40->bulk_in_buffer);
> @@ -445,12 +617,239 @@ static void sur40_disconnect(struct usb_interface *interface)
>  	dev_dbg(&interface->dev, "%s is now disconnected\n", DRIVER_DESC);
>  }
>  
> +/*
> + * Setup the constraints of the queue: besides setting the number of planes
> + * per buffer and the size and allocation context of each plane, it also
> + * checks if sufficient buffers have been allocated. Usually 3 is a good
> + * minimum number: many DMA engines need a minimum of 2 buffers in the
> + * queue and you need to have another available for userspace processing.
> + */
> +static int sur40_queue_setup(struct vb2_queue *q, const struct v4l2_format *fmt,
> +		       unsigned int *nbuffers, unsigned int *nplanes,
> +		       unsigned int sizes[], void *alloc_ctxs[])
> +{
> +	struct sur40_state *sur40 = vb2_get_drv_priv(q);
> +
> +	if (q->num_buffers + *nbuffers < 3)
> +		*nbuffers = 3 - q->num_buffers;
> +
> +	if (fmt && fmt->fmt.pix.sizeimage < sur40_video_format.sizeimage)
> +		return -EINVAL;
> +
> +	*nplanes = 1;
> +	sizes[0] = fmt ? fmt->fmt.pix.sizeimage : sur40_video_format.sizeimage;
> +	alloc_ctxs[0] = sur40->alloc_ctx;
> +
> +	return 0;
> +}
> +
> +/*
> + * Prepare the buffer for queueing to the DMA engine: check and set the
> + * payload size.
> + */
> +static int sur40_buffer_prepare(struct vb2_buffer *vb)
> +{
> +	struct sur40_state *sur40 = vb2_get_drv_priv(vb->vb2_queue);
> +	unsigned long size = sur40_video_format.sizeimage;
> +
> +	if (vb2_plane_size(vb, 0) < size) {
> +		dev_err(&sur40->usbdev->dev, "buffer too small (%lu < %lu)\n",
> +			 vb2_plane_size(vb, 0), size);
> +		return -EINVAL;
> +	}
> +
> +	vb2_set_plane_payload(vb, 0, size);
> +	return 0;
> +}
> +
> +/*
> + * Queue this buffer to the DMA engine.
> + */
> +static void sur40_buffer_queue(struct vb2_buffer *vb)
> +{
> +	struct sur40_state *sur40 = vb2_get_drv_priv(vb->vb2_queue);
> +	struct sur40_buffer *buf = (struct sur40_buffer *)vb;
> +
> +	spin_lock(&sur40->qlock);
> +	list_add_tail(&buf->list, &sur40->buf_list);
> +	spin_unlock(&sur40->qlock);
> +}
> +
> +static void return_all_buffers(struct sur40_state *sur40,
> +			       enum vb2_buffer_state state)
> +{
> +	struct sur40_buffer *buf, *node;
> +
> +	spin_lock(&sur40->qlock);
> +	list_for_each_entry_safe(buf, node, &sur40->buf_list, list) {
> +		vb2_buffer_done(&buf->vb, state);
> +		list_del(&buf->list);
> +	}
> +	spin_unlock(&sur40->qlock);
> +}
> +
> +/*
> + * Start streaming. First check if the minimum number of buffers have been
> + * queued. If not, then return -ENOBUFS and the vb2 framework will call
> + * this function again the next time a buffer has been queued until enough
> + * buffers are available to actually start the DMA engine.
> + */
> +static int sur40_start_streaming(struct vb2_queue *vq, unsigned int count)
> +{
> +	struct sur40_state *sur40 = vb2_get_drv_priv(vq);
> +
> +	sur40->sequence = 0;
> +	return 0;
> +}
> +
> +/*
> + * Stop the DMA engine. Any remaining buffers in the DMA queue are dequeued
> + * and passed on to the vb2 framework marked as STATE_ERROR.
> + */
> +static void sur40_stop_streaming(struct vb2_queue *vq)
> +{
> +	struct sur40_state *sur40 = vb2_get_drv_priv(vq);

Add empty line between variable declarations and code.

> +	/* Release all active buffers */
> +	return_all_buffers(sur40, VB2_BUF_STATE_ERROR);
> +}
> +
> +/* V4L ioctl */
> +static int sur40_vidioc_querycap(struct file *file, void *priv,
> +				 struct v4l2_capability *cap)
> +{
> +	struct sur40_state *sur40 = video_drvdata(file);
> +
> +	strlcpy(cap->driver, DRIVER_SHORT, sizeof(cap->driver));
> +	strlcpy(cap->card, DRIVER_LONG, sizeof(cap->card));
> +	usb_make_path(sur40->usbdev, cap->bus_info, sizeof(cap->bus_info));
> +	cap->device_caps = V4L2_CAP_VIDEO_CAPTURE |
> +		V4L2_CAP_READWRITE |
> +		V4L2_CAP_STREAMING;
> +	cap->capabilities = cap->device_caps | V4L2_CAP_DEVICE_CAPS;
> +	return 0;
> +}
> +
> +static int sur40_vidioc_enum_input(struct file *file, void *priv,
> +				   struct v4l2_input *i)
> +{
> +	if (i->index != 0)
> +		return -EINVAL;
> +	i->type = V4L2_INPUT_TYPE_CAMERA;
> +	i->std = V4L2_STD_UNKNOWN;
> +	strlcpy(i->name, "In-Cell Sensor", sizeof(i->name));

Perhaps just say "Sensor" here? I'm not sure what "In-Cell" means.

> +	i->capabilities = 0;
> +	return 0;
> +}
> +
> +static int sur40_vidioc_s_input(struct file *file, void *priv, unsigned int i)
> +{
> +	return (i == 0) ? 0 : -EINVAL;
> +}
> +
> +static int sur40_vidioc_g_input(struct file *file, void *priv, unsigned int *i)
> +{
> +	*i = 0;
> +	return 0;
> +}
> +
> +static int sur40_vidioc_fmt(struct file *file, void *priv,
> +			    struct v4l2_format *f)
> +{
> +	f->fmt.pix = sur40_video_format;
> +	return 0;
> +}
> +
> +static int sur40_vidioc_enum_fmt(struct file *file, void *priv,
> +				 struct v4l2_fmtdesc *f)
> +{
> +	if (f->index != 0)
> +		return -EINVAL;
> +	strlcpy(f->description, "8-bit greyscale", sizeof(f->description));
> +	f->pixelformat = V4L2_PIX_FMT_GREY;
> +	f->flags = 0;
> +	return 0;
> +}
> +
>  static const struct usb_device_id sur40_table[] = {
>  	{ USB_DEVICE(ID_MICROSOFT, ID_SUR40) },  /* Samsung SUR40 */
>  	{ }                                      /* terminating null entry */
>  };
>  MODULE_DEVICE_TABLE(usb, sur40_table);
>  
> +/* V4L2 structures */
> +static struct vb2_ops sur40_queue_ops = {

This should be const.

> +	.queue_setup		= sur40_queue_setup,
> +	.buf_prepare		= sur40_buffer_prepare,
> +	.buf_queue		= sur40_buffer_queue,
> +	.start_streaming	= sur40_start_streaming,
> +	.stop_streaming		= sur40_stop_streaming,
> +	.wait_prepare		= vb2_ops_wait_prepare,
> +	.wait_finish		= vb2_ops_wait_finish,
> +};
> +
> +static struct vb2_queue sur40_queue = {

Make this const.

> +	.type = V4L2_BUF_TYPE_VIDEO_CAPTURE,
> +	.io_modes = VB2_MMAP | VB2_READ,

Please add VB2_DMABUF and VB2_USERPTR.

> +	.buf_struct_size = sizeof(struct sur40_buffer),
> +	.ops = &sur40_queue_ops,
> +	.mem_ops = &vb2_dma_contig_memops,
> +	.timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC,
> +	.min_buffers_needed = 3,
> +	.gfp_flags = GFP_DMA,

Drop this. You only set this if there are specific limitations to the DMA.
This line will limit the buffer to the bottom 16 MB only, and that's not
what you want.

> +};
> +
> +static const struct v4l2_file_operations sur40_video_fops = {
> +	.owner = THIS_MODULE,
> +	.open = v4l2_fh_open,
> +	.release = vb2_fop_release,
> +	.unlocked_ioctl = video_ioctl2,
> +	.read = vb2_fop_read,
> +	.mmap = vb2_fop_mmap,
> +	.poll = vb2_fop_poll,
> +};
> +
> +static const struct v4l2_ioctl_ops sur40_video_ioctl_ops = {
> +
> +	.vidioc_querycap	= sur40_vidioc_querycap,
> +
> +	.vidioc_enum_fmt_vid_cap = sur40_vidioc_enum_fmt,
> +	.vidioc_try_fmt_vid_cap	= sur40_vidioc_fmt,
> +	.vidioc_s_fmt_vid_cap	= sur40_vidioc_fmt,
> +	.vidioc_g_fmt_vid_cap	= sur40_vidioc_fmt,
> +
> +	.vidioc_enum_input	= sur40_vidioc_enum_input,
> +	.vidioc_g_input		= sur40_vidioc_g_input,
> +	.vidioc_s_input		= sur40_vidioc_s_input,
> +
> +	.vidioc_reqbufs		= vb2_ioctl_reqbufs,
> +	.vidioc_create_bufs	= vb2_ioctl_create_bufs,
> +	.vidioc_querybuf	= vb2_ioctl_querybuf,
> +	.vidioc_qbuf		= vb2_ioctl_qbuf,
> +	.vidioc_dqbuf		= vb2_ioctl_dqbuf,
> +	.vidioc_expbuf		= vb2_ioctl_expbuf,
> +
> +	.vidioc_streamon	= vb2_ioctl_streamon,
> +	.vidioc_streamoff	= vb2_ioctl_streamoff,
> +};
> +
> +static struct video_device sur40_video_device = {

const

> +	.name = DRIVER_LONG,
> +	.fops = &sur40_video_fops,
> +	.ioctl_ops = &sur40_video_ioctl_ops,
> +	.release = video_device_release_empty,
> +};
> +
> +static struct v4l2_pix_format sur40_video_format = {

const 

> +	.pixelformat = V4L2_PIX_FMT_GREY,
> +	.width  = SENSOR_RES_X / 2,
> +	.height = SENSOR_RES_Y / 2,
> +	.field = V4L2_FIELD_NONE,
> +	.colorspace = V4L2_COLORSPACE_SRGB,
> +	.bytesperline = SENSOR_RES_X / 2,
> +	.sizeimage = (SENSOR_RES_X/2) * (SENSOR_RES_Y/2),
> +	.priv = 0,

Remove the 'priv = 0' line. Not needed.

> +};
> +
>  /* USB-specific object needed to register this driver with the USB subsystem. */
>  static struct usb_driver sur40_driver = {
>  	.name = DRIVER_SHORT,
> 

Regards,

	Hans
--
To unsubscribe from this list: send the line "unsubscribe linux-input" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Florian Echtler Jan. 20, 2015, 9:24 a.m. UTC | #2
Hello Hans,

On 19.01.2015 11:38, Hans Verkuil wrote:
> Sorry for the delay.
No problem, thanks for your feedback.

>> Note: I'm intentionally using dma-contig instead of vmalloc, as the USB
>> core apparently _will_ try to use DMA for larger bulk transfers. 
> As far as I can tell from looking through the usb core code it supports
> scatter-gather DMA, so you should at least use dma-sg rather than dma-contig.
> Physically contiguous memory should always be avoided.
OK, will this work transparently (i.e. just switch from *-contig-* to
*-sg-*)? If not, can you suggest an example driver to use as template?

> I'm also missing a patch for the Kconfig that adds a dependency on MEDIA_USB_SUPPORT
> and that selects VIDEOBUF2_DMA_SG.
Good point, will add that.

>> +err_unreg_video:
>> +	video_unregister_device(&sur40->vdev);
>> +err_unreg_v4l2:
>> +	v4l2_device_unregister(&sur40->v4l2);
>>  err_free_buffer:
>>  	kfree(sur40->bulk_in_buffer);
>>  err_free_polldev:
>> @@ -436,6 +604,10 @@ static void sur40_disconnect(struct usb_interface *interface)
> Is this a hardwired device or hotpluggable? If it is hardwired, then this code is
> OK, but if it is hotpluggable, then this isn't good enough.
It's hardwired. Out of curiosity, what would I have to change for a
hotpluggable one?

>> +	i->type = V4L2_INPUT_TYPE_CAMERA;
>> +	i->std = V4L2_STD_UNKNOWN;
>> +	strlcpy(i->name, "In-Cell Sensor", sizeof(i->name));
> Perhaps just say "Sensor" here? I'm not sure what "In-Cell" means.
In-cell is referring to the concept of integrating sensor pixels
directly with LCD pixels, I think it's what Samsung calls it.

Thanks & best regards, Florian
Hans Verkuil Jan. 20, 2015, 9:30 a.m. UTC | #3
On 01/20/15 10:24, Florian Echtler wrote:
> Hello Hans,
> 
> On 19.01.2015 11:38, Hans Verkuil wrote:
>> Sorry for the delay.
> No problem, thanks for your feedback.
> 
>>> Note: I'm intentionally using dma-contig instead of vmalloc, as the USB
>>> core apparently _will_ try to use DMA for larger bulk transfers. 
>> As far as I can tell from looking through the usb core code it supports
>> scatter-gather DMA, so you should at least use dma-sg rather than dma-contig.
>> Physically contiguous memory should always be avoided.
> OK, will this work transparently (i.e. just switch from *-contig-* to
> *-sg-*)? If not, can you suggest an example driver to use as template?

Yes, that should pretty much be seamless. BTW, the more I think about it,
the more I am convinced that DMA will also be used by the USB core when
you use videobuf2-vmalloc.

I've CC-ed Laurent, I think he knows a lot more about this than I do.

Laurent, when does the USB core use DMA? What do you need to do on the
driver side to have USB use DMA when doing bulk transfers?

> 
>> I'm also missing a patch for the Kconfig that adds a dependency on MEDIA_USB_SUPPORT
>> and that selects VIDEOBUF2_DMA_SG.
>
> Good point, will add that.
> 
>>> +err_unreg_video:
>>> +	video_unregister_device(&sur40->vdev);
>>> +err_unreg_v4l2:
>>> +	v4l2_device_unregister(&sur40->v4l2);
>>>  err_free_buffer:
>>>  	kfree(sur40->bulk_in_buffer);
>>>  err_free_polldev:
>>> @@ -436,6 +604,10 @@ static void sur40_disconnect(struct usb_interface *interface)
>>
>> Is this a hardwired device or hotpluggable? If it is hardwired, then this code is
>> OK, but if it is hotpluggable, then this isn't good enough.
>
> It's hardwired. Out of curiosity, what would I have to change for a
> hotpluggable one?

In that case you can't clean everything up since some application might
still have a filehandle open. You have to wait until the very last filehandle
is closed.

> 
>>> +	i->type = V4L2_INPUT_TYPE_CAMERA;
>>> +	i->std = V4L2_STD_UNKNOWN;
>>> +	strlcpy(i->name, "In-Cell Sensor", sizeof(i->name));
>
>> Perhaps just say "Sensor" here? I'm not sure what "In-Cell" means.
>
> In-cell is referring to the concept of integrating sensor pixels
> directly with LCD pixels, I think it's what Samsung calls it.
> 
> Thanks & best regards, Florian
> 

Regards,

	Hans
--
To unsubscribe from this list: send the line "unsubscribe linux-input" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Laurent Pinchart Jan. 20, 2015, 12:59 p.m. UTC | #4
Hello,

On Tuesday 20 January 2015 10:30:07 Hans Verkuil wrote:
> On 01/20/15 10:24, Florian Echtler wrote:
> > On 19.01.2015 11:38, Hans Verkuil wrote:
> >> Sorry for the delay.
> > 
> > No problem, thanks for your feedback.
> > 
> >>> Note: I'm intentionally using dma-contig instead of vmalloc, as the USB
> >>> core apparently _will_ try to use DMA for larger bulk transfers.
> >> 
> >> As far as I can tell from looking through the usb core code it supports
> >> scatter-gather DMA, so you should at least use dma-sg rather than
> >> dma-contig. Physically contiguous memory should always be avoided.
> > 
> > OK, will this work transparently (i.e. just switch from *-contig-* to
> > *-sg-*)? If not, can you suggest an example driver to use as template?
> 
> Yes, that should pretty much be seamless. BTW, the more I think about it,
> the more I am convinced that DMA will also be used by the USB core when
> you use videobuf2-vmalloc.
> 
> I've CC-ed Laurent, I think he knows a lot more about this than I do.
> 
> Laurent, when does the USB core use DMA? What do you need to do on the
> driver side to have USB use DMA when doing bulk transfers?

How USB HCD drivers map buffers for DMA is HCD-specific, but all drivers 
exepct ehci-tegra, max3421-hcd and musb use the default implementation 
usb_hcd_map_urb_for_dma() (in drivers/usb/core/hcd.c).

Unless the buffer has already been mapped by the USB driver (in which case the 
driver will have set the URB_NO_TRANSFER_DMA_MAP flag in urb->transfer_flags 
and initialized the urb->transfer_dma field), the function will use 
dma_map_sg(), dma_map_page() or dma_map_single() depending on the buffer type 
(controlled through urb->sg and urb->num_sgs). DMA will thus always be used 
*expect* if the platform uses bounce buffers when the buffer can't be mapped 
directly for DMA.

> >> I'm also missing a patch for the Kconfig that adds a dependency on
> >> MEDIA_USB_SUPPORT and that selects VIDEOBUF2_DMA_SG.
> > 
> > Good point, will add that.
> > 
> >>> +err_unreg_video:
> >>> +	video_unregister_device(&sur40->vdev);
> >>> +err_unreg_v4l2:
> >>> +	v4l2_device_unregister(&sur40->v4l2);
> >>> 
> >>>  err_free_buffer:
> >>>  	kfree(sur40->bulk_in_buffer);
> >>>  
> >>>  err_free_polldev:
> >>> @@ -436,6 +604,10 @@ static void sur40_disconnect(struct usb_interface
> >>> *interface)>> 
> >> Is this a hardwired device or hotpluggable? If it is hardwired, then this
> >> code is OK, but if it is hotpluggable, then this isn't good enough.
> > 
> > It's hardwired. Out of curiosity, what would I have to change for a
> > hotpluggable one?
> 
> In that case you can't clean everything up since some application might
> still have a filehandle open. You have to wait until the very last
> filehandle is closed.
> 
> >>> +	i->type = V4L2_INPUT_TYPE_CAMERA;
> >>> +	i->std = V4L2_STD_UNKNOWN;
> >>> +	strlcpy(i->name, "In-Cell Sensor", sizeof(i->name));
> >> 
> >> Perhaps just say "Sensor" here? I'm not sure what "In-Cell" means.
> > 
> > In-cell is referring to the concept of integrating sensor pixels
> > directly with LCD pixels, I think it's what Samsung calls it.
Hans Verkuil Jan. 20, 2015, 1:03 p.m. UTC | #5
On 01/20/15 13:59, Laurent Pinchart wrote:
> Hello,
> 
> On Tuesday 20 January 2015 10:30:07 Hans Verkuil wrote:
>> On 01/20/15 10:24, Florian Echtler wrote:
>>> On 19.01.2015 11:38, Hans Verkuil wrote:
>>>> Sorry for the delay.
>>>
>>> No problem, thanks for your feedback.
>>>
>>>>> Note: I'm intentionally using dma-contig instead of vmalloc, as the USB
>>>>> core apparently _will_ try to use DMA for larger bulk transfers.
>>>>
>>>> As far as I can tell from looking through the usb core code it supports
>>>> scatter-gather DMA, so you should at least use dma-sg rather than
>>>> dma-contig. Physically contiguous memory should always be avoided.
>>>
>>> OK, will this work transparently (i.e. just switch from *-contig-* to
>>> *-sg-*)? If not, can you suggest an example driver to use as template?
>>
>> Yes, that should pretty much be seamless. BTW, the more I think about it,
>> the more I am convinced that DMA will also be used by the USB core when
>> you use videobuf2-vmalloc.
>>
>> I've CC-ed Laurent, I think he knows a lot more about this than I do.
>>
>> Laurent, when does the USB core use DMA? What do you need to do on the
>> driver side to have USB use DMA when doing bulk transfers?
> 
> How USB HCD drivers map buffers for DMA is HCD-specific, but all drivers 
> exepct ehci-tegra, max3421-hcd and musb use the default implementation 
> usb_hcd_map_urb_for_dma() (in drivers/usb/core/hcd.c).
> 
> Unless the buffer has already been mapped by the USB driver (in which case the 
> driver will have set the URB_NO_TRANSFER_DMA_MAP flag in urb->transfer_flags 
> and initialized the urb->transfer_dma field), the function will use 
> dma_map_sg(), dma_map_page() or dma_map_single() depending on the buffer type 
> (controlled through urb->sg and urb->num_sgs). DMA will thus always be used 
> *expect* if the platform uses bounce buffers when the buffer can't be mapped 
> directly for DMA.

So we can safely use videobuf2-vmalloc, right?

Regards,

	Hans

> 
>>>> I'm also missing a patch for the Kconfig that adds a dependency on
>>>> MEDIA_USB_SUPPORT and that selects VIDEOBUF2_DMA_SG.
>>>
>>> Good point, will add that.
>>>
>>>>> +err_unreg_video:
>>>>> +	video_unregister_device(&sur40->vdev);
>>>>> +err_unreg_v4l2:
>>>>> +	v4l2_device_unregister(&sur40->v4l2);
>>>>>
>>>>>  err_free_buffer:
>>>>>  	kfree(sur40->bulk_in_buffer);
>>>>>  
>>>>>  err_free_polldev:
>>>>> @@ -436,6 +604,10 @@ static void sur40_disconnect(struct usb_interface
>>>>> *interface)>> 
>>>> Is this a hardwired device or hotpluggable? If it is hardwired, then this
>>>> code is OK, but if it is hotpluggable, then this isn't good enough.
>>>
>>> It's hardwired. Out of curiosity, what would I have to change for a
>>> hotpluggable one?
>>
>> In that case you can't clean everything up since some application might
>> still have a filehandle open. You have to wait until the very last
>> filehandle is closed.
>>
>>>>> +	i->type = V4L2_INPUT_TYPE_CAMERA;
>>>>> +	i->std = V4L2_STD_UNKNOWN;
>>>>> +	strlcpy(i->name, "In-Cell Sensor", sizeof(i->name));
>>>>
>>>> Perhaps just say "Sensor" here? I'm not sure what "In-Cell" means.
>>>
>>> In-cell is referring to the concept of integrating sensor pixels
>>> directly with LCD pixels, I think it's what Samsung calls it.
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-input" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Laurent Pinchart Jan. 20, 2015, 1:06 p.m. UTC | #6
Hi Hans,

On Tuesday 20 January 2015 14:03:00 Hans Verkuil wrote:
> On 01/20/15 13:59, Laurent Pinchart wrote:
> > On Tuesday 20 January 2015 10:30:07 Hans Verkuil wrote:
> >> On 01/20/15 10:24, Florian Echtler wrote:
> >>> On 19.01.2015 11:38, Hans Verkuil wrote:
> >>>> Sorry for the delay.
> >>> 
> >>> No problem, thanks for your feedback.
> >>> 
> >>>>> Note: I'm intentionally using dma-contig instead of vmalloc, as the
> >>>>> USB core apparently _will_ try to use DMA for larger bulk transfers.
> >>>> 
> >>>> As far as I can tell from looking through the usb core code it supports
> >>>> scatter-gather DMA, so you should at least use dma-sg rather than
> >>>> dma-contig. Physically contiguous memory should always be avoided.
> >>> 
> >>> OK, will this work transparently (i.e. just switch from *-contig-* to
> >>> *-sg-*)? If not, can you suggest an example driver to use as template?
> >> 
> >> Yes, that should pretty much be seamless. BTW, the more I think about it,
> >> the more I am convinced that DMA will also be used by the USB core when
> >> you use videobuf2-vmalloc.
> >> 
> >> I've CC-ed Laurent, I think he knows a lot more about this than I do.
> >> 
> >> Laurent, when does the USB core use DMA? What do you need to do on the
> >> driver side to have USB use DMA when doing bulk transfers?
> > 
> > How USB HCD drivers map buffers for DMA is HCD-specific, but all drivers
> > exepct ehci-tegra, max3421-hcd and musb use the default implementation
> > usb_hcd_map_urb_for_dma() (in drivers/usb/core/hcd.c).
> > 
> > Unless the buffer has already been mapped by the USB driver (in which case
> > the driver will have set the URB_NO_TRANSFER_DMA_MAP flag in
> > urb->transfer_flags and initialized the urb->transfer_dma field), the
> > function will use dma_map_sg(), dma_map_page() or dma_map_single()
> > depending on the buffer type (controlled through urb->sg and
> > urb->num_sgs). DMA will thus always be used *expect* if the platform uses
> > bounce buffers when the buffer can't be mapped directly for DMA.
> 
> So we can safely use videobuf2-vmalloc, right?

That depends on the platform and whether it can DMA to vmalloc'ed memory :-) 
To be totally safe I think vb2-dma-sg would be better, but I'm not sure it's 
worth the trouble. uvcvideo uses vb2-vmalloc as it performs a memcpy anyway.

> >>>> I'm also missing a patch for the Kconfig that adds a dependency on
> >>>> MEDIA_USB_SUPPORT and that selects VIDEOBUF2_DMA_SG.
> >>> 
> >>> Good point, will add that.
> >>> 
> >>>>> +err_unreg_video:
> >>>>> +	video_unregister_device(&sur40->vdev);
> >>>>> +err_unreg_v4l2:
> >>>>> +	v4l2_device_unregister(&sur40->v4l2);
> >>>>> 
> >>>>>  err_free_buffer:
> >>>>>  	kfree(sur40->bulk_in_buffer);
> >>>>>  
> >>>>>  err_free_polldev:
> >>>>> @@ -436,6 +604,10 @@ static void sur40_disconnect(struct usb_interface
> >>>>> *interface)>>
> >>>> 
> >>>> Is this a hardwired device or hotpluggable? If it is hardwired, then
> >>>> this code is OK, but if it is hotpluggable, then this isn't good
> >>>> enough.
> >>> 
> >>> It's hardwired. Out of curiosity, what would I have to change for a
> >>> hotpluggable one?
> >> 
> >> In that case you can't clean everything up since some application might
> >> still have a filehandle open. You have to wait until the very last
> >> filehandle is closed.
> >> 
> >>>>> +	i->type = V4L2_INPUT_TYPE_CAMERA;
> >>>>> +	i->std = V4L2_STD_UNKNOWN;
> >>>>> +	strlcpy(i->name, "In-Cell Sensor", sizeof(i->name));
> >>>> 
> >>>> Perhaps just say "Sensor" here? I'm not sure what "In-Cell" means.
> >>> 
> >>> In-cell is referring to the concept of integrating sensor pixels
> >>> directly with LCD pixels, I think it's what Samsung calls it.
Florian Echtler Jan. 21, 2015, 1:28 p.m. UTC | #7
Hello everyone,

On 20.01.2015 14:06, Laurent Pinchart wrote:
> On Tuesday 20 January 2015 14:03:00 Hans Verkuil wrote:
>> On 01/20/15 13:59, Laurent Pinchart wrote:
>>> On Tuesday 20 January 2015 10:30:07 Hans Verkuil wrote:
>>>> I've CC-ed Laurent, I think he knows a lot more about this than I do.
>>>>
>>>> Laurent, when does the USB core use DMA? What do you need to do on the
>>>> driver side to have USB use DMA when doing bulk transfers?
>>>
>>> How USB HCD drivers map buffers for DMA is HCD-specific, but all drivers
>>> exepct ehci-tegra, max3421-hcd and musb use the default implementation
>>> usb_hcd_map_urb_for_dma() (in drivers/usb/core/hcd.c).
>>>
>>> Unless the buffer has already been mapped by the USB driver (in which case
>>> the driver will have set the URB_NO_TRANSFER_DMA_MAP flag in
>>> urb->transfer_flags and initialized the urb->transfer_dma field), the
>>> function will use dma_map_sg(), dma_map_page() or dma_map_single()
>>> depending on the buffer type (controlled through urb->sg and
>>> urb->num_sgs). DMA will thus always be used *expect* if the platform uses
>>> bounce buffers when the buffer can't be mapped directly for DMA.
>>
>> So we can safely use videobuf2-vmalloc, right?
> 
> That depends on the platform and whether it can DMA to vmalloc'ed memory :-) 
> To be totally safe I think vb2-dma-sg would be better, but I'm not sure it's 
> worth the trouble. uvcvideo uses vb2-vmalloc as it performs a memcpy anyway.

The SUR40 sends raw video data without any headers over the bulk
endpoint in blocks of 16k, so I'm assuming that in this specific case,
vb2-dma-sg would be the most efficient choice?

On that note, I've seen that vb2_dma_sg_{init|cleanup}_ctx will appear
only in 3.19. If I want to maintain a backwards-compatible version for
older kernels, what do I use in that case?

Best, Florian
Hans Verkuil Jan. 21, 2015, 1:29 p.m. UTC | #8
On 01/21/15 14:28, Florian Echtler wrote:
> Hello everyone,
> 
> On 20.01.2015 14:06, Laurent Pinchart wrote:
>> On Tuesday 20 January 2015 14:03:00 Hans Verkuil wrote:
>>> On 01/20/15 13:59, Laurent Pinchart wrote:
>>>> On Tuesday 20 January 2015 10:30:07 Hans Verkuil wrote:
>>>>> I've CC-ed Laurent, I think he knows a lot more about this than I do.
>>>>>
>>>>> Laurent, when does the USB core use DMA? What do you need to do on the
>>>>> driver side to have USB use DMA when doing bulk transfers?
>>>>
>>>> How USB HCD drivers map buffers for DMA is HCD-specific, but all drivers
>>>> exepct ehci-tegra, max3421-hcd and musb use the default implementation
>>>> usb_hcd_map_urb_for_dma() (in drivers/usb/core/hcd.c).
>>>>
>>>> Unless the buffer has already been mapped by the USB driver (in which case
>>>> the driver will have set the URB_NO_TRANSFER_DMA_MAP flag in
>>>> urb->transfer_flags and initialized the urb->transfer_dma field), the
>>>> function will use dma_map_sg(), dma_map_page() or dma_map_single()
>>>> depending on the buffer type (controlled through urb->sg and
>>>> urb->num_sgs). DMA will thus always be used *expect* if the platform uses
>>>> bounce buffers when the buffer can't be mapped directly for DMA.
>>>
>>> So we can safely use videobuf2-vmalloc, right?
>>
>> That depends on the platform and whether it can DMA to vmalloc'ed memory :-) 
>> To be totally safe I think vb2-dma-sg would be better, but I'm not sure it's 
>> worth the trouble. uvcvideo uses vb2-vmalloc as it performs a memcpy anyway.
> 
> The SUR40 sends raw video data without any headers over the bulk
> endpoint in blocks of 16k, so I'm assuming that in this specific case,
> vb2-dma-sg would be the most efficient choice?
> 
> On that note, I've seen that vb2_dma_sg_{init|cleanup}_ctx will appear
> only in 3.19. If I want to maintain a backwards-compatible version for
> older kernels, what do I use in that case?

Easiest would actually be to copy all the videobuf2 sources and headers
to that older kernel.

Obviously, for upstreaming you should always use the latest APIs and
never use backwards-compatible constructs.

Regards,

	Hans

--
To unsubscribe from this list: send the line "unsubscribe linux-input" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Florian Echtler Jan. 29, 2015, 9:35 p.m. UTC | #9
Hello again,

On 21.01.2015 14:29, Hans Verkuil wrote:
> On 01/21/15 14:28, Florian Echtler wrote:
>> On 20.01.2015 14:06, Laurent Pinchart wrote:
>>> That depends on the platform and whether it can DMA to vmalloc'ed memory :-) 
>>> To be totally safe I think vb2-dma-sg would be better, but I'm not sure it's 
>>> worth the trouble. uvcvideo uses vb2-vmalloc as it performs a memcpy anyway.
>> The SUR40 sends raw video data without any headers over the bulk
>> endpoint in blocks of 16k, so I'm assuming that in this specific case,
>> vb2-dma-sg would be the most efficient choice?
I'm still having a couple of issues sorting out the correct way to
provide DMA access for my driver. I've integrated most of your
suggestions, but I still can't switch from dma-contig to dma-sg.

As far as I understood it, there is no further initialization required
besides using vb2_dma_sg_memops, vb2_dma_sg_init_ctx and
vb2_dma_sg_cleanup_ctx instead of the respective -contig- calls, correct?

However, as soon as I swap the relevant function calls, the video image
stays black and in dmesg, I get the following warning:

------------[ cut here ]------------
WARNING: CPU: 1 PID: 37 at
/home/kernel/COD/linux/drivers/usb/core/hcd.c:1504
usb_hcd_map_urb_for_dma+0x4eb/0x500()
transfer buffer not dma capable
Modules linked in: sur40(OE) videobuf2_dma_contig videobuf2_dma_sg
videobuf2_vmalloc videobuf2_memops videobuf2_core v4l2_common videodev
media dm_crypt joydev input_polldev wl(POE) snd_hda_codec_realtek
snd_hda_codec_generic snd_hda_codec_hdmi snd_hda_intel
snd_hda_controller snd_hda_codec snd_hwdep snd_pcm snd_seq_midi
snd_seq_midi_event snd_rawmidi cfg80211 kvm_amd snd_seq kvm edac_core
serio_raw snd_seq_device btusb snd_timer edac_mce_amd snd ipmi_si
ipmi_msghandler k10temp sp5100_tco i2c_piix4 soundcore bnep 8250_fintek
shpchp tpm_infineon rfcomm bluetooth mac_hid parport_pc ppdev lp parport
hid_apple usbhid hid pata_acpi uas usb_storage amdkfd amd_iommu_v2
radeon psmouse pata_atiixp i2c_algo_bit ttm drm_kms_helper drm ahci
libahci r8169 mii [last unloaded: sur40]
CPU: 1 PID: 37 Comm: kworker/1:1 Tainted: P           OE
3.19.0-031900rc6-generic #201501261152
Hardware name: Samsung SUR40/SDNE-R78BA2-20, BIOS SDNE-R78BA2-2000
11/04/2011
Workqueue: events_freezable input_polled_device_work [input_polldev]
00000000000005e0 ffff8801320c3aa8 ffffffff817c4584 0000000000000007
ffff8801320c3af8 ffff8801320c3ae8 ffffffff81076df7 0000000000000000
ffff8800a71fa6c0 ffff88013243f800 0000000000000010 0000000000000002
Call Trace:
[<ffffffff817c4584>] dump_stack+0x45/0x57
[<ffffffff81076df7>] warn_slowpath_common+0x97/0xe0
[<ffffffff81076ef6>] warn_slowpath_fmt+0x46/0x50
[<ffffffff815aff0b>] usb_hcd_map_urb_for_dma+0x4eb/0x500
[<ffffffff817d03b4>] ? schedule_timeout+0x124/0x210
[<ffffffff815b0bd5>] usb_hcd_submit_urb+0x135/0x1c0
[<ffffffff815b20a6>] usb_submit_urb.part.8+0x1f6/0x580
[<ffffffff811bb542>] ? vmap_pud_range+0x122/0x1c0
[<ffffffff815b2465>] usb_submit_urb+0x35/0x80
[<ffffffff815b339a>] usb_start_wait_urb+0x6a/0x170
[<ffffffff815b1cce>] ? usb_alloc_urb+0x1e/0x50
[<ffffffff815b1cce>] ? usb_alloc_urb+0x1e/0x50
[<ffffffff815b3570>] usb_bulk_msg+0xd0/0x1a0
[<ffffffffc059a841>] sur40_poll+0x561/0x5e0 [sur40]
[<ffffffffc016134b>] input_polled_device_work+0x1b/0x30 [input_polldev]
[<ffffffff8108f6dd>] process_one_work+0x14d/0x460
[<ffffffff810900bb>] worker_thread+0x11b/0x3f0
[<ffffffff8108ffa0>] ? create_worker+0x1e0/0x1e0
[<ffffffff81095cc9>] kthread+0xc9/0xe0
[<ffffffff81095c00>] ? flush_kthread_worker+0x90/0x90
[<ffffffff817d17fc>] ret_from_fork+0x7c/0xb0
[<ffffffff81095c00>] ? flush_kthread_worker+0x90/0x90
---[ end trace 30eaf6524fd028d3 ]---

Moreover, I'm getting the following test failure from v4l2-compliance:

Streaming ioctls:
	test read/write: OK
	test MMAP: OK
		fail: v4l2-test-buffers.cpp(951): buf.qbuf(node)
		fail: v4l2-test-buffers.cpp(994): setupUserPtr(node, q)
	test USERPTR: FAIL
	test DMABUF: Cannot test, specify --expbuf-device

Total: 45, Succeeded: 44, Failed: 1, Warnings: 0

Any suggestions how to deal with this?

Best regards, Florian
Florian Echtler Feb. 3, 2015, 8:45 p.m. UTC | #10
Sorry to bring this up again, but would it be acceptable to simply use
dma-contig after all? Since the GFP_DMA flag is gone, this shouldn't be
too big of an issue IMHO, and I was kind of hoping the patch could still
be part of 3.20.

Best, Florian

On 29.01.2015 22:35, Florian Echtler wrote:
> I'm still having a couple of issues sorting out the correct way to
> provide DMA access for my driver. I've integrated most of your
> suggestions, but I still can't switch from dma-contig to dma-sg.
> As far as I understood it, there is no further initialization required
> besides using vb2_dma_sg_memops, vb2_dma_sg_init_ctx and
> vb2_dma_sg_cleanup_ctx instead of the respective -contig- calls, correct?
> However, as soon as I swap the relevant function calls, the video image
> stays black and in dmesg, I get the following warning:
>
> Call Trace:
> [<ffffffff817c4584>] dump_stack+0x45/0x57
> [<ffffffff81076df7>] warn_slowpath_common+0x97/0xe0
> [<ffffffff81076ef6>] warn_slowpath_fmt+0x46/0x50
> [<ffffffff815aff0b>] usb_hcd_map_urb_for_dma+0x4eb/0x500
> [<ffffffff817d03b4>] ? schedule_timeout+0x124/0x210
> [<ffffffff815b0bd5>] usb_hcd_submit_urb+0x135/0x1c0
> [<ffffffff815b20a6>] usb_submit_urb.part.8+0x1f6/0x580
> [<ffffffff811bb542>] ? vmap_pud_range+0x122/0x1c0
> [<ffffffff815b2465>] usb_submit_urb+0x35/0x80
> [<ffffffff815b339a>] usb_start_wait_urb+0x6a/0x170
> [<ffffffff815b1cce>] ? usb_alloc_urb+0x1e/0x50
> [<ffffffff815b1cce>] ? usb_alloc_urb+0x1e/0x50
> [<ffffffff815b3570>] usb_bulk_msg+0xd0/0x1a0
> [<ffffffffc059a841>] sur40_poll+0x561/0x5e0 [sur40]
>
> Moreover, I'm getting the following test failure from v4l2-compliance:
> 
> Streaming ioctls:
> 	test read/write: OK
> 	test MMAP: OK
> 		fail: v4l2-test-buffers.cpp(951): buf.qbuf(node)
> 		fail: v4l2-test-buffers.cpp(994): setupUserPtr(node, q)
> 	test USERPTR: FAIL
> 	test DMABUF: Cannot test, specify --expbuf-device
> 
> Total: 45, Succeeded: 44, Failed: 1, Warnings: 0
Hans Verkuil Feb. 4, 2015, 8:08 a.m. UTC | #11
Hi Florian,

On 02/03/2015 09:45 PM, Florian Echtler wrote:
> Sorry to bring this up again, but would it be acceptable to simply use
> dma-contig after all? Since the GFP_DMA flag is gone, this shouldn't be
> too big of an issue IMHO, and I was kind of hoping the patch could still
> be part of 3.20.

The window for 3.20 is closed, I'm afraid.

I remain very skeptical about the use of dma-contig (or dma-sg for that
matter). Have you tried using vmalloc and check if the USB core isn't
automatically using DMA transfers for that?

Basically I would like to see proof that vmalloc is not an option before
allowing dma-contig (or dma-sg if you can figure out why that isn't
working).

You can also make a version with vmalloc and I'll merge that, and then
you can look more into the DMA issues. That way the driver is merged,
even if it is perhaps not yet optimal, and you can address that part later.

Regards,

	Hans

> 
> Best, Florian
> 
> On 29.01.2015 22:35, Florian Echtler wrote:
>> I'm still having a couple of issues sorting out the correct way to
>> provide DMA access for my driver. I've integrated most of your
>> suggestions, but I still can't switch from dma-contig to dma-sg.
>> As far as I understood it, there is no further initialization required
>> besides using vb2_dma_sg_memops, vb2_dma_sg_init_ctx and
>> vb2_dma_sg_cleanup_ctx instead of the respective -contig- calls, correct?
>> However, as soon as I swap the relevant function calls, the video image
>> stays black and in dmesg, I get the following warning:
>>
>> Call Trace:
>> [<ffffffff817c4584>] dump_stack+0x45/0x57
>> [<ffffffff81076df7>] warn_slowpath_common+0x97/0xe0
>> [<ffffffff81076ef6>] warn_slowpath_fmt+0x46/0x50
>> [<ffffffff815aff0b>] usb_hcd_map_urb_for_dma+0x4eb/0x500
>> [<ffffffff817d03b4>] ? schedule_timeout+0x124/0x210
>> [<ffffffff815b0bd5>] usb_hcd_submit_urb+0x135/0x1c0
>> [<ffffffff815b20a6>] usb_submit_urb.part.8+0x1f6/0x580
>> [<ffffffff811bb542>] ? vmap_pud_range+0x122/0x1c0
>> [<ffffffff815b2465>] usb_submit_urb+0x35/0x80
>> [<ffffffff815b339a>] usb_start_wait_urb+0x6a/0x170
>> [<ffffffff815b1cce>] ? usb_alloc_urb+0x1e/0x50
>> [<ffffffff815b1cce>] ? usb_alloc_urb+0x1e/0x50
>> [<ffffffff815b3570>] usb_bulk_msg+0xd0/0x1a0
>> [<ffffffffc059a841>] sur40_poll+0x561/0x5e0 [sur40]
>>
>> Moreover, I'm getting the following test failure from v4l2-compliance:
>>
>> Streaming ioctls:
>> 	test read/write: OK
>> 	test MMAP: OK
>> 		fail: v4l2-test-buffers.cpp(951): buf.qbuf(node)
>> 		fail: v4l2-test-buffers.cpp(994): setupUserPtr(node, q)
>> 	test USERPTR: FAIL
>> 	test DMABUF: Cannot test, specify --expbuf-device
>>
>> Total: 45, Succeeded: 44, Failed: 1, Warnings: 0
> 
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-input" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Florian Echtler Feb. 4, 2015, 10:08 a.m. UTC | #12
Hello Hans,

On 04.02.2015 09:08, Hans Verkuil wrote:
> I remain very skeptical about the use of dma-contig (or dma-sg for that
> matter). Have you tried using vmalloc and check if the USB core isn't
> automatically using DMA transfers for that?
> 
> Basically I would like to see proof that vmalloc is not an option before
> allowing dma-contig (or dma-sg if you can figure out why that isn't
> working).
> 
> You can also make a version with vmalloc and I'll merge that, and then
> you can look more into the DMA issues. That way the driver is merged,
> even if it is perhaps not yet optimal, and you can address that part later.
OK, that sounds sensible, I will try that route. When using
videobuf2-vmalloc, what do I pass back for alloc_ctxs in queue_setup?

Best, Florian
Hans Verkuil Feb. 4, 2015, 10:22 a.m. UTC | #13
On 02/04/15 11:08, Florian Echtler wrote:
> Hello Hans,
> 
> On 04.02.2015 09:08, Hans Verkuil wrote:
>> I remain very skeptical about the use of dma-contig (or dma-sg for that
>> matter). Have you tried using vmalloc and check if the USB core isn't
>> automatically using DMA transfers for that?
>>
>> Basically I would like to see proof that vmalloc is not an option before
>> allowing dma-contig (or dma-sg if you can figure out why that isn't
>> working).
>>
>> You can also make a version with vmalloc and I'll merge that, and then
>> you can look more into the DMA issues. That way the driver is merged,
>> even if it is perhaps not yet optimal, and you can address that part later.
> OK, that sounds sensible, I will try that route. When using
> videobuf2-vmalloc, what do I pass back for alloc_ctxs in queue_setup?

vmalloc doesn't need those, so you can just drop any alloc_ctx related code.

Regards,

	Hans

--
To unsubscribe from this list: send the line "unsubscribe linux-input" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Florian Echtler Feb. 4, 2015, 10:56 a.m. UTC | #14
On 04.02.2015 11:22, Hans Verkuil wrote:
> On 02/04/15 11:08, Florian Echtler wrote:
>> On 04.02.2015 09:08, Hans Verkuil wrote:
>>> You can also make a version with vmalloc and I'll merge that, and then
>>> you can look more into the DMA issues. That way the driver is merged,
>>> even if it is perhaps not yet optimal, and you can address that part later.
>> OK, that sounds sensible, I will try that route. When using
>> videobuf2-vmalloc, what do I pass back for alloc_ctxs in queue_setup?
> vmalloc doesn't need those, so you can just drop any alloc_ctx related code.
That's what I assumed, however, I'm running into the same problem as
with dma-sg when I switch to vmalloc...?

I've sent a "proper" patch submission again, which has all the other
issues from the previous submission fixed. I'm hoping you can maybe have
a closer look and see if I'm doing anything subtly wrong which causes
both vmalloc and dma-sg to fail while dma-contig works.

Best, Florian
Laurent Pinchart Feb. 4, 2015, 11:34 a.m. UTC | #15
Hi Florian,

On Wednesday 04 February 2015 11:56:58 Florian Echtler wrote:
> On 04.02.2015 11:22, Hans Verkuil wrote:
> > On 02/04/15 11:08, Florian Echtler wrote:
> >> On 04.02.2015 09:08, Hans Verkuil wrote:
> >>> You can also make a version with vmalloc and I'll merge that, and then
> >>> you can look more into the DMA issues. That way the driver is merged,
> >>> even if it is perhaps not yet optimal, and you can address that part
> >>> later.
> >> 
> >> OK, that sounds sensible, I will try that route. When using
> >> videobuf2-vmalloc, what do I pass back for alloc_ctxs in queue_setup?
> > 
> > vmalloc doesn't need those, so you can just drop any alloc_ctx related
> > code.
>
> That's what I assumed, however, I'm running into the same problem as
> with dma-sg when I switch to vmalloc...?

I don't expect vmalloc to work, as you can't DMA to vmalloc memory directly 
without any IOMMU in the general case (the allocated memory being physically 
fragmented).

dma-sg should work though, but you won't be able to use usb_bulk_msg(). You 
need to create the URBs manually, set their sg and num_sgs fields and submit 
them.

> I've sent a "proper" patch submission again, which has all the other
> issues from the previous submission fixed. I'm hoping you can maybe have
> a closer look and see if I'm doing anything subtly wrong which causes
> both vmalloc and dma-sg to fail while dma-contig works.
Hans Verkuil Feb. 4, 2015, 11:39 a.m. UTC | #16
On 02/04/15 12:34, Laurent Pinchart wrote:
> Hi Florian,
> 
> On Wednesday 04 February 2015 11:56:58 Florian Echtler wrote:
>> On 04.02.2015 11:22, Hans Verkuil wrote:
>>> On 02/04/15 11:08, Florian Echtler wrote:
>>>> On 04.02.2015 09:08, Hans Verkuil wrote:
>>>>> You can also make a version with vmalloc and I'll merge that, and then
>>>>> you can look more into the DMA issues. That way the driver is merged,
>>>>> even if it is perhaps not yet optimal, and you can address that part
>>>>> later.
>>>>
>>>> OK, that sounds sensible, I will try that route. When using
>>>> videobuf2-vmalloc, what do I pass back for alloc_ctxs in queue_setup?
>>>
>>> vmalloc doesn't need those, so you can just drop any alloc_ctx related
>>> code.
>>
>> That's what I assumed, however, I'm running into the same problem as
>> with dma-sg when I switch to vmalloc...?
> 
> I don't expect vmalloc to work, as you can't DMA to vmalloc memory directly 
> without any IOMMU in the general case (the allocated memory being physically 
> fragmented).
> 
> dma-sg should work though, but you won't be able to use usb_bulk_msg(). You 
> need to create the URBs manually, set their sg and num_sgs fields and submit 
> them.

So it works for other usb media drivers because they allocate memory
using kmalloc (and presumably the usb core can DMA to that), and then memcpy
it to the vmalloc-ed buffers?

Anyway Florian, based on Laurent's explanation I think trying to make
dma-sg work seems to be the best solution. And I've learned something
new :-)

Regards,

	Hans

> 
>> I've sent a "proper" patch submission again, which has all the other
>> issues from the previous submission fixed. I'm hoping you can maybe have
>> a closer look and see if I'm doing anything subtly wrong which causes
>> both vmalloc and dma-sg to fail while dma-contig works.
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-input" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Florian Echtler Feb. 4, 2015, 1:21 p.m. UTC | #17
Hello everyone,

On 04.02.2015 12:39, Hans Verkuil wrote:
> On 02/04/15 12:34, Laurent Pinchart wrote:
>> On Wednesday 04 February 2015 11:56:58 Florian Echtler wrote:
>>> That's what I assumed, however, I'm running into the same problem as
>>> with dma-sg when I switch to vmalloc...?
>>
>> I don't expect vmalloc to work, as you can't DMA to vmalloc memory directly 
>> without any IOMMU in the general case (the allocated memory being physically 
>> fragmented).
>>
>> dma-sg should work though, but you won't be able to use usb_bulk_msg(). You 
>> need to create the URBs manually, set their sg and num_sgs fields and submit 
>> them.
Can I also use usb_sg_init/_wait for this? I can't find any other driver
which uses USB in conjunction with dma-sg, can you suggest one I could
use as an example?

> Anyway Florian, based on Laurent's explanation I think trying to make
> dma-sg work seems to be the best solution. And I've learned something
> new :-)
Thanks for the clarification, please ignore the v2 patch submission for
now :-)

Best, Florian
Laurent Pinchart Feb. 4, 2015, 1:51 p.m. UTC | #18
Hi Hans,

On Wednesday 04 February 2015 12:39:30 Hans Verkuil wrote:
> On 02/04/15 12:34, Laurent Pinchart wrote:
> > On Wednesday 04 February 2015 11:56:58 Florian Echtler wrote:
> >> On 04.02.2015 11:22, Hans Verkuil wrote:
> >>> On 02/04/15 11:08, Florian Echtler wrote:
> >>>> On 04.02.2015 09:08, Hans Verkuil wrote:
> >>>>> You can also make a version with vmalloc and I'll merge that, and then
> >>>>> you can look more into the DMA issues. That way the driver is merged,
> >>>>> even if it is perhaps not yet optimal, and you can address that part
> >>>>> later.
> >>>> 
> >>>> OK, that sounds sensible, I will try that route. When using
> >>>> videobuf2-vmalloc, what do I pass back for alloc_ctxs in queue_setup?
> >>> 
> >>> vmalloc doesn't need those, so you can just drop any alloc_ctx related
> >>> code.
> >> 
> >> That's what I assumed, however, I'm running into the same problem as
> >> with dma-sg when I switch to vmalloc...?
> > 
> > I don't expect vmalloc to work, as you can't DMA to vmalloc memory
> > directly without any IOMMU in the general case (the allocated memory being
> > physically fragmented).
> > 
> > dma-sg should work though, but you won't be able to use usb_bulk_msg().
> > You need to create the URBs manually, set their sg and num_sgs fields and
> > submit them.
> 
> So it works for other usb media drivers because they allocate memory
> using kmalloc (and presumably the usb core can DMA to that), and then memcpy
> it to the vmalloc-ed buffers?

Correct. In the uvcvideo case that's unavoidable as headers need to be removed 
from the packets.

> Anyway Florian, based on Laurent's explanation I think trying to make
> dma-sg work seems to be the best solution. And I've learned something
> new :-)
Laurent Pinchart Feb. 4, 2015, 2:06 p.m. UTC | #19
Hi Florian,

On Wednesday 04 February 2015 14:21:47 Florian Echtler wrote:
> On 04.02.2015 12:39, Hans Verkuil wrote:
> > On 02/04/15 12:34, Laurent Pinchart wrote:
> >> On Wednesday 04 February 2015 11:56:58 Florian Echtler wrote:
> >>> That's what I assumed, however, I'm running into the same problem as
> >>> with dma-sg when I switch to vmalloc...?
> >> 
> >> I don't expect vmalloc to work, as you can't DMA to vmalloc memory
> >> directly without any IOMMU in the general case (the allocated memory
> >> being physically fragmented).
> >> 
> >> dma-sg should work though, but you won't be able to use usb_bulk_msg().
> >> You need to create the URBs manually, set their sg and num_sgs fields and
> >> submit them.
> 
> Can I also use usb_sg_init/_wait for this? I can't find any other driver
> which uses USB in conjunction with dma-sg, can you suggest one I could
> use as an example?

usb_sg_init() is an interesting abstraction that transparently splits SG lists 
into several URBs if the USB host controller can't support SG lists directly. 
Unfortunately the API is blocking. It shouldn't be too difficult to add an 
asynchronous option with a complete callback.

> > Anyway Florian, based on Laurent's explanation I think trying to make
> > dma-sg work seems to be the best solution. And I've learned something
> > new :-)
> 
> Thanks for the clarification, please ignore the v2 patch submission for
> now :-)
diff mbox

Patch

diff --git a/drivers/input/touchscreen/sur40.c b/drivers/input/touchscreen/sur40.c
index f1cb051..bcd9ee2 100644
--- a/drivers/input/touchscreen/sur40.c
+++ b/drivers/input/touchscreen/sur40.c
@@ -1,7 +1,7 @@ 
 /*
  * Surface2.0/SUR40/PixelSense input driver
  *
- * Copyright (c) 2013 by Florian 'floe' Echtler <floe@butterbrot.org>
+ * Copyright (c) 2014 by Florian 'floe' Echtler <floe@butterbrot.org>
  *
  * Derived from the USB Skeleton driver 1.1,
  * Copyright (c) 2003 Greg Kroah-Hartman (greg@kroah.com)
@@ -12,6 +12,9 @@ 
  * and from the generic hid-multitouch driver,
  * Copyright (c) 2010-2012 Stephane Chatty <chatty@enac.fr>
  *
+ * and from the v4l2-pci-skeleton driver,
+ * Copyright (c) Copyright 2014 Cisco Systems, Inc.
+ *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation; either version 2 of
@@ -31,6 +34,11 @@ 
 #include <linux/input-polldev.h>
 #include <linux/input/mt.h>
 #include <linux/usb/input.h>
+#include <linux/videodev2.h>
+#include <media/v4l2-device.h>
+#include <media/v4l2-dev.h>
+#include <media/v4l2-ioctl.h>
+#include <media/videobuf2-dma-contig.h>
 
 /* read 512 bytes from endpoint 0x86 -> get header + blobs */
 struct sur40_header {
@@ -82,9 +90,19 @@  struct sur40_data {
 	struct sur40_blob   blobs[];
 } __packed;
 
+/* read 512 bytes from endpoint 0x82 -> get header below
+ * continue reading 16k blocks until header.size bytes read */
+struct sur40_image_header {
+	__le32 magic;     /* "SUBF" */
+	__le32 packet_id;
+	__le32 size;      /* always 0x0007e900 = 960x540 */
+	__le32 timestamp; /* milliseconds (increases by 16 or 17 each frame) */
+	__le32 unknown;   /* "epoch?" always 02/03 00 00 00 */
+} __packed;
 
 /* version information */
 #define DRIVER_SHORT   "sur40"
+#define DRIVER_LONG    "Samsung SUR40"
 #define DRIVER_AUTHOR  "Florian 'floe' Echtler <floe@butterbrot.org>"
 #define DRIVER_DESC    "Surface2.0/SUR40/PixelSense input driver"
 
@@ -99,6 +117,13 @@  struct sur40_data {
 /* touch data endpoint */
 #define TOUCH_ENDPOINT 0x86
 
+/* video data endpoint */
+#define VIDEO_ENDPOINT 0x82
+
+/* video header fields */
+#define VIDEO_HEADER_MAGIC 0x46425553
+#define VIDEO_PACKET_SIZE  16384
+
 /* polling interval (ms) */
 #define POLL_INTERVAL 10
 
@@ -113,21 +138,23 @@  struct sur40_data {
 #define SUR40_GET_STATE   0xc5 /*  4 bytes state (?) */
 #define SUR40_GET_SENSORS 0xb1 /*  8 bytes sensors   */
 
-/*
- * Note: an earlier, non-public version of this driver used USB_RECIP_ENDPOINT
- * here by mistake which is very likely to have corrupted the firmware EEPROM
- * on two separate SUR40 devices. Thanks to Alan Stern who spotted this bug.
- * Should you ever run into a similar problem, the background story to this
- * incident and instructions on how to fix the corrupted EEPROM are available
- * at https://floe.butterbrot.org/matrix/hacking/surface/brick.html
-*/
-
+/* master device state */
 struct sur40_state {
 
 	struct usb_device *usbdev;
 	struct device *dev;
 	struct input_polled_dev *input;
 
+	struct v4l2_device v4l2;
+	struct video_device vdev;
+	struct mutex lock;
+
+	struct vb2_queue queue;
+	struct vb2_alloc_ctx *alloc_ctx;
+	struct list_head buf_list;
+	spinlock_t qlock;
+	int sequence;
+
 	struct sur40_data *bulk_in_buffer;
 	size_t bulk_in_size;
 	u8 bulk_in_epaddr;
@@ -135,6 +162,27 @@  struct sur40_state {
 	char phys[64];
 };
 
+struct sur40_buffer {
+	struct vb2_buffer vb;
+	struct list_head list;
+};
+
+/* forward declarations */
+static struct video_device sur40_video_device;
+static struct v4l2_pix_format sur40_video_format;
+static struct vb2_queue sur40_queue;
+static void sur40_process_video(struct sur40_state *sur40);
+
+/*
+ * Note: an earlier, non-public version of this driver used USB_RECIP_ENDPOINT
+ * here by mistake which is very likely to have corrupted the firmware EEPROM
+ * on two separate SUR40 devices. Thanks to Alan Stern who spotted this bug.
+ * Should you ever run into a similar problem, the background story to this
+ * incident and instructions on how to fix the corrupted EEPROM are available
+ * at https://floe.butterbrot.org/matrix/hacking/surface/brick.html
+*/
+
+/* command wrapper */
 static int sur40_command(struct sur40_state *dev,
 			 u8 command, u16 index, void *buffer, u16 size)
 {
@@ -247,7 +295,6 @@  static void sur40_report_blob(struct sur40_blob *blob, struct input_dev *input)
 /* core function: poll for new input data */
 static void sur40_poll(struct input_polled_dev *polldev)
 {
-
 	struct sur40_state *sur40 = polldev->private;
 	struct input_dev *input = polldev->input;
 	int result, bulk_read, need_blobs, packet_blobs, i;
@@ -314,6 +361,79 @@  static void sur40_poll(struct input_polled_dev *polldev)
 
 	input_mt_sync_frame(input);
 	input_sync(input);
+
+	sur40_process_video(sur40);
+}
+
+/* deal with video data */
+static void sur40_process_video(struct sur40_state *sur40)
+{
+
+	struct sur40_image_header *img = (void *)(sur40->bulk_in_buffer);
+	int result, bulk_read, bufpos;
+	struct sur40_buffer *new_buf;
+	uint8_t *buffer;
+
+	if (list_empty(&sur40->buf_list))
+		return;
+
+	/* get a new buffer from the list */
+	spin_lock(&sur40->qlock);
+	new_buf = list_entry(sur40->buf_list.next, struct sur40_buffer, list);
+	list_del(&new_buf->list);
+	spin_unlock(&sur40->qlock);
+
+	/* retrieve data via bulk read */
+	bufpos = 0;
+
+	result = usb_bulk_msg(sur40->usbdev,
+			usb_rcvbulkpipe(sur40->usbdev, VIDEO_ENDPOINT),
+			sur40->bulk_in_buffer, sur40->bulk_in_size,
+			&bulk_read, 1000);
+
+	if (result < 0) {
+		dev_err(sur40->dev, "error in usb_bulk_read\n");
+		goto err_poll;
+	}
+
+	if (bulk_read != sizeof(struct sur40_image_header)) {
+		dev_err(sur40->dev, "received %d bytes (%ld expected)\n",
+			bulk_read, sizeof(struct sur40_image_header));
+		goto err_poll;
+	}
+
+	if (le32_to_cpu(img->magic) != VIDEO_HEADER_MAGIC) {
+		dev_err(sur40->dev, "image magic mismatch\n");
+		goto err_poll;
+	}
+
+	if (le32_to_cpu(img->size) != sur40_video_format.sizeimage) {
+		dev_err(sur40->dev, "image size mismatch\n");
+		goto err_poll;
+	}
+
+	buffer = vb2_plane_vaddr(&new_buf->vb, 0);
+	while (bufpos < sur40_video_format.sizeimage) {
+		result = usb_bulk_msg(sur40->usbdev,
+			usb_rcvbulkpipe(sur40->usbdev, VIDEO_ENDPOINT),
+			buffer+bufpos, VIDEO_PACKET_SIZE,
+			&bulk_read, 1000);
+		if (result < 0) {
+			dev_err(sur40->dev, "error in usb_bulk_read\n");
+			goto err_poll;
+		}
+		bufpos += bulk_read;
+	}
+
+	/* mark as finished */
+	v4l2_get_timestamp(&new_buf->vb.v4l2_buf.timestamp);
+	new_buf->vb.v4l2_buf.sequence = sur40->sequence++;
+	new_buf->vb.v4l2_buf.field = V4L2_FIELD_NONE;
+	vb2_buffer_done(&new_buf->vb, VB2_BUF_STATE_DONE);
+	return;
+
+err_poll:
+	vb2_buffer_done(&new_buf->vb, VB2_BUF_STATE_ERROR);
 }
 
 /* Initialize input device parameters. */
@@ -377,6 +497,11 @@  static int sur40_probe(struct usb_interface *interface,
 		goto err_free_dev;
 	}
 
+	/* initialize locks/lists */
+	INIT_LIST_HEAD(&sur40->buf_list);
+	spin_lock_init(&sur40->qlock);
+	mutex_init(&sur40->lock);
+
 	/* Set up polled input device control structure */
 	poll_dev->private = sur40;
 	poll_dev->poll_interval = POLL_INTERVAL;
@@ -387,7 +512,7 @@  static int sur40_probe(struct usb_interface *interface,
 	/* Set up regular input device structure */
 	sur40_input_setup(poll_dev->input);
 
-	poll_dev->input->name = "Samsung SUR40";
+	poll_dev->input->name = DRIVER_LONG;
 	usb_to_input_id(usbdev, &poll_dev->input->id);
 	usb_make_path(usbdev, sur40->phys, sizeof(sur40->phys));
 	strlcat(sur40->phys, "/input0", sizeof(sur40->phys));
@@ -408,6 +533,7 @@  static int sur40_probe(struct usb_interface *interface,
 		goto err_free_polldev;
 	}
 
+	/* register the polled input device */
 	error = input_register_polled_device(poll_dev);
 	if (error) {
 		dev_err(&interface->dev,
@@ -415,12 +541,54 @@  static int sur40_probe(struct usb_interface *interface,
 		goto err_free_buffer;
 	}
 
+	/* register the video master device */
+	snprintf(sur40->v4l2.name, sizeof(sur40->v4l2.name), "%s", DRIVER_LONG);
+	error = v4l2_device_register(sur40->dev, &sur40->v4l2);
+	if (error) {
+		dev_err(&interface->dev,
+			"Unable to register video master device.");
+		goto err_unreg_v4l2;
+	}
+
+	/* initialize the lock and subdevice */
+	sur40->queue = sur40_queue;
+	sur40->queue.drv_priv = sur40;
+	sur40->queue.lock = &sur40->lock;
+
+	/* initialize the queue */
+	error = vb2_queue_init(&sur40->queue);
+	if (error)
+		goto err_unreg_v4l2;
+
+	sur40->alloc_ctx = vb2_dma_contig_init_ctx(sur40->dev);
+	if (IS_ERR(sur40->alloc_ctx)) {
+		dev_err(sur40->dev, "Can't allocate buffer context");
+		goto err_unreg_v4l2;
+	}
+
+	sur40->vdev = sur40_video_device;
+	sur40->vdev.v4l2_dev = &sur40->v4l2;
+	sur40->vdev.lock = &sur40->lock;
+	sur40->vdev.queue = &sur40->queue;
+	video_set_drvdata(&sur40->vdev, sur40);
+
+	error = video_register_device(&sur40->vdev, VFL_TYPE_GRABBER, -1);
+	if (error) {
+		dev_err(&interface->dev,
+			"Unable to register video subdevice.");
+		goto err_unreg_video;
+	}
+
 	/* we can register the device now, as it is ready */
 	usb_set_intfdata(interface, sur40);
 	dev_dbg(&interface->dev, "%s is now attached\n", DRIVER_DESC);
 
 	return 0;
 
+err_unreg_video:
+	video_unregister_device(&sur40->vdev);
+err_unreg_v4l2:
+	v4l2_device_unregister(&sur40->v4l2);
 err_free_buffer:
 	kfree(sur40->bulk_in_buffer);
 err_free_polldev:
@@ -436,6 +604,10 @@  static void sur40_disconnect(struct usb_interface *interface)
 {
 	struct sur40_state *sur40 = usb_get_intfdata(interface);
 
+	v4l2_device_unregister(&sur40->v4l2);
+	video_unregister_device(&sur40->vdev);
+	vb2_dma_contig_cleanup_ctx(sur40->alloc_ctx);
+
 	input_unregister_polled_device(sur40->input);
 	input_free_polled_device(sur40->input);
 	kfree(sur40->bulk_in_buffer);
@@ -445,12 +617,239 @@  static void sur40_disconnect(struct usb_interface *interface)
 	dev_dbg(&interface->dev, "%s is now disconnected\n", DRIVER_DESC);
 }
 
+/*
+ * Setup the constraints of the queue: besides setting the number of planes
+ * per buffer and the size and allocation context of each plane, it also
+ * checks if sufficient buffers have been allocated. Usually 3 is a good
+ * minimum number: many DMA engines need a minimum of 2 buffers in the
+ * queue and you need to have another available for userspace processing.
+ */
+static int sur40_queue_setup(struct vb2_queue *q, const struct v4l2_format *fmt,
+		       unsigned int *nbuffers, unsigned int *nplanes,
+		       unsigned int sizes[], void *alloc_ctxs[])
+{
+	struct sur40_state *sur40 = vb2_get_drv_priv(q);
+
+	if (q->num_buffers + *nbuffers < 3)
+		*nbuffers = 3 - q->num_buffers;
+
+	if (fmt && fmt->fmt.pix.sizeimage < sur40_video_format.sizeimage)
+		return -EINVAL;
+
+	*nplanes = 1;
+	sizes[0] = fmt ? fmt->fmt.pix.sizeimage : sur40_video_format.sizeimage;
+	alloc_ctxs[0] = sur40->alloc_ctx;
+
+	return 0;
+}
+
+/*
+ * Prepare the buffer for queueing to the DMA engine: check and set the
+ * payload size.
+ */
+static int sur40_buffer_prepare(struct vb2_buffer *vb)
+{
+	struct sur40_state *sur40 = vb2_get_drv_priv(vb->vb2_queue);
+	unsigned long size = sur40_video_format.sizeimage;
+
+	if (vb2_plane_size(vb, 0) < size) {
+		dev_err(&sur40->usbdev->dev, "buffer too small (%lu < %lu)\n",
+			 vb2_plane_size(vb, 0), size);
+		return -EINVAL;
+	}
+
+	vb2_set_plane_payload(vb, 0, size);
+	return 0;
+}
+
+/*
+ * Queue this buffer to the DMA engine.
+ */
+static void sur40_buffer_queue(struct vb2_buffer *vb)
+{
+	struct sur40_state *sur40 = vb2_get_drv_priv(vb->vb2_queue);
+	struct sur40_buffer *buf = (struct sur40_buffer *)vb;
+
+	spin_lock(&sur40->qlock);
+	list_add_tail(&buf->list, &sur40->buf_list);
+	spin_unlock(&sur40->qlock);
+}
+
+static void return_all_buffers(struct sur40_state *sur40,
+			       enum vb2_buffer_state state)
+{
+	struct sur40_buffer *buf, *node;
+
+	spin_lock(&sur40->qlock);
+	list_for_each_entry_safe(buf, node, &sur40->buf_list, list) {
+		vb2_buffer_done(&buf->vb, state);
+		list_del(&buf->list);
+	}
+	spin_unlock(&sur40->qlock);
+}
+
+/*
+ * Start streaming. First check if the minimum number of buffers have been
+ * queued. If not, then return -ENOBUFS and the vb2 framework will call
+ * this function again the next time a buffer has been queued until enough
+ * buffers are available to actually start the DMA engine.
+ */
+static int sur40_start_streaming(struct vb2_queue *vq, unsigned int count)
+{
+	struct sur40_state *sur40 = vb2_get_drv_priv(vq);
+
+	sur40->sequence = 0;
+	return 0;
+}
+
+/*
+ * Stop the DMA engine. Any remaining buffers in the DMA queue are dequeued
+ * and passed on to the vb2 framework marked as STATE_ERROR.
+ */
+static void sur40_stop_streaming(struct vb2_queue *vq)
+{
+	struct sur40_state *sur40 = vb2_get_drv_priv(vq);
+	/* Release all active buffers */
+	return_all_buffers(sur40, VB2_BUF_STATE_ERROR);
+}
+
+/* V4L ioctl */
+static int sur40_vidioc_querycap(struct file *file, void *priv,
+				 struct v4l2_capability *cap)
+{
+	struct sur40_state *sur40 = video_drvdata(file);
+
+	strlcpy(cap->driver, DRIVER_SHORT, sizeof(cap->driver));
+	strlcpy(cap->card, DRIVER_LONG, sizeof(cap->card));
+	usb_make_path(sur40->usbdev, cap->bus_info, sizeof(cap->bus_info));
+	cap->device_caps = V4L2_CAP_VIDEO_CAPTURE |
+		V4L2_CAP_READWRITE |
+		V4L2_CAP_STREAMING;
+	cap->capabilities = cap->device_caps | V4L2_CAP_DEVICE_CAPS;
+	return 0;
+}
+
+static int sur40_vidioc_enum_input(struct file *file, void *priv,
+				   struct v4l2_input *i)
+{
+	if (i->index != 0)
+		return -EINVAL;
+	i->type = V4L2_INPUT_TYPE_CAMERA;
+	i->std = V4L2_STD_UNKNOWN;
+	strlcpy(i->name, "In-Cell Sensor", sizeof(i->name));
+	i->capabilities = 0;
+	return 0;
+}
+
+static int sur40_vidioc_s_input(struct file *file, void *priv, unsigned int i)
+{
+	return (i == 0) ? 0 : -EINVAL;
+}
+
+static int sur40_vidioc_g_input(struct file *file, void *priv, unsigned int *i)
+{
+	*i = 0;
+	return 0;
+}
+
+static int sur40_vidioc_fmt(struct file *file, void *priv,
+			    struct v4l2_format *f)
+{
+	f->fmt.pix = sur40_video_format;
+	return 0;
+}
+
+static int sur40_vidioc_enum_fmt(struct file *file, void *priv,
+				 struct v4l2_fmtdesc *f)
+{
+	if (f->index != 0)
+		return -EINVAL;
+	strlcpy(f->description, "8-bit greyscale", sizeof(f->description));
+	f->pixelformat = V4L2_PIX_FMT_GREY;
+	f->flags = 0;
+	return 0;
+}
+
 static const struct usb_device_id sur40_table[] = {
 	{ USB_DEVICE(ID_MICROSOFT, ID_SUR40) },  /* Samsung SUR40 */
 	{ }                                      /* terminating null entry */
 };
 MODULE_DEVICE_TABLE(usb, sur40_table);
 
+/* V4L2 structures */
+static struct vb2_ops sur40_queue_ops = {
+	.queue_setup		= sur40_queue_setup,
+	.buf_prepare		= sur40_buffer_prepare,
+	.buf_queue		= sur40_buffer_queue,
+	.start_streaming	= sur40_start_streaming,
+	.stop_streaming		= sur40_stop_streaming,
+	.wait_prepare		= vb2_ops_wait_prepare,
+	.wait_finish		= vb2_ops_wait_finish,
+};
+
+static struct vb2_queue sur40_queue = {
+	.type = V4L2_BUF_TYPE_VIDEO_CAPTURE,
+	.io_modes = VB2_MMAP | VB2_READ,
+	.buf_struct_size = sizeof(struct sur40_buffer),
+	.ops = &sur40_queue_ops,
+	.mem_ops = &vb2_dma_contig_memops,
+	.timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC,
+	.min_buffers_needed = 3,
+	.gfp_flags = GFP_DMA,
+};
+
+static const struct v4l2_file_operations sur40_video_fops = {
+	.owner = THIS_MODULE,
+	.open = v4l2_fh_open,
+	.release = vb2_fop_release,
+	.unlocked_ioctl = video_ioctl2,
+	.read = vb2_fop_read,
+	.mmap = vb2_fop_mmap,
+	.poll = vb2_fop_poll,
+};
+
+static const struct v4l2_ioctl_ops sur40_video_ioctl_ops = {
+
+	.vidioc_querycap	= sur40_vidioc_querycap,
+
+	.vidioc_enum_fmt_vid_cap = sur40_vidioc_enum_fmt,
+	.vidioc_try_fmt_vid_cap	= sur40_vidioc_fmt,
+	.vidioc_s_fmt_vid_cap	= sur40_vidioc_fmt,
+	.vidioc_g_fmt_vid_cap	= sur40_vidioc_fmt,
+
+	.vidioc_enum_input	= sur40_vidioc_enum_input,
+	.vidioc_g_input		= sur40_vidioc_g_input,
+	.vidioc_s_input		= sur40_vidioc_s_input,
+
+	.vidioc_reqbufs		= vb2_ioctl_reqbufs,
+	.vidioc_create_bufs	= vb2_ioctl_create_bufs,
+	.vidioc_querybuf	= vb2_ioctl_querybuf,
+	.vidioc_qbuf		= vb2_ioctl_qbuf,
+	.vidioc_dqbuf		= vb2_ioctl_dqbuf,
+	.vidioc_expbuf		= vb2_ioctl_expbuf,
+
+	.vidioc_streamon	= vb2_ioctl_streamon,
+	.vidioc_streamoff	= vb2_ioctl_streamoff,
+};
+
+static struct video_device sur40_video_device = {
+	.name = DRIVER_LONG,
+	.fops = &sur40_video_fops,
+	.ioctl_ops = &sur40_video_ioctl_ops,
+	.release = video_device_release_empty,
+};
+
+static struct v4l2_pix_format sur40_video_format = {
+	.pixelformat = V4L2_PIX_FMT_GREY,
+	.width  = SENSOR_RES_X / 2,
+	.height = SENSOR_RES_Y / 2,
+	.field = V4L2_FIELD_NONE,
+	.colorspace = V4L2_COLORSPACE_SRGB,
+	.bytesperline = SENSOR_RES_X / 2,
+	.sizeimage = (SENSOR_RES_X/2) * (SENSOR_RES_Y/2),
+	.priv = 0,
+};
+
 /* USB-specific object needed to register this driver with the USB subsystem. */
 static struct usb_driver sur40_driver = {
 	.name = DRIVER_SHORT,