diff mbox series

[RFC,2/9] loop: add llseek(SEEK_HOLE/SEEK_DATA) support

Message ID 20240328203910.2370087-3-stefanha@redhat.com (mailing list archive)
State New, archived
Headers show
Series block: add llseek(SEEK_HOLE/SEEK_DATA) support | expand

Commit Message

Stefan Hajnoczi March 28, 2024, 8:39 p.m. UTC
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
Open issues:
- The file offset is updated on both the blkdev file and the backing
  file. Is there a way to avoid updating the backing file offset so the
  file opened by userspace is not affected?
- Should this run in the worker or use the cgroups?
---
 drivers/block/loop.c | 36 ++++++++++++++++++++++++++++++------
 1 file changed, 30 insertions(+), 6 deletions(-)

Comments

Eric Blake March 29, 2024, midnight UTC | #1
On Thu, Mar 28, 2024 at 04:39:03PM -0400, Stefan Hajnoczi wrote:
> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> ---
> Open issues:
> - The file offset is updated on both the blkdev file and the backing
>   file. Is there a way to avoid updating the backing file offset so the
>   file opened by userspace is not affected?
> - Should this run in the worker or use the cgroups?
> ---
>  drivers/block/loop.c | 36 ++++++++++++++++++++++++++++++------
>  1 file changed, 30 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/block/loop.c b/drivers/block/loop.c
> index 28a95fd366fea..6a89375de82e8 100644
> --- a/drivers/block/loop.c
> +++ b/drivers/block/loop.c
> @@ -750,6 +750,29 @@ static void loop_sysfs_exit(struct loop_device *lo)
>  				   &loop_attribute_group);
>  }
>  
> +static loff_t lo_seek_hole_data(struct block_device *bdev, loff_t offset,
> +		int whence)
> +{
> +	/* TODO need to activate cgroups or use worker? */
> +	/* TODO locking? */
> +	struct loop_device *lo = bdev->bd_disk->private_data;
> +	struct file *file = lo->lo_backing_file;
> +
> +	if (lo->lo_offset > 0)
> +		offset += lo->lo_offset; /* TODO underflow/overflow? */
> +
> +	/* TODO backing file offset is modified! */
> +	offset = vfs_llseek(file, offset, whence);

Not only did you modify the underlying offset...

> +	if (offset < 0)
> +		return offset;
> +
> +	if (lo->lo_offset > 0)
> +		offset -= lo->lo_offset; /* TODO underflow/overflow? */
> +	if (lo->lo_sizelimit > 0 && offset > lo->lo_sizelimit)
> +		offset = lo->lo_sizelimit;

...but if this code kicks in, you have clamped the return result to
EOF of the loop device while leaving the underlying offset beyond the
limit, which may mess up assumptions of other code expecting the loop
to always have an in-bounds offset for the underlying file (offhand, I
don't know if there is any such code; but since loop_ctl_fops.llseek =
noop_lseek, there may be code relying on an even-tighter restriction
that the offset of the underlying file never changes, not even within
bounds).

Furthermore, this is inconsistent with all other seek-beyond-end code
that fails with -ENXIO instead of returning size.

But for an RFC, the idea of being able to seek to HOLEs in a loop
device is awesome!

> @@ -2140,7 +2164,7 @@ static int loop_control_remove(int idx)
>  		pr_warn_once("deleting an unspecified loop device is not supported.\n");
>  		return -EINVAL;
>  	}
> -		
> +
>  	/* Hide this loop device for serialization. */
>  	ret = mutex_lock_killable(&loop_ctl_mutex);
>  	if (ret)

Unrelated whitespace change?
Stefan Hajnoczi March 29, 2024, 12:54 p.m. UTC | #2
On Thu, Mar 28, 2024 at 07:00:26PM -0500, Eric Blake wrote:
> On Thu, Mar 28, 2024 at 04:39:03PM -0400, Stefan Hajnoczi wrote:
> > Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> > ---
> > Open issues:
> > - The file offset is updated on both the blkdev file and the backing
> >   file. Is there a way to avoid updating the backing file offset so the
> >   file opened by userspace is not affected?
> > - Should this run in the worker or use the cgroups?
> > ---
> >  drivers/block/loop.c | 36 ++++++++++++++++++++++++++++++------
> >  1 file changed, 30 insertions(+), 6 deletions(-)
> > 
> > diff --git a/drivers/block/loop.c b/drivers/block/loop.c
> > index 28a95fd366fea..6a89375de82e8 100644
> > --- a/drivers/block/loop.c
> > +++ b/drivers/block/loop.c
> > @@ -750,6 +750,29 @@ static void loop_sysfs_exit(struct loop_device *lo)
> >  				   &loop_attribute_group);
> >  }
> >  
> > +static loff_t lo_seek_hole_data(struct block_device *bdev, loff_t offset,
> > +		int whence)
> > +{
> > +	/* TODO need to activate cgroups or use worker? */
> > +	/* TODO locking? */
> > +	struct loop_device *lo = bdev->bd_disk->private_data;
> > +	struct file *file = lo->lo_backing_file;
> > +
> > +	if (lo->lo_offset > 0)
> > +		offset += lo->lo_offset; /* TODO underflow/overflow? */
> > +
> > +	/* TODO backing file offset is modified! */
> > +	offset = vfs_llseek(file, offset, whence);
> 
> Not only did you modify the underlying offset...
> 
> > +	if (offset < 0)
> > +		return offset;
> > +
> > +	if (lo->lo_offset > 0)
> > +		offset -= lo->lo_offset; /* TODO underflow/overflow? */
> > +	if (lo->lo_sizelimit > 0 && offset > lo->lo_sizelimit)
> > +		offset = lo->lo_sizelimit;
> 
> ...but if this code kicks in, you have clamped the return result to
> EOF of the loop device while leaving the underlying offset beyond the
> limit, which may mess up assumptions of other code expecting the loop
> to always have an in-bounds offset for the underlying file (offhand, I
> don't know if there is any such code; but since loop_ctl_fops.llseek =
> noop_lseek, there may be code relying on an even-tighter restriction
> that the offset of the underlying file never changes, not even within
> bounds).

I don't think anything relies on the file offset. Requests coming from
the block device contain their own offset (which may have been based on
the block device file's offset, but not the backing file's offset).

> Furthermore, this is inconsistent with all other seek-beyond-end code
> that fails with -ENXIO instead of returning size.

You're right, in the SEEK_DATA case the return value should be -ENXIO.
The SEEK_HOLE case is correct with lo_sizelimit. There is also an
off-by-one error in the comparison.

It should be:

  if (lo->lo_sizelimit > 0 && offset >= lo->lo_sizelimit) {
  	if (whence == SEEK_DATA)
  		offset = -ENXIO;
  	else
  		offset = lo->lo_sizelimit;
  }

> But for an RFC, the idea of being able to seek to HOLEs in a loop
> device is awesome!
> 
> > @@ -2140,7 +2164,7 @@ static int loop_control_remove(int idx)
> >  		pr_warn_once("deleting an unspecified loop device is not supported.\n");
> >  		return -EINVAL;
> >  	}
> > -		
> > +
> >  	/* Hide this loop device for serialization. */
> >  	ret = mutex_lock_killable(&loop_ctl_mutex);
> >  	if (ret)
> 
> Unrelated whitespace change?

Yes, I'll drop it.

Stefan
diff mbox series

Patch

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 28a95fd366fea..6a89375de82e8 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -750,6 +750,29 @@  static void loop_sysfs_exit(struct loop_device *lo)
 				   &loop_attribute_group);
 }
 
+static loff_t lo_seek_hole_data(struct block_device *bdev, loff_t offset,
+		int whence)
+{
+	/* TODO need to activate cgroups or use worker? */
+	/* TODO locking? */
+	struct loop_device *lo = bdev->bd_disk->private_data;
+	struct file *file = lo->lo_backing_file;
+
+	if (lo->lo_offset > 0)
+		offset += lo->lo_offset; /* TODO underflow/overflow? */
+
+	/* TODO backing file offset is modified! */
+	offset = vfs_llseek(file, offset, whence);
+	if (offset < 0)
+		return offset;
+
+	if (lo->lo_offset > 0)
+		offset -= lo->lo_offset; /* TODO underflow/overflow? */
+	if (lo->lo_sizelimit > 0 && offset > lo->lo_sizelimit)
+		offset = lo->lo_sizelimit;
+	return offset;
+}
+
 static void loop_config_discard(struct loop_device *lo,
 		struct queue_limits *lim)
 {
@@ -1751,13 +1774,14 @@  static void lo_free_disk(struct gendisk *disk)
 }
 
 static const struct block_device_operations lo_fops = {
-	.owner =	THIS_MODULE,
-	.release =	lo_release,
-	.ioctl =	lo_ioctl,
+	.owner =		THIS_MODULE,
+	.release =		lo_release,
+	.ioctl =		lo_ioctl,
 #ifdef CONFIG_COMPAT
-	.compat_ioctl =	lo_compat_ioctl,
+	.compat_ioctl =		lo_compat_ioctl,
 #endif
-	.free_disk =	lo_free_disk,
+	.free_disk =		lo_free_disk,
+	.seek_hole_data =	lo_seek_hole_data,
 };
 
 /*
@@ -2140,7 +2164,7 @@  static int loop_control_remove(int idx)
 		pr_warn_once("deleting an unspecified loop device is not supported.\n");
 		return -EINVAL;
 	}
-		
+
 	/* Hide this loop device for serialization. */
 	ret = mutex_lock_killable(&loop_ctl_mutex);
 	if (ret)