diff mbox series

[4/7] block: Introduce a new ioctl for simple copy

Message ID 20210817101423.12367-5-selvakuma.s1@samsung.com (mailing list archive)
State New, archived
Headers show
Series [1/7] block: make bio_map_kern() non static | expand

Commit Message

SelvaKumar S Aug. 17, 2021, 10:14 a.m. UTC
From: Nitesh Shetty <nj.shetty@samsung.com>

Add new BLKCOPY ioctl that offloads copying of one or more sources ranges
to a destination in the device. COPY ioctl accepts a 'copy_range'
structure that contains destination (in sectors), no of sources and
pointer to the array of source ranges. Each source range is represented by
'range_entry' that contains start and length of source ranges (in sectors)

MAX_COPY_NR_RANGE, limits the number of entries for the IOCTL and
MAX_COPY_TOTAL_LENGTH limits the total copy length, IOCTL can handle.

Example code, to issue BLKCOPY:
/* Sample example to copy three source-ranges [0, 8] [16, 8] [32,8] to
 * [64,24], on the same device */

int main(void)
{
	int ret, fd;
	struct range_entry source_range[] = {{.src = 0, .len = 8},
		{.src = 16, .len = 8}, {.src = 32, .len = 8},};
	struct copy_range cr;

	cr.dest = 64;
	cr.nr_range = 3;
	cr.range_list = (__u64)&source_range;

	fd = open("/dev/nvme0n1", O_RDWR);
	if (fd < 0) return 1;

	ret = ioctl(fd, BLKCOPY, &cr);
	if (ret < 0) printf("copy failure\n");

	close(fd);

	return ret;
}

Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com>
Signed-off-by: SelvaKumar S <selvakuma.s1@samsung.com>
Signed-off-by: Kanchan Joshi <joshi.k@samsung.com>
---
 block/ioctl.c           | 33 +++++++++++++++++++++++++++++++++
 include/uapi/linux/fs.h |  8 ++++++++
 2 files changed, 41 insertions(+)

Comments

Greg Kroah-Hartman Aug. 17, 2021, 1:09 p.m. UTC | #1
On Tue, Aug 17, 2021 at 03:44:20PM +0530, SelvaKumar S wrote:
> From: Nitesh Shetty <nj.shetty@samsung.com>
> 
> Add new BLKCOPY ioctl that offloads copying of one or more sources ranges
> to a destination in the device. COPY ioctl accepts a 'copy_range'
> structure that contains destination (in sectors), no of sources and
> pointer to the array of source ranges. Each source range is represented by
> 'range_entry' that contains start and length of source ranges (in sectors)
> 
> MAX_COPY_NR_RANGE, limits the number of entries for the IOCTL and
> MAX_COPY_TOTAL_LENGTH limits the total copy length, IOCTL can handle.
> 
> Example code, to issue BLKCOPY:
> /* Sample example to copy three source-ranges [0, 8] [16, 8] [32,8] to
>  * [64,24], on the same device */
> 
> int main(void)
> {
> 	int ret, fd;
> 	struct range_entry source_range[] = {{.src = 0, .len = 8},
> 		{.src = 16, .len = 8}, {.src = 32, .len = 8},};
> 	struct copy_range cr;
> 
> 	cr.dest = 64;
> 	cr.nr_range = 3;
> 	cr.range_list = (__u64)&source_range;
> 
> 	fd = open("/dev/nvme0n1", O_RDWR);
> 	if (fd < 0) return 1;
> 
> 	ret = ioctl(fd, BLKCOPY, &cr);
> 	if (ret < 0) printf("copy failure\n");
> 
> 	close(fd);
> 
> 	return ret;
> }
> 
> Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com>
> Signed-off-by: SelvaKumar S <selvakuma.s1@samsung.com>
> Signed-off-by: Kanchan Joshi <joshi.k@samsung.com>
> ---
>  block/ioctl.c           | 33 +++++++++++++++++++++++++++++++++
>  include/uapi/linux/fs.h |  8 ++++++++
>  2 files changed, 41 insertions(+)
> 
> diff --git a/block/ioctl.c b/block/ioctl.c
> index eb0491e90b9a..2af56d01e9fe 100644
> --- a/block/ioctl.c
> +++ b/block/ioctl.c
> @@ -143,6 +143,37 @@ static int blk_ioctl_discard(struct block_device *bdev, fmode_t mode,
>  				    GFP_KERNEL, flags);
>  }
>  
> +static int blk_ioctl_copy(struct block_device *bdev, fmode_t mode,
> +		unsigned long arg)
> +{
> +	struct copy_range crange;
> +	struct range_entry *rlist;
> +	int ret;
> +
> +	if (!(mode & FMODE_WRITE))
> +		return -EBADF;
> +
> +	if (copy_from_user(&crange, (void __user *)arg, sizeof(crange)))
> +		return -EFAULT;
> +
> +	rlist = kmalloc_array(crange.nr_range, sizeof(*rlist),
> +			GFP_KERNEL);
> +	if (!rlist)
> +		return -ENOMEM;
> +
> +	if (copy_from_user(rlist, (void __user *)crange.range_list,
> +				sizeof(*rlist) * crange.nr_range)) {
> +		ret = -EFAULT;
> +		goto out;
> +	}
> +
> +	ret = blkdev_issue_copy(bdev, crange.nr_range, rlist, bdev, crange.dest,
> +			GFP_KERNEL, 0);
> +out:
> +	kfree(rlist);
> +	return ret;
> +}
> +
>  static int blk_ioctl_zeroout(struct block_device *bdev, fmode_t mode,
>  		unsigned long arg)
>  {
> @@ -468,6 +499,8 @@ static int blkdev_common_ioctl(struct block_device *bdev, fmode_t mode,
>  	case BLKSECDISCARD:
>  		return blk_ioctl_discard(bdev, mode, arg,
>  				BLKDEV_DISCARD_SECURE);
> +	case BLKCOPY:
> +		return blk_ioctl_copy(bdev, mode, arg);
>  	case BLKZEROOUT:
>  		return blk_ioctl_zeroout(bdev, mode, arg);
>  	case BLKGETDISKSEQ:
> diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
> index 7a97b588d892..4183688ff398 100644
> --- a/include/uapi/linux/fs.h
> +++ b/include/uapi/linux/fs.h
> @@ -76,6 +76,13 @@ struct range_entry {
>  	__u64 len;
>  };
>  
> +struct copy_range {
> +	__u64 dest;
> +	__u64 nr_range;
> +	__u64 range_list;
> +	__u64 rsvd;

If you have a "reserved" field, you HAVE to check that it is 0.  If not,
you can never use it in the future.

Also, you can spell it out, we have lots of vowels :)

thanks,

greg k-h

--
dm-devel mailing list
dm-devel@redhat.com
https://listman.redhat.com/mailman/listinfo/dm-devel
Greg KH Aug. 17, 2021, 1:10 p.m. UTC | #2
On Tue, Aug 17, 2021 at 03:44:20PM +0530, SelvaKumar S wrote:
> From: Nitesh Shetty <nj.shetty@samsung.com>
> 
> Add new BLKCOPY ioctl that offloads copying of one or more sources ranges
> to a destination in the device. COPY ioctl accepts a 'copy_range'
> structure that contains destination (in sectors), no of sources and
> pointer to the array of source ranges. Each source range is represented by
> 'range_entry' that contains start and length of source ranges (in sectors)
> 
> MAX_COPY_NR_RANGE, limits the number of entries for the IOCTL and
> MAX_COPY_TOTAL_LENGTH limits the total copy length, IOCTL can handle.
> 
> Example code, to issue BLKCOPY:
> /* Sample example to copy three source-ranges [0, 8] [16, 8] [32,8] to
>  * [64,24], on the same device */
> 
> int main(void)
> {
> 	int ret, fd;
> 	struct range_entry source_range[] = {{.src = 0, .len = 8},
> 		{.src = 16, .len = 8}, {.src = 32, .len = 8},};
> 	struct copy_range cr;
> 
> 	cr.dest = 64;
> 	cr.nr_range = 3;
> 	cr.range_list = (__u64)&source_range;
> 
> 	fd = open("/dev/nvme0n1", O_RDWR);
> 	if (fd < 0) return 1;
> 
> 	ret = ioctl(fd, BLKCOPY, &cr);
> 	if (ret < 0) printf("copy failure\n");
> 
> 	close(fd);
> 
> 	return ret;
> }
> 
> Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com>
> Signed-off-by: SelvaKumar S <selvakuma.s1@samsung.com>
> Signed-off-by: Kanchan Joshi <joshi.k@samsung.com>
> ---
>  block/ioctl.c           | 33 +++++++++++++++++++++++++++++++++
>  include/uapi/linux/fs.h |  8 ++++++++
>  2 files changed, 41 insertions(+)
> 
> diff --git a/block/ioctl.c b/block/ioctl.c
> index eb0491e90b9a..2af56d01e9fe 100644
> --- a/block/ioctl.c
> +++ b/block/ioctl.c
> @@ -143,6 +143,37 @@ static int blk_ioctl_discard(struct block_device *bdev, fmode_t mode,
>  				    GFP_KERNEL, flags);
>  }
>  
> +static int blk_ioctl_copy(struct block_device *bdev, fmode_t mode,
> +		unsigned long arg)
> +{
> +	struct copy_range crange;
> +	struct range_entry *rlist;
> +	int ret;
> +
> +	if (!(mode & FMODE_WRITE))
> +		return -EBADF;
> +
> +	if (copy_from_user(&crange, (void __user *)arg, sizeof(crange)))
> +		return -EFAULT;
> +
> +	rlist = kmalloc_array(crange.nr_range, sizeof(*rlist),
> +			GFP_KERNEL);

No error checking for huge values of nr_range?  Is that wise?  You
really want userspace to be able to allocate "all" of the kernel memory
in the system?

thanks,

greg k-h

--
dm-devel mailing list
dm-devel@redhat.com
https://listman.redhat.com/mailman/listinfo/dm-devel
Nitesh Shetty Aug. 17, 2021, 2:48 p.m. UTC | #3
On Tue, Aug 17, 2021 at 6:40 PM Greg KH <greg@kroah.com> wrote:
>
> On Tue, Aug 17, 2021 at 03:44:20PM +0530, SelvaKumar S wrote:
> > From: Nitesh Shetty <nj.shetty@samsung.com>
> >
> > Add new BLKCOPY ioctl that offloads copying of one or more sources ranges
> > to a destination in the device. COPY ioctl accepts a 'copy_range'
> > structure that contains destination (in sectors), no of sources and
> > pointer to the array of source ranges. Each source range is represented by
> > 'range_entry' that contains start and length of source ranges (in sectors)
> >
> > MAX_COPY_NR_RANGE, limits the number of entries for the IOCTL and
> > MAX_COPY_TOTAL_LENGTH limits the total copy length, IOCTL can handle.
> >
> > Example code, to issue BLKCOPY:
> > /* Sample example to copy three source-ranges [0, 8] [16, 8] [32,8] to
> >  * [64,24], on the same device */
> >
> > int main(void)
> > {
> >       int ret, fd;
> >       struct range_entry source_range[] = {{.src = 0, .len = 8},
> >               {.src = 16, .len = 8}, {.src = 32, .len = 8},};
> >       struct copy_range cr;
> >
> >       cr.dest = 64;
> >       cr.nr_range = 3;
> >       cr.range_list = (__u64)&source_range;
> >
> >       fd = open("/dev/nvme0n1", O_RDWR);
> >       if (fd < 0) return 1;
> >
> >       ret = ioctl(fd, BLKCOPY, &cr);
> >       if (ret < 0) printf("copy failure\n");
> >
> >       close(fd);
> >
> >       return ret;
> > }
> >
> > Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com>
> > Signed-off-by: SelvaKumar S <selvakuma.s1@samsung.com>
> > Signed-off-by: Kanchan Joshi <joshi.k@samsung.com>
> > ---
> >  block/ioctl.c           | 33 +++++++++++++++++++++++++++++++++
> >  include/uapi/linux/fs.h |  8 ++++++++
> >  2 files changed, 41 insertions(+)
> >
> > diff --git a/block/ioctl.c b/block/ioctl.c
> > index eb0491e90b9a..2af56d01e9fe 100644
> > --- a/block/ioctl.c
> > +++ b/block/ioctl.c
> > @@ -143,6 +143,37 @@ static int blk_ioctl_discard(struct block_device *bdev, fmode_t mode,
> >                                   GFP_KERNEL, flags);
> >  }
> >
> > +static int blk_ioctl_copy(struct block_device *bdev, fmode_t mode,
> > +             unsigned long arg)
> > +{
> > +     struct copy_range crange;
> > +     struct range_entry *rlist;
> > +     int ret;
> > +
> > +     if (!(mode & FMODE_WRITE))
> > +             return -EBADF;
> > +
> > +     if (copy_from_user(&crange, (void __user *)arg, sizeof(crange)))
> > +             return -EFAULT;
> > +
> > +     rlist = kmalloc_array(crange.nr_range, sizeof(*rlist),
> > +                     GFP_KERNEL);
>
> No error checking for huge values of nr_range?  Is that wise?  You
> really want userspace to be able to allocate "all" of the kernel memory
> in the system?
>
> thanks,
>
> greg k-h

We added a kernel imposed limit MAX_COPY_NR_RANGE for that purpose,
but missed adding the check here.
Will have that fixed. Thanks for pointing this out.

Nitesh Shetty

--
dm-devel mailing list
dm-devel@redhat.com
https://listman.redhat.com/mailman/listinfo/dm-devel
Darrick J. Wong Aug. 17, 2021, 11:36 p.m. UTC | #4
On Tue, Aug 17, 2021 at 03:44:20PM +0530, SelvaKumar S wrote:
> From: Nitesh Shetty <nj.shetty@samsung.com>
> 
> Add new BLKCOPY ioctl that offloads copying of one or more sources ranges
> to a destination in the device. COPY ioctl accepts a 'copy_range'
> structure that contains destination (in sectors), no of sources and
> pointer to the array of source ranges. Each source range is represented by
> 'range_entry' that contains start and length of source ranges (in sectors)
> 
> MAX_COPY_NR_RANGE, limits the number of entries for the IOCTL and
> MAX_COPY_TOTAL_LENGTH limits the total copy length, IOCTL can handle.
> 
> Example code, to issue BLKCOPY:
> /* Sample example to copy three source-ranges [0, 8] [16, 8] [32,8] to
>  * [64,24], on the same device */
> 
> int main(void)
> {
> 	int ret, fd;
> 	struct range_entry source_range[] = {{.src = 0, .len = 8},
> 		{.src = 16, .len = 8}, {.src = 32, .len = 8},};
> 	struct copy_range cr;
> 
> 	cr.dest = 64;
> 	cr.nr_range = 3;
> 	cr.range_list = (__u64)&source_range;
> 
> 	fd = open("/dev/nvme0n1", O_RDWR);
> 	if (fd < 0) return 1;
> 
> 	ret = ioctl(fd, BLKCOPY, &cr);
> 	if (ret < 0) printf("copy failure\n");
> 
> 	close(fd);
> 
> 	return ret;
> }
> 
> Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com>
> Signed-off-by: SelvaKumar S <selvakuma.s1@samsung.com>
> Signed-off-by: Kanchan Joshi <joshi.k@samsung.com>
> ---
>  block/ioctl.c           | 33 +++++++++++++++++++++++++++++++++
>  include/uapi/linux/fs.h |  8 ++++++++
>  2 files changed, 41 insertions(+)
> 
> diff --git a/block/ioctl.c b/block/ioctl.c
> index eb0491e90b9a..2af56d01e9fe 100644
> --- a/block/ioctl.c
> +++ b/block/ioctl.c
> @@ -143,6 +143,37 @@ static int blk_ioctl_discard(struct block_device *bdev, fmode_t mode,
>  				    GFP_KERNEL, flags);
>  }
>  
> +static int blk_ioctl_copy(struct block_device *bdev, fmode_t mode,
> +		unsigned long arg)
> +{
> +	struct copy_range crange;
> +	struct range_entry *rlist;
> +	int ret;
> +
> +	if (!(mode & FMODE_WRITE))
> +		return -EBADF;
> +
> +	if (copy_from_user(&crange, (void __user *)arg, sizeof(crange)))
> +		return -EFAULT;
> +
> +	rlist = kmalloc_array(crange.nr_range, sizeof(*rlist),
> +			GFP_KERNEL);
> +	if (!rlist)
> +		return -ENOMEM;
> +
> +	if (copy_from_user(rlist, (void __user *)crange.range_list,
> +				sizeof(*rlist) * crange.nr_range)) {
> +		ret = -EFAULT;
> +		goto out;
> +	}
> +
> +	ret = blkdev_issue_copy(bdev, crange.nr_range, rlist, bdev, crange.dest,
> +			GFP_KERNEL, 0);
> +out:
> +	kfree(rlist);
> +	return ret;
> +}
> +
>  static int blk_ioctl_zeroout(struct block_device *bdev, fmode_t mode,
>  		unsigned long arg)
>  {
> @@ -468,6 +499,8 @@ static int blkdev_common_ioctl(struct block_device *bdev, fmode_t mode,
>  	case BLKSECDISCARD:
>  		return blk_ioctl_discard(bdev, mode, arg,
>  				BLKDEV_DISCARD_SECURE);
> +	case BLKCOPY:
> +		return blk_ioctl_copy(bdev, mode, arg);
>  	case BLKZEROOUT:
>  		return blk_ioctl_zeroout(bdev, mode, arg);
>  	case BLKGETDISKSEQ:
> diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
> index 7a97b588d892..4183688ff398 100644
> --- a/include/uapi/linux/fs.h
> +++ b/include/uapi/linux/fs.h
> @@ -76,6 +76,13 @@ struct range_entry {
>  	__u64 len;
>  };
>  
> +struct copy_range {
> +	__u64 dest;
> +	__u64 nr_range;

If the maximum number of elements in the range list is 1<<12, there's no
need for this to be larger than a u16, right?

> +	__u64 range_list;

Pointers embedded in a structure are /not/ a good idea, because this
will create a lot of compatibility headaches for 32-bit binaries running
on 64-bit kernels.  Please just make the size of this header structure
a multiple of 8 bytes and put the range_entry list immediately after it.

struct copy_range {
	__s64 dest_offset;
	__u32 nr_range_entries;
	__u32 flags;
	__u64 reserved[2];
};

struct __user range_entry *re = ((struct range_entry *)(copyhead + 1));

copy_from_user(&urk, re...);

--D

> +	__u64 rsvd;
> +};
> +
>  /* extent-same (dedupe) ioctls; these MUST match the btrfs ioctl definitions */
>  #define FILE_DEDUPE_RANGE_SAME		0
>  #define FILE_DEDUPE_RANGE_DIFFERS	1
> @@ -197,6 +204,7 @@ struct fsxattr {
>  #define BLKROTATIONAL _IO(0x12,126)
>  #define BLKZEROOUT _IO(0x12,127)
>  #define BLKGETDISKSEQ _IOR(0x12,128,__u64)
> +#define BLKCOPY _IOWR(0x12, 129, struct copy_range)
>  /*
>   * A jump here: 130-136 are reserved for zoned block devices
>   * (see uapi/linux/blkzoned.h)
> -- 
> 2.25.1
> 

--
dm-devel mailing list
dm-devel@redhat.com
https://listman.redhat.com/mailman/listinfo/dm-devel
Nitesh Shetty Aug. 18, 2021, 3:37 p.m. UTC | #5
On Wed, Aug 18, 2021 at 5:06 AM Darrick J. Wong <djwong@kernel.org> wrote:
>
> On Tue, Aug 17, 2021 at 03:44:20PM +0530, SelvaKumar S wrote:
> > From: Nitesh Shetty <nj.shetty@samsung.com>
> >
> > Add new BLKCOPY ioctl that offloads copying of one or more sources ranges
> > to a destination in the device. COPY ioctl accepts a 'copy_range'
> > structure that contains destination (in sectors), no of sources and
> > pointer to the array of source ranges. Each source range is represented by
> > 'range_entry' that contains start and length of source ranges (in sectors)
> >
> > MAX_COPY_NR_RANGE, limits the number of entries for the IOCTL and
> > MAX_COPY_TOTAL_LENGTH limits the total copy length, IOCTL can handle.
> >
> > Example code, to issue BLKCOPY:
> > /* Sample example to copy three source-ranges [0, 8] [16, 8] [32,8] to
> >  * [64,24], on the same device */
> >
> > int main(void)
> > {
> >       int ret, fd;
> >       struct range_entry source_range[] = {{.src = 0, .len = 8},
> >               {.src = 16, .len = 8}, {.src = 32, .len = 8},};
> >       struct copy_range cr;
> >
> >       cr.dest = 64;
> >       cr.nr_range = 3;
> >       cr.range_list = (__u64)&source_range;
> >
> >       fd = open("/dev/nvme0n1", O_RDWR);
> >       if (fd < 0) return 1;
> >
> >       ret = ioctl(fd, BLKCOPY, &cr);
> >       if (ret < 0) printf("copy failure\n");
> >
> >       close(fd);
> >
> >       return ret;
> > }
> >
> > Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com>
> > Signed-off-by: SelvaKumar S <selvakuma.s1@samsung.com>
> > Signed-off-by: Kanchan Joshi <joshi.k@samsung.com>
> > ---
> >  block/ioctl.c           | 33 +++++++++++++++++++++++++++++++++
> >  include/uapi/linux/fs.h |  8 ++++++++
> >  2 files changed, 41 insertions(+)
> >
> > diff --git a/block/ioctl.c b/block/ioctl.c
> > index eb0491e90b9a..2af56d01e9fe 100644
> > --- a/block/ioctl.c
> > +++ b/block/ioctl.c
> > @@ -143,6 +143,37 @@ static int blk_ioctl_discard(struct block_device *bdev, fmode_t mode,
> >                                   GFP_KERNEL, flags);
> >  }
> >
> > +static int blk_ioctl_copy(struct block_device *bdev, fmode_t mode,
> > +             unsigned long arg)
> > +{
> > +     struct copy_range crange;
> > +     struct range_entry *rlist;
> > +     int ret;
> > +
> > +     if (!(mode & FMODE_WRITE))
> > +             return -EBADF;
> > +
> > +     if (copy_from_user(&crange, (void __user *)arg, sizeof(crange)))
> > +             return -EFAULT;
> > +
> > +     rlist = kmalloc_array(crange.nr_range, sizeof(*rlist),
> > +                     GFP_KERNEL);
> > +     if (!rlist)
> > +             return -ENOMEM;
> > +
> > +     if (copy_from_user(rlist, (void __user *)crange.range_list,
> > +                             sizeof(*rlist) * crange.nr_range)) {
> > +             ret = -EFAULT;
> > +             goto out;
> > +     }
> > +
> > +     ret = blkdev_issue_copy(bdev, crange.nr_range, rlist, bdev, crange.dest,
> > +                     GFP_KERNEL, 0);
> > +out:
> > +     kfree(rlist);
> > +     return ret;
> > +}
> > +
> >  static int blk_ioctl_zeroout(struct block_device *bdev, fmode_t mode,
> >               unsigned long arg)
> >  {
> > @@ -468,6 +499,8 @@ static int blkdev_common_ioctl(struct block_device *bdev, fmode_t mode,
> >       case BLKSECDISCARD:
> >               return blk_ioctl_discard(bdev, mode, arg,
> >                               BLKDEV_DISCARD_SECURE);
> > +     case BLKCOPY:
> > +             return blk_ioctl_copy(bdev, mode, arg);
> >       case BLKZEROOUT:
> >               return blk_ioctl_zeroout(bdev, mode, arg);
> >       case BLKGETDISKSEQ:
> > diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
> > index 7a97b588d892..4183688ff398 100644
> > --- a/include/uapi/linux/fs.h
> > +++ b/include/uapi/linux/fs.h
> > @@ -76,6 +76,13 @@ struct range_entry {
> >       __u64 len;
> >  };
> >
> > +struct copy_range {
> > +     __u64 dest;
> > +     __u64 nr_range;
>
> If the maximum number of elements in the range list is 1<<12, there's no
> need for this to be larger than a u16, right?
>
> > +     __u64 range_list;
>
> Pointers embedded in a structure are /not/ a good idea, because this
> will create a lot of compatibility headaches for 32-bit binaries running
> on 64-bit kernels.  Please just make the size of this header structure
> a multiple of 8 bytes and put the range_entry list immediately after it.
>
> struct copy_range {
>         __s64 dest_offset;
>         __u32 nr_range_entries;
>         __u32 flags;
>         __u64 reserved[2];
> };
>
> struct __user range_entry *re = ((struct range_entry *)(copyhead + 1));
>
> copy_from_user(&urk, re...);
>
> --D
>
Thanks, this is better. 'Reserved' field was there to be used for
future extension of the interface.
Now that you mentioned 'flags', it seems we can do away with
'reserved' fields altogether?

Regards,
Nitesh Shetty

--
dm-devel mailing list
dm-devel@redhat.com
https://listman.redhat.com/mailman/listinfo/dm-devel
Darrick J. Wong Aug. 18, 2021, 4:17 p.m. UTC | #6
On Wed, Aug 18, 2021 at 09:07:54PM +0530, Nitesh Shetty wrote:
> On Wed, Aug 18, 2021 at 5:06 AM Darrick J. Wong <djwong@kernel.org> wrote:
> >
> > On Tue, Aug 17, 2021 at 03:44:20PM +0530, SelvaKumar S wrote:
> > > From: Nitesh Shetty <nj.shetty@samsung.com>
> > >
> > > Add new BLKCOPY ioctl that offloads copying of one or more sources ranges
> > > to a destination in the device. COPY ioctl accepts a 'copy_range'
> > > structure that contains destination (in sectors), no of sources and
> > > pointer to the array of source ranges. Each source range is represented by
> > > 'range_entry' that contains start and length of source ranges (in sectors)
> > >
> > > MAX_COPY_NR_RANGE, limits the number of entries for the IOCTL and
> > > MAX_COPY_TOTAL_LENGTH limits the total copy length, IOCTL can handle.
> > >
> > > Example code, to issue BLKCOPY:
> > > /* Sample example to copy three source-ranges [0, 8] [16, 8] [32,8] to
> > >  * [64,24], on the same device */
> > >
> > > int main(void)
> > > {
> > >       int ret, fd;
> > >       struct range_entry source_range[] = {{.src = 0, .len = 8},
> > >               {.src = 16, .len = 8}, {.src = 32, .len = 8},};
> > >       struct copy_range cr;
> > >
> > >       cr.dest = 64;
> > >       cr.nr_range = 3;
> > >       cr.range_list = (__u64)&source_range;
> > >
> > >       fd = open("/dev/nvme0n1", O_RDWR);
> > >       if (fd < 0) return 1;
> > >
> > >       ret = ioctl(fd, BLKCOPY, &cr);
> > >       if (ret < 0) printf("copy failure\n");
> > >
> > >       close(fd);
> > >
> > >       return ret;
> > > }
> > >
> > > Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com>
> > > Signed-off-by: SelvaKumar S <selvakuma.s1@samsung.com>
> > > Signed-off-by: Kanchan Joshi <joshi.k@samsung.com>
> > > ---
> > >  block/ioctl.c           | 33 +++++++++++++++++++++++++++++++++
> > >  include/uapi/linux/fs.h |  8 ++++++++
> > >  2 files changed, 41 insertions(+)
> > >
> > > diff --git a/block/ioctl.c b/block/ioctl.c
> > > index eb0491e90b9a..2af56d01e9fe 100644
> > > --- a/block/ioctl.c
> > > +++ b/block/ioctl.c
> > > @@ -143,6 +143,37 @@ static int blk_ioctl_discard(struct block_device *bdev, fmode_t mode,
> > >                                   GFP_KERNEL, flags);
> > >  }
> > >
> > > +static int blk_ioctl_copy(struct block_device *bdev, fmode_t mode,
> > > +             unsigned long arg)
> > > +{
> > > +     struct copy_range crange;
> > > +     struct range_entry *rlist;
> > > +     int ret;
> > > +
> > > +     if (!(mode & FMODE_WRITE))
> > > +             return -EBADF;
> > > +
> > > +     if (copy_from_user(&crange, (void __user *)arg, sizeof(crange)))
> > > +             return -EFAULT;
> > > +
> > > +     rlist = kmalloc_array(crange.nr_range, sizeof(*rlist),
> > > +                     GFP_KERNEL);
> > > +     if (!rlist)
> > > +             return -ENOMEM;
> > > +
> > > +     if (copy_from_user(rlist, (void __user *)crange.range_list,
> > > +                             sizeof(*rlist) * crange.nr_range)) {
> > > +             ret = -EFAULT;
> > > +             goto out;
> > > +     }
> > > +
> > > +     ret = blkdev_issue_copy(bdev, crange.nr_range, rlist, bdev, crange.dest,
> > > +                     GFP_KERNEL, 0);
> > > +out:
> > > +     kfree(rlist);
> > > +     return ret;
> > > +}
> > > +
> > >  static int blk_ioctl_zeroout(struct block_device *bdev, fmode_t mode,
> > >               unsigned long arg)
> > >  {
> > > @@ -468,6 +499,8 @@ static int blkdev_common_ioctl(struct block_device *bdev, fmode_t mode,
> > >       case BLKSECDISCARD:
> > >               return blk_ioctl_discard(bdev, mode, arg,
> > >                               BLKDEV_DISCARD_SECURE);
> > > +     case BLKCOPY:
> > > +             return blk_ioctl_copy(bdev, mode, arg);
> > >       case BLKZEROOUT:
> > >               return blk_ioctl_zeroout(bdev, mode, arg);
> > >       case BLKGETDISKSEQ:
> > > diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
> > > index 7a97b588d892..4183688ff398 100644
> > > --- a/include/uapi/linux/fs.h
> > > +++ b/include/uapi/linux/fs.h
> > > @@ -76,6 +76,13 @@ struct range_entry {
> > >       __u64 len;
> > >  };
> > >
> > > +struct copy_range {
> > > +     __u64 dest;
> > > +     __u64 nr_range;
> >
> > If the maximum number of elements in the range list is 1<<12, there's no
> > need for this to be larger than a u16, right?
> >
> > > +     __u64 range_list;
> >
> > Pointers embedded in a structure are /not/ a good idea, because this
> > will create a lot of compatibility headaches for 32-bit binaries running
> > on 64-bit kernels.  Please just make the size of this header structure
> > a multiple of 8 bytes and put the range_entry list immediately after it.
> >
> > struct copy_range {
> >         __s64 dest_offset;
> >         __u32 nr_range_entries;
> >         __u32 flags;
> >         __u64 reserved[2];
> > };
> >
> > struct __user range_entry *re = ((struct range_entry *)(copyhead + 1));
> >
> > copy_from_user(&urk, re...);
> >
> > --D
> >
> Thanks, this is better. 'Reserved' field was there to be used for
> future extension of the interface.
> Now that you mentioned 'flags', it seems we can do away with
> 'reserved' fields altogether?

We still want the reserved-must-be-zero fields so that adding the first
field or two doesn't require changes to the pointer arithmetic.

Also, I suppose you could make the relationship between copy_range and
range_entry more explicit:

struct copy_range {
        __s64 dest_offset;
        __u32 nr_range_entries;
        __u32 flags;
        __u64 reserved[2];

	/* must come last */
	struct range_entry entries[];
};

struct __user range_entry *re = &copyhead->entries[0];

--D

> 
> Regards,
> Nitesh Shetty

--
dm-devel mailing list
dm-devel@redhat.com
https://listman.redhat.com/mailman/listinfo/dm-devel
diff mbox series

Patch

diff --git a/block/ioctl.c b/block/ioctl.c
index eb0491e90b9a..2af56d01e9fe 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -143,6 +143,37 @@  static int blk_ioctl_discard(struct block_device *bdev, fmode_t mode,
 				    GFP_KERNEL, flags);
 }
 
+static int blk_ioctl_copy(struct block_device *bdev, fmode_t mode,
+		unsigned long arg)
+{
+	struct copy_range crange;
+	struct range_entry *rlist;
+	int ret;
+
+	if (!(mode & FMODE_WRITE))
+		return -EBADF;
+
+	if (copy_from_user(&crange, (void __user *)arg, sizeof(crange)))
+		return -EFAULT;
+
+	rlist = kmalloc_array(crange.nr_range, sizeof(*rlist),
+			GFP_KERNEL);
+	if (!rlist)
+		return -ENOMEM;
+
+	if (copy_from_user(rlist, (void __user *)crange.range_list,
+				sizeof(*rlist) * crange.nr_range)) {
+		ret = -EFAULT;
+		goto out;
+	}
+
+	ret = blkdev_issue_copy(bdev, crange.nr_range, rlist, bdev, crange.dest,
+			GFP_KERNEL, 0);
+out:
+	kfree(rlist);
+	return ret;
+}
+
 static int blk_ioctl_zeroout(struct block_device *bdev, fmode_t mode,
 		unsigned long arg)
 {
@@ -468,6 +499,8 @@  static int blkdev_common_ioctl(struct block_device *bdev, fmode_t mode,
 	case BLKSECDISCARD:
 		return blk_ioctl_discard(bdev, mode, arg,
 				BLKDEV_DISCARD_SECURE);
+	case BLKCOPY:
+		return blk_ioctl_copy(bdev, mode, arg);
 	case BLKZEROOUT:
 		return blk_ioctl_zeroout(bdev, mode, arg);
 	case BLKGETDISKSEQ:
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index 7a97b588d892..4183688ff398 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -76,6 +76,13 @@  struct range_entry {
 	__u64 len;
 };
 
+struct copy_range {
+	__u64 dest;
+	__u64 nr_range;
+	__u64 range_list;
+	__u64 rsvd;
+};
+
 /* extent-same (dedupe) ioctls; these MUST match the btrfs ioctl definitions */
 #define FILE_DEDUPE_RANGE_SAME		0
 #define FILE_DEDUPE_RANGE_DIFFERS	1
@@ -197,6 +204,7 @@  struct fsxattr {
 #define BLKROTATIONAL _IO(0x12,126)
 #define BLKZEROOUT _IO(0x12,127)
 #define BLKGETDISKSEQ _IOR(0x12,128,__u64)
+#define BLKCOPY _IOWR(0x12, 129, struct copy_range)
 /*
  * A jump here: 130-136 are reserved for zoned block devices
  * (see uapi/linux/blkzoned.h)