diff mbox series

[v3,02/10] block: Introduce queue limits for copy-offload support

Message ID 20220214080002.18381-3-nj.shetty@samsung.com (mailing list archive)
State Not Applicable
Headers show
Series [v3,01/10] block: make bio_map_kern() non static | expand

Commit Message

Nitesh Shetty Feb. 14, 2022, 7:59 a.m. UTC
Add device limits as sysfs entries,
        - copy_offload (RW)
        - copy_max_bytes (RW)
        - copy_max_hw_bytes (RO)
        - copy_max_range_bytes (RW)
        - copy_max_range_hw_bytes (RO)
        - copy_max_nr_ranges (RW)
        - copy_max_nr_ranges_hw (RO)

Above limits help to split the copy payload in block layer.
copy_offload, used for setting copy offload(1) or emulation(0).
copy_max_bytes: maximum total length of copy in single payload.
copy_max_range_bytes: maximum length in a single entry.
copy_max_nr_ranges: maximum number of entries in a payload.
copy_max_*_hw_*: Reflects the device supported maximum limits.

Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com>
Signed-off-by: SelvaKumar S <selvakuma.s1@samsung.com>
Signed-off-by: Kanchan Joshi <joshi.k@samsung.com>
---
 block/blk-settings.c   |  59 ++++++++++++++++++
 block/blk-sysfs.c      | 138 +++++++++++++++++++++++++++++++++++++++++
 include/linux/blkdev.h |  13 ++++
 3 files changed, 210 insertions(+)

Comments

Luis Chamberlain Feb. 17, 2022, 9:07 a.m. UTC | #1
The subject says limits for copy-offload...

On Mon, Feb 14, 2022 at 01:29:52PM +0530, Nitesh Shetty wrote:
> Add device limits as sysfs entries,
>         - copy_offload (RW)
>         - copy_max_bytes (RW)
>         - copy_max_hw_bytes (RO)
>         - copy_max_range_bytes (RW)
>         - copy_max_range_hw_bytes (RO)
>         - copy_max_nr_ranges (RW)
>         - copy_max_nr_ranges_hw (RO)

Some of these seem like generic... and also I see a few more max_hw ones
not listed above...

> --- a/block/blk-settings.c
> +++ b/block/blk-settings.c
> +/**
> + * blk_queue_max_copy_sectors - set max sectors for a single copy payload
> + * @q:  the request queue for the device
> + * @max_copy_sectors: maximum number of sectors to copy
> + **/
> +void blk_queue_max_copy_sectors(struct request_queue *q,
> +		unsigned int max_copy_sectors)
> +{
> +	q->limits.max_hw_copy_sectors = max_copy_sectors;
> +	q->limits.max_copy_sectors = max_copy_sectors;
> +}
> +EXPORT_SYMBOL(blk_queue_max_copy_sectors);

Please use EXPORT_SYMBOL_GPL() for all new things.

Why is this setting both? The documentation doesn't seem to say.
What's the point?

> +
> +/**
> + * blk_queue_max_copy_range_sectors - set max sectors for a single range, in a copy payload
> + * @q:  the request queue for the device
> + * @max_copy_range_sectors: maximum number of sectors to copy in a single range
> + **/
> +void blk_queue_max_copy_range_sectors(struct request_queue *q,
> +		unsigned int max_copy_range_sectors)
> +{
> +	q->limits.max_hw_copy_range_sectors = max_copy_range_sectors;
> +	q->limits.max_copy_range_sectors = max_copy_range_sectors;
> +}
> +EXPORT_SYMBOL(blk_queue_max_copy_range_sectors);

Same here.

> +/**
> + * blk_queue_max_copy_nr_ranges - set max number of ranges, in a copy payload
> + * @q:  the request queue for the device
> + * @max_copy_nr_ranges: maximum number of ranges
> + **/
> +void blk_queue_max_copy_nr_ranges(struct request_queue *q,
> +		unsigned int max_copy_nr_ranges)
> +{
> +	q->limits.max_hw_copy_nr_ranges = max_copy_nr_ranges;
> +	q->limits.max_copy_nr_ranges = max_copy_nr_ranges;
> +}
> +EXPORT_SYMBOL(blk_queue_max_copy_nr_ranges);

Same.

> +
>  /**
>   * blk_queue_max_write_same_sectors - set max sectors for a single write same
>   * @q:  the request queue for the device
> @@ -541,6 +592,14 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
>  	t->max_segment_size = min_not_zero(t->max_segment_size,
>  					   b->max_segment_size);
>  
> +	t->max_copy_sectors = min(t->max_copy_sectors, b->max_copy_sectors);
> +	t->max_hw_copy_sectors = min(t->max_hw_copy_sectors, b->max_hw_copy_sectors);
> +	t->max_copy_range_sectors = min(t->max_copy_range_sectors, b->max_copy_range_sectors);
> +	t->max_hw_copy_range_sectors = min(t->max_hw_copy_range_sectors,
> +						b->max_hw_copy_range_sectors);
> +	t->max_copy_nr_ranges = min(t->max_copy_nr_ranges, b->max_copy_nr_ranges);
> +	t->max_hw_copy_nr_ranges = min(t->max_hw_copy_nr_ranges, b->max_hw_copy_nr_ranges);
> +
>  	t->misaligned |= b->misaligned;
>  
>  	alignment = queue_limit_alignment_offset(b, start);
> diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
> index 9f32882ceb2f..9ddd07f142d9 100644
> --- a/block/blk-sysfs.c
> +++ b/block/blk-sysfs.c
> @@ -212,6 +212,129 @@ static ssize_t queue_discard_zeroes_data_show(struct request_queue *q, char *pag
>  	return queue_var_show(0, page);
>  }
>  
> +static ssize_t queue_copy_offload_show(struct request_queue *q, char *page)
> +{
> +	return queue_var_show(blk_queue_copy(q), page);
> +}
> +
> +static ssize_t queue_copy_offload_store(struct request_queue *q,
> +				       const char *page, size_t count)
> +{
> +	unsigned long copy_offload;
> +	ssize_t ret = queue_var_store(&copy_offload, page, count);
> +
> +	if (ret < 0)
> +		return ret;
> +
> +	if (copy_offload && !q->limits.max_hw_copy_sectors)
> +		return -EINVAL;


If the kernel schedules, copy_offload may still be true and
max_hw_copy_sectors may be set to 0. Is that an issue?

> +
> +	if (copy_offload)
> +		blk_queue_flag_set(QUEUE_FLAG_COPY, q);
> +	else
> +		blk_queue_flag_clear(QUEUE_FLAG_COPY, q);

The flag may be set but the queue flag could be set. Is that an issue?

> @@ -597,6 +720,14 @@ QUEUE_RO_ENTRY(queue_nr_zones, "nr_zones");
>  QUEUE_RO_ENTRY(queue_max_open_zones, "max_open_zones");
>  QUEUE_RO_ENTRY(queue_max_active_zones, "max_active_zones");
>  
> +QUEUE_RW_ENTRY(queue_copy_offload, "copy_offload");
> +QUEUE_RO_ENTRY(queue_copy_max_hw, "copy_max_hw_bytes");
> +QUEUE_RW_ENTRY(queue_copy_max, "copy_max_bytes");
> +QUEUE_RO_ENTRY(queue_copy_range_max_hw, "copy_max_range_hw_bytes");
> +QUEUE_RW_ENTRY(queue_copy_range_max, "copy_max_range_bytes");
> +QUEUE_RO_ENTRY(queue_copy_nr_ranges_max_hw, "copy_max_nr_ranges_hw");
> +QUEUE_RW_ENTRY(queue_copy_nr_ranges_max, "copy_max_nr_ranges");

Seems like you need to update Documentation/ABI/stable/sysfs-block.

> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> index efed3820cbf7..792e6d556589 100644
> --- a/include/linux/blkdev.h
> +++ b/include/linux/blkdev.h
> @@ -254,6 +254,13 @@ struct queue_limits {
>  	unsigned int		discard_alignment;
>  	unsigned int		zone_write_granularity;
>  
> +	unsigned long		max_hw_copy_sectors;
> +	unsigned long		max_copy_sectors;
> +	unsigned int		max_hw_copy_range_sectors;
> +	unsigned int		max_copy_range_sectors;
> +	unsigned short		max_hw_copy_nr_ranges;
> +	unsigned short		max_copy_nr_ranges;

Before limits start growing more.. I wonder if we should just
stuff hw offload stuff to its own struct within queue_limits.

Christoph?

  Luis
Chaitanya Kulkarni Feb. 17, 2022, 10:16 a.m. UTC | #2
On 2/17/22 1:07 AM, Luis Chamberlain wrote:
> The subject says limits for copy-offload...
> 
> On Mon, Feb 14, 2022 at 01:29:52PM +0530, Nitesh Shetty wrote:
>> Add device limits as sysfs entries,
>>          - copy_offload (RW)
>>          - copy_max_bytes (RW)
>>          - copy_max_hw_bytes (RO)
>>          - copy_max_range_bytes (RW)
>>          - copy_max_range_hw_bytes (RO)
>>          - copy_max_nr_ranges (RW)
>>          - copy_max_nr_ranges_hw (RO)
> 
> Some of these seem like generic... and also I see a few more max_hw ones
> not listed above...
> 
>> --- a/block/blk-settings.c
>> +++ b/block/blk-settings.c
>> +/**
>> + * blk_queue_max_copy_sectors - set max sectors for a single copy payload
>> + * @q:  the request queue for the device
>> + * @max_copy_sectors: maximum number of sectors to copy
>> + **/
>> +void blk_queue_max_copy_sectors(struct request_queue *q,
>> +		unsigned int max_copy_sectors)
>> +{
>> +	q->limits.max_hw_copy_sectors = max_copy_sectors;
>> +	q->limits.max_copy_sectors = max_copy_sectors;
>> +}
>> +EXPORT_SYMBOL(blk_queue_max_copy_sectors);
> 
> Please use EXPORT_SYMBOL_GPL() for all new things.
> 
> Why is this setting both? The documentation does't seem to say.
> What's the point?
> 
>> +
>> +/**
>> + * blk_queue_max_copy_range_sectors - set max sectors for a single range, in a copy payload
>> + * @q:  the request queue for the device
>> + * @max_copy_range_sectors: maximum number of sectors to copy in a single range
>> + **/
>> +void blk_queue_max_copy_range_sectors(struct request_queue *q,
>> +		unsigned int max_copy_range_sectors)
>> +{
>> +	q->limits.max_hw_copy_range_sectors = max_copy_range_sectors;
>> +	q->limits.max_copy_range_sectors = max_copy_range_sectors;
>> +}
>> +EXPORT_SYMBOL(blk_queue_max_copy_range_sectors);
> 
> Same here.
> 
>> +/**
>> + * blk_queue_max_copy_nr_ranges - set max number of ranges, in a copy payload
>> + * @q:  the request queue for the device
>> + * @max_copy_nr_ranges: maximum number of ranges
>> + **/
>> +void blk_queue_max_copy_nr_ranges(struct request_queue *q,
>> +		unsigned int max_copy_nr_ranges)
>> +{
>> +	q->limits.max_hw_copy_nr_ranges = max_copy_nr_ranges;
>> +	q->limits.max_copy_nr_ranges = max_copy_nr_ranges;
>> +}
>> +EXPORT_SYMBOL(blk_queue_max_copy_nr_ranges);
> 
> Same.
> 
>> +
>>   /**
>>    * blk_queue_max_write_same_sectors - set max sectors for a single write same
>>    * @q:  the request queue for the device
>> @@ -541,6 +592,14 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
>>   	t->max_segment_size = min_not_zero(t->max_segment_size,
>>   					   b->max_segment_size);
>>   
>> +	t->max_copy_sectors = min(t->max_copy_sectors, b->max_copy_sectors);
>> +	t->max_hw_copy_sectors = min(t->max_hw_copy_sectors, b->max_hw_copy_sectors);
>> +	t->max_copy_range_sectors = min(t->max_copy_range_sectors, b->max_copy_range_sectors);
>> +	t->max_hw_copy_range_sectors = min(t->max_hw_copy_range_sectors,
>> +						b->max_hw_copy_range_sectors);
>> +	t->max_copy_nr_ranges = min(t->max_copy_nr_ranges, b->max_copy_nr_ranges);
>> +	t->max_hw_copy_nr_ranges = min(t->max_hw_copy_nr_ranges, b->max_hw_copy_nr_ranges);
>> +
>>   	t->misaligned |= b->misaligned;
>>   
>>   	alignment = queue_limit_alignment_offset(b, start);
>> diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
>> index 9f32882ceb2f..9ddd07f142d9 100644
>> --- a/block/blk-sysfs.c
>> +++ b/block/blk-sysfs.c
>> @@ -212,6 +212,129 @@ static ssize_t queue_discard_zeroes_data_show(struct request_queue *q, char *pag
>>   	return queue_var_show(0, page);
>>   }
>>   
>> +static ssize_t queue_copy_offload_show(struct request_queue *q, char *page)
>> +{
>> +	return queue_var_show(blk_queue_copy(q), page);
>> +}
>> +
>> +static ssize_t queue_copy_offload_store(struct request_queue *q,
>> +				       const char *page, size_t count)
>> +{
>> +	unsigned long copy_offload;
>> +	ssize_t ret = queue_var_store(&copy_offload, page, count);
>> +
>> +	if (ret < 0)
>> +		return ret;
>> +
>> +	if (copy_offload && !q->limits.max_hw_copy_sectors)
>> +		return -EINVAL;
> 
> 
> If the kernel schedules, copy_offload may still be true and
> max_hw_copy_sectors may be set to 0. Is that an issue?
> 
>> +
>> +	if (copy_offload)
>> +		blk_queue_flag_set(QUEUE_FLAG_COPY, q);
>> +	else
>> +		blk_queue_flag_clear(QUEUE_FLAG_COPY, q);
> 
> The flag may be set but the queue flag could be set. Is that an issue?
> 
>> @@ -597,6 +720,14 @@ QUEUE_RO_ENTRY(queue_nr_zones, "nr_zones");
>>   QUEUE_RO_ENTRY(queue_max_open_zones, "max_open_zones");
>>   QUEUE_RO_ENTRY(queue_max_active_zones, "max_active_zones");
>>   
>> +QUEUE_RW_ENTRY(queue_copy_offload, "copy_offload");
>> +QUEUE_RO_ENTRY(queue_copy_max_hw, "copy_max_hw_bytes");
>> +QUEUE_RW_ENTRY(queue_copy_max, "copy_max_bytes");
>> +QUEUE_RO_ENTRY(queue_copy_range_max_hw, "copy_max_range_hw_bytes");
>> +QUEUE_RW_ENTRY(queue_copy_range_max, "copy_max_range_bytes");
>> +QUEUE_RO_ENTRY(queue_copy_nr_ranges_max_hw, "copy_max_nr_ranges_hw");
>> +QUEUE_RW_ENTRY(queue_copy_nr_ranges_max, "copy_max_nr_ranges");
> 
> Seems like you need to update Documentation/ABI/stable/sysfs-block.
> 
>> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
>> index efed3820cbf7..792e6d556589 100644
>> --- a/include/linux/blkdev.h
>> +++ b/include/linux/blkdev.h
>> @@ -254,6 +254,13 @@ struct queue_limits {
>>   	unsigned int		discard_alignment;
>>   	unsigned int		zone_write_granularity;
>>   
>> +	unsigned long		max_hw_copy_sectors;
>> +	unsigned long		max_copy_sectors;
>> +	unsigned int		max_hw_copy_range_sectors;
>> +	unsigned int		max_copy_range_sectors;
>> +	unsigned short		max_hw_copy_nr_ranges;
>> +	unsigned short		max_copy_nr_ranges;
> 
> Before limits start growing more.. I wonder if we should just
> stuff hw offload stuff to its own struct within queue_limits.
> 
> Christoph?
> 

Potentially use a pointer to structure and maybe make it configurable,
although I'm not sure about the latter part, I'll let Christoph decide
that.

>    Luis
> 

-ck
Nitesh Shetty Feb. 17, 2022, 12:59 p.m. UTC | #3
Thu, Feb 17, 2022 at 01:07:00AM -0800, Luis Chamberlain wrote:
> The subject says limits for copy-offload...
> 
> On Mon, Feb 14, 2022 at 01:29:52PM +0530, Nitesh Shetty wrote:
> > Add device limits as sysfs entries,
> >         - copy_offload (RW)
> >         - copy_max_bytes (RW)
> >         - copy_max_hw_bytes (RO)
> >         - copy_max_range_bytes (RW)
> >         - copy_max_range_hw_bytes (RO)
> >         - copy_max_nr_ranges (RW)
> >         - copy_max_nr_ranges_hw (RO)
> 
> Some of these seem like generic... and also I see a few more max_hw ones
> not listed above...
>
queue_limits and sysfs entries are differently named.
All sysfs entries start with the copy_* prefix. This also makes it easy to
look up all copy-related sysfs entries.
For queue limits naming, I tried to follow the existing queue limit
convention (like discard).

> > --- a/block/blk-settings.c
> > +++ b/block/blk-settings.c
> > +/**
> > + * blk_queue_max_copy_sectors - set max sectors for a single copy payload
> > + * @q:  the request queue for the device
> > + * @max_copy_sectors: maximum number of sectors to copy
> > + **/
> > +void blk_queue_max_copy_sectors(struct request_queue *q,
> > +		unsigned int max_copy_sectors)
> > +{
> > +	q->limits.max_hw_copy_sectors = max_copy_sectors;
> > +	q->limits.max_copy_sectors = max_copy_sectors;
> > +}
> > +EXPORT_SYMBOL(blk_queue_max_copy_sectors);
> 
> Please use EXPORT_SYMBOL_GPL() for all new things.
> 
acked.

> Why is this setting both? The documentation does't seem to say.
> What's the point?
>

This function is used only by the driver, while initializing the request queue.
I will put this as part of description next time.

> > +
> > +/**
> > + * blk_queue_max_copy_range_sectors - set max sectors for a single range, in a copy payload
> > + * @q:  the request queue for the device
> > + * @max_copy_range_sectors: maximum number of sectors to copy in a single range
> > + **/
> > +void blk_queue_max_copy_range_sectors(struct request_queue *q,
> > +		unsigned int max_copy_range_sectors)
> > +{
> > +	q->limits.max_hw_copy_range_sectors = max_copy_range_sectors;
> > +	q->limits.max_copy_range_sectors = max_copy_range_sectors;
> > +}
> > +EXPORT_SYMBOL(blk_queue_max_copy_range_sectors);
> 
> Same here.
> 
> > +/**
> > + * blk_queue_max_copy_nr_ranges - set max number of ranges, in a copy payload
> > + * @q:  the request queue for the device
> > + * @max_copy_nr_ranges: maximum number of ranges
> > + **/
> > +void blk_queue_max_copy_nr_ranges(struct request_queue *q,
> > +		unsigned int max_copy_nr_ranges)
> > +{
> > +	q->limits.max_hw_copy_nr_ranges = max_copy_nr_ranges;
> > +	q->limits.max_copy_nr_ranges = max_copy_nr_ranges;
> > +}
> > +EXPORT_SYMBOL(blk_queue_max_copy_nr_ranges);
> 
> Same.
> 
> > +
> >  /**
> >   * blk_queue_max_write_same_sectors - set max sectors for a single write same
> >   * @q:  the request queue for the device
> > @@ -541,6 +592,14 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
> >  	t->max_segment_size = min_not_zero(t->max_segment_size,
> >  					   b->max_segment_size);
> >  
> > +	t->max_copy_sectors = min(t->max_copy_sectors, b->max_copy_sectors);
> > +	t->max_hw_copy_sectors = min(t->max_hw_copy_sectors, b->max_hw_copy_sectors);
> > +	t->max_copy_range_sectors = min(t->max_copy_range_sectors, b->max_copy_range_sectors);
> > +	t->max_hw_copy_range_sectors = min(t->max_hw_copy_range_sectors,
> > +						b->max_hw_copy_range_sectors);
> > +	t->max_copy_nr_ranges = min(t->max_copy_nr_ranges, b->max_copy_nr_ranges);
> > +	t->max_hw_copy_nr_ranges = min(t->max_hw_copy_nr_ranges, b->max_hw_copy_nr_ranges);
> > +
> >  	t->misaligned |= b->misaligned;
> >  
> >  	alignment = queue_limit_alignment_offset(b, start);
> > diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
> > index 9f32882ceb2f..9ddd07f142d9 100644
> > --- a/block/blk-sysfs.c
> > +++ b/block/blk-sysfs.c
> > @@ -212,6 +212,129 @@ static ssize_t queue_discard_zeroes_data_show(struct request_queue *q, char *pag
> >  	return queue_var_show(0, page);
> >  }
> >  
> > +static ssize_t queue_copy_offload_show(struct request_queue *q, char *page)
> > +{
> > +	return queue_var_show(blk_queue_copy(q), page);
> > +}
> > +
> > +static ssize_t queue_copy_offload_store(struct request_queue *q,
> > +				       const char *page, size_t count)
> > +{
> > +	unsigned long copy_offload;
> > +	ssize_t ret = queue_var_store(&copy_offload, page, count);
> > +
> > +	if (ret < 0)
> > +		return ret;
> > +
> > +	if (copy_offload && !q->limits.max_hw_copy_sectors)
> > +		return -EINVAL;
> 
> 
> If the kernel schedules, copy_offload may still be true and
> max_hw_copy_sectors may be set to 0. Is that an issue?
>

This check ensures that we don't enable offload if the device doesn't support
offload. I feel it shouldn't be an issue.

> > +
> > +	if (copy_offload)
> > +		blk_queue_flag_set(QUEUE_FLAG_COPY, q);
> > +	else
> > +		blk_queue_flag_clear(QUEUE_FLAG_COPY, q);
> 
> The flag may be set but the queue flag could be set. Is that an issue?
> 
> > @@ -597,6 +720,14 @@ QUEUE_RO_ENTRY(queue_nr_zones, "nr_zones");
> >  QUEUE_RO_ENTRY(queue_max_open_zones, "max_open_zones");
> >  QUEUE_RO_ENTRY(queue_max_active_zones, "max_active_zones");
> >  
> > +QUEUE_RW_ENTRY(queue_copy_offload, "copy_offload");
> > +QUEUE_RO_ENTRY(queue_copy_max_hw, "copy_max_hw_bytes");
> > +QUEUE_RW_ENTRY(queue_copy_max, "copy_max_bytes");
> > +QUEUE_RO_ENTRY(queue_copy_range_max_hw, "copy_max_range_hw_bytes");
> > +QUEUE_RW_ENTRY(queue_copy_range_max, "copy_max_range_bytes");
> > +QUEUE_RO_ENTRY(queue_copy_nr_ranges_max_hw, "copy_max_nr_ranges_hw");
> > +QUEUE_RW_ENTRY(queue_copy_nr_ranges_max, "copy_max_nr_ranges");
> 
> Seems like you need to update Documentation/ABI/stable/sysfs-block.
>

acked. 

> > diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> > index efed3820cbf7..792e6d556589 100644
> > --- a/include/linux/blkdev.h
> > +++ b/include/linux/blkdev.h
> > @@ -254,6 +254,13 @@ struct queue_limits {
> >  	unsigned int		discard_alignment;
> >  	unsigned int		zone_write_granularity;
> >  
> > +	unsigned long		max_hw_copy_sectors;
> > +	unsigned long		max_copy_sectors;
> > +	unsigned int		max_hw_copy_range_sectors;
> > +	unsigned int		max_copy_range_sectors;
> > +	unsigned short		max_hw_copy_nr_ranges;
> > +	unsigned short		max_copy_nr_ranges;
> 
> Before limits start growing more.. I wonder if we should just
> stuff hw offload stuff to its own struct within queue_limits.
> 
> Christoph?
> 
>   Luis
>
Yeah, would like to know community opinion on this.


-- Nitesh
Luis Chamberlain Feb. 17, 2022, 5:49 p.m. UTC | #4
On Thu, Feb 17, 2022 at 10:16:21AM +0000, Chaitanya Kulkarni wrote:
> On 2/17/22 1:07 AM, Luis Chamberlain wrote:
> >> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> >> index efed3820cbf7..792e6d556589 100644
> >> --- a/include/linux/blkdev.h
> >> +++ b/include/linux/blkdev.h
> >> @@ -254,6 +254,13 @@ struct queue_limits {
> >>   	unsigned int		discard_alignment;
> >>   	unsigned int		zone_write_granularity;
> >>   
> >> +	unsigned long		max_hw_copy_sectors;
> >> +	unsigned long		max_copy_sectors;
> >> +	unsigned int		max_hw_copy_range_sectors;
> >> +	unsigned int		max_copy_range_sectors;
> >> +	unsigned short		max_hw_copy_nr_ranges;
> >> +	unsigned short		max_copy_nr_ranges;
> > 
> > Before limits start growing more.. I wonder if we should just
> > stuff hw offload stuff to its own struct within queue_limits.
> > 
> > Christoph?
> > 
> 
> Potentially use a pointer to structure and maybe make it configurable,

Did you mean to make queue limits local or for hw offload and make that
a pointer? If so that seems odd because even for hw copy offload we
still need the other limits no?

So what I meant was that struct queue_limits seems to be getting large,
and that hw copy offload seems like an example use case where we should
probably use a separate struct for it. And while at it, well, start
adding kdocs for these things, because, there's tons of things which
could use kdoc love.

> although I'm not sure about the later part, I'll let Christoph decide
> that.

  Luis
Luis Chamberlain Feb. 23, 2022, 12:55 a.m. UTC | #5
On Thu, Feb 17, 2022 at 06:29:01PM +0530, Nitesh Shetty wrote:
>  Thu, Feb 17, 2022 at 01:07:00AM -0800, Luis Chamberlain wrote:
> > The subject says limits for copy-offload...
> > 
> > On Mon, Feb 14, 2022 at 01:29:52PM +0530, Nitesh Shetty wrote:
> > > Add device limits as sysfs entries,
> > >         - copy_offload (RW)
> > >         - copy_max_bytes (RW)
> > >         - copy_max_hw_bytes (RO)
> > >         - copy_max_range_bytes (RW)
> > >         - copy_max_range_hw_bytes (RO)
> > >         - copy_max_nr_ranges (RW)
> > >         - copy_max_nr_ranges_hw (RO)
> > 
> > Some of these seem like generic... and also I see a few more max_hw ones
> > not listed above...
> >
> queue_limits and sysfs entries are differently named.
> All sysfs entries start with copy_* prefix. Also it makes easy to lookup
> all copy sysfs.
> For queue limits naming, I tried to following existing queue limit
> convention (like discard).

My point was that your subject seems to indicate the changes are just
for copy-offload, but you seem to be adding generic queue limits as
well. Is that correct? If so then perhaps the subject should be changed
or the patch split up.

> > > +static ssize_t queue_copy_offload_store(struct request_queue *q,
> > > +				       const char *page, size_t count)
> > > +{
> > > +	unsigned long copy_offload;
> > > +	ssize_t ret = queue_var_store(&copy_offload, page, count);
> > > +
> > > +	if (ret < 0)
> > > +		return ret;
> > > +
> > > +	if (copy_offload && !q->limits.max_hw_copy_sectors)
> > > +		return -EINVAL;
> > 
> > 
> > If the kernel schedules, copy_offload may still be true and
> > max_hw_copy_sectors may be set to 0. Is that an issue?
> >
> 
> This check ensures that, we dont enable offload if device doesnt support
> offload. I feel it shouldn't be an issue.

My point was this:

CPU1                                       CPU2
Time
1) if (copy_offload 
2)    ---> preemption so it schedules      
3)    ---> some other high priority task  Sets q->limits.max_hw_copy_sectors to 0
4) && !q->limits.max_hw_copy_sectors)

Can something bad happen if we allow for this?
Damien Le Moal Feb. 23, 2022, 1:29 a.m. UTC | #6
On 2/23/22 09:55, Luis Chamberlain wrote:
> On Thu, Feb 17, 2022 at 06:29:01PM +0530, Nitesh Shetty wrote:
>>  Thu, Feb 17, 2022 at 01:07:00AM -0800, Luis Chamberlain wrote:
>>> The subject says limits for copy-offload...
>>>
>>> On Mon, Feb 14, 2022 at 01:29:52PM +0530, Nitesh Shetty wrote:
>>>> Add device limits as sysfs entries,
>>>>         - copy_offload (RW)
>>>>         - copy_max_bytes (RW)
>>>>         - copy_max_hw_bytes (RO)
>>>>         - copy_max_range_bytes (RW)
>>>>         - copy_max_range_hw_bytes (RO)
>>>>         - copy_max_nr_ranges (RW)
>>>>         - copy_max_nr_ranges_hw (RO)
>>>
>>> Some of these seem like generic... and also I see a few more max_hw ones
>>> not listed above...
>>>
>> queue_limits and sysfs entries are differently named.
>> All sysfs entries start with copy_* prefix. Also it makes easy to lookup
>> all copy sysfs.
>> For queue limits naming, I tried to following existing queue limit
>> convention (like discard).
> 
> My point was that your subject seems to indicate the changes are just
> for copy-offload, but you seem to be adding generic queue limits as
> well. Is that correct? If so then perhaps the subject should be changed
> or the patch split up.
> 
>>>> +static ssize_t queue_copy_offload_store(struct request_queue *q,
>>>> +				       const char *page, size_t count)
>>>> +{
>>>> +	unsigned long copy_offload;
>>>> +	ssize_t ret = queue_var_store(&copy_offload, page, count);
>>>> +
>>>> +	if (ret < 0)
>>>> +		return ret;
>>>> +
>>>> +	if (copy_offload && !q->limits.max_hw_copy_sectors)
>>>> +		return -EINVAL;
>>>
>>>
>>> If the kernel schedules, copy_offload may still be true and
>>> max_hw_copy_sectors may be set to 0. Is that an issue?
>>>
>>
>> This check ensures that, we dont enable offload if device doesnt support
>> offload. I feel it shouldn't be an issue.
> 
> My point was this:
> 
> CPU1                                       CPU2
> Time
> 1) if (copy_offload 
> 2)    ---> preemption so it schedules      
> 3)    ---> some other high priority task  Sets q->limits.max_hw_copy_sectors to 0
> 4) && !q->limits.max_hw_copy_sectors)
> 
> Can something bad happen if we allow for this?

max_hw_copy_sectors describes the device capability to offload copy. So
this is read-only and "max_hw_copy_sectors != 0" means that the device
supports copy offload (this attribute should really be named
max_hw_copy_offload_sectors).

The actual loop to issue copy offload BIOs, however, must use the soft
version of the attribute: max_copy_sectors, which defaults to
max_hw_copy_sectors if copy offload is turned on and I guess to
max_sectors for the emulation case.

Now, with this in mind, I do not see how allowing max_copy_sectors to be
0 makes sense. I fail to see why that should be allowed since:
1) If copy_offload is true, we will rely on the device and chunk copy
offload BIOs up to max_copy_sectors
2) If copy_offload is false (or device does not support it), emulation
will be used by issuing read/write BIOs of up to max_copy_sectors.

Thus max_copy_sectors must always be at least equal to the device
minimum IO size, that is, the logical block size.
Nitesh Shetty Feb. 24, 2022, 12:02 p.m. UTC | #7
On Tue, Feb 22, 2022 at 04:55:41PM -0800, Luis Chamberlain wrote:
> On Thu, Feb 17, 2022 at 06:29:01PM +0530, Nitesh Shetty wrote:
> >  Thu, Feb 17, 2022 at 01:07:00AM -0800, Luis Chamberlain wrote:
> > > The subject says limits for copy-offload...
> > > 
> > > On Mon, Feb 14, 2022 at 01:29:52PM +0530, Nitesh Shetty wrote:
> > > > Add device limits as sysfs entries,
> > > >         - copy_offload (RW)
> > > >         - copy_max_bytes (RW)
> > > >         - copy_max_hw_bytes (RO)
> > > >         - copy_max_range_bytes (RW)
> > > >         - copy_max_range_hw_bytes (RO)
> > > >         - copy_max_nr_ranges (RW)
> > > >         - copy_max_nr_ranges_hw (RO)
> > > 
> > > Some of these seem like generic... and also I see a few more max_hw ones
> > > not listed above...
> > >
> > queue_limits and sysfs entries are differently named.
> > All sysfs entries start with copy_* prefix. Also it makes easy to lookup
> > all copy sysfs.
> > For queue limits naming, I tried to following existing queue limit
> > convention (like discard).
> 
> My point was that your subject seems to indicate the changes are just
> for copy-offload, but you seem to be adding generic queue limits as
> well. Is that correct? If so then perhaps the subject should be changed
> or the patch split up.
>
Yeah, the queue limits are for copy offload. I think I will make this more
readable by adding a copy_offload_* prefix.

> > > > +static ssize_t queue_copy_offload_store(struct request_queue *q,
> > > > +				       const char *page, size_t count)
> > > > +{
> > > > +	unsigned long copy_offload;
> > > > +	ssize_t ret = queue_var_store(&copy_offload, page, count);
> > > > +
> > > > +	if (ret < 0)
> > > > +		return ret;
> > > > +
> > > > +	if (copy_offload && !q->limits.max_hw_copy_sectors)
> > > > +		return -EINVAL;
> > > 
> > > 
> > > If the kernel schedules, copy_offload may still be true and
> > > max_hw_copy_sectors may be set to 0. Is that an issue?
> > >
> > 
> > This check ensures that, we dont enable offload if device doesnt support
> > offload. I feel it shouldn't be an issue.
> 
> My point was this:
> 
> CPU1                                       CPU2
> Time
> 1) if (copy_offload 
> 2)    ---> preemption so it schedules      
> 3)    ---> some other high priority task  Sets q->limits.max_hw_copy_sectors to 0
> 4) && !q->limits.max_hw_copy_sectors)
> 
> Can something bad happen if we allow for this?
> 
> 

max_hw_copy_sectors is read-only for the user. And inside the kernel, this is set
only by the driver at initialization.
Nitesh Shetty Feb. 24, 2022, 12:12 p.m. UTC | #8
On Wed, Feb 23, 2022 at 10:29:18AM +0900, Damien Le Moal wrote:
> On 2/23/22 09:55, Luis Chamberlain wrote:
> > On Thu, Feb 17, 2022 at 06:29:01PM +0530, Nitesh Shetty wrote:
> >>  Thu, Feb 17, 2022 at 01:07:00AM -0800, Luis Chamberlain wrote:
> >>> The subject says limits for copy-offload...
> >>>
> >>> On Mon, Feb 14, 2022 at 01:29:52PM +0530, Nitesh Shetty wrote:
> >>>> Add device limits as sysfs entries,
> >>>>         - copy_offload (RW)
> >>>>         - copy_max_bytes (RW)
> >>>>         - copy_max_hw_bytes (RO)
> >>>>         - copy_max_range_bytes (RW)
> >>>>         - copy_max_range_hw_bytes (RO)
> >>>>         - copy_max_nr_ranges (RW)
> >>>>         - copy_max_nr_ranges_hw (RO)
> >>>
> >>> Some of these seem like generic... and also I see a few more max_hw ones
> >>> not listed above...
> >>>
> >> queue_limits and sysfs entries are differently named.
> >> All sysfs entries start with copy_* prefix. Also it makes easy to lookup
> >> all copy sysfs.
> >> For queue limits naming, I tried to following existing queue limit
> >> convention (like discard).
> > 
> > My point was that your subject seems to indicate the changes are just
> > for copy-offload, but you seem to be adding generic queue limits as
> > well. Is that correct? If so then perhaps the subject should be changed
> > or the patch split up.
> > 
> >>>> +static ssize_t queue_copy_offload_store(struct request_queue *q,
> >>>> +				       const char *page, size_t count)
> >>>> +{
> >>>> +	unsigned long copy_offload;
> >>>> +	ssize_t ret = queue_var_store(&copy_offload, page, count);
> >>>> +
> >>>> +	if (ret < 0)
> >>>> +		return ret;
> >>>> +
> >>>> +	if (copy_offload && !q->limits.max_hw_copy_sectors)
> >>>> +		return -EINVAL;
> >>>
> >>>
> >>> If the kernel schedules, copy_offload may still be true and
> >>> max_hw_copy_sectors may be set to 0. Is that an issue?
> >>>
> >>
> >> This check ensures that, we dont enable offload if device doesnt support
> >> offload. I feel it shouldn't be an issue.
> > 
> > My point was this:
> > 
> > CPU1                                       CPU2
> > Time
> > 1) if (copy_offload 
> > 2)    ---> preemption so it schedules      
> > 3)    ---> some other high priority task  Sets q->limits.max_hw_copy_sectors to 0
> > 4) && !q->limits.max_hw_copy_sectors)
> > 
> > Can something bad happen if we allow for this?
> 
> max_hw_copy_sectors describes the device capability to offload copy. So
> this is read-only and "max_hw_copy_sectors != 0" means that the device
> supports copy offload (this attribute should really be named
> max_hw_copy_offload_sectors).
>
Yes, it does make sense to change the prefix to copy_offload_*, but the downside
is that the sysfs attribute names become too long.

> The actual loop to issue copy offload BIOs, however, must use the soft
> version of the attribute: max_copy_sectors, which defaults to
> max_hw_copy_sectors if copy offload is truned on and I guess to
> max_sectors for the emulation case.
> 
> Now, with this in mind, I do not see how allowing max_copy_sectors to be
> 0 makes sense. I fail to see why that should be allowed since:
> 1) If copy_offload is true, we will rely on the device and chunk copy
> offload BIOs up to max_copy_sectors
> 2) If copy_offload is false (or device does not support it), emulation
> will be used by issuing read/write BIOs of up to max_copy_sectors.
> 
> Thus max_copy_sectors must always be at least equal to the device
> minimum IO size, that is, the logical block size.

Agreed, if the device doesn't support offload, the soft limit should be based on
the limits of READ/WRITE IOs.

--
Nitesh Shetty
diff mbox series

Patch

diff --git a/block/blk-settings.c b/block/blk-settings.c
index b880c70e22e4..4baccc93a294 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -57,6 +57,12 @@  void blk_set_default_limits(struct queue_limits *lim)
 	lim->misaligned = 0;
 	lim->zoned = BLK_ZONED_NONE;
 	lim->zone_write_granularity = 0;
+	lim->max_hw_copy_sectors = 0;
+	lim->max_copy_sectors = 0;
+	lim->max_hw_copy_nr_ranges = 0;
+	lim->max_copy_nr_ranges = 0;
+	lim->max_hw_copy_range_sectors = 0;
+	lim->max_copy_range_sectors = 0;
 }
 EXPORT_SYMBOL(blk_set_default_limits);
 
@@ -82,6 +88,12 @@  void blk_set_stacking_limits(struct queue_limits *lim)
 	lim->max_write_same_sectors = UINT_MAX;
 	lim->max_write_zeroes_sectors = UINT_MAX;
 	lim->max_zone_append_sectors = UINT_MAX;
+	lim->max_hw_copy_sectors = ULONG_MAX;
+	lim->max_copy_sectors = ULONG_MAX;
+	lim->max_hw_copy_range_sectors = UINT_MAX;
+	lim->max_copy_range_sectors = UINT_MAX;
+	lim->max_hw_copy_nr_ranges = USHRT_MAX;
+	lim->max_copy_nr_ranges = USHRT_MAX;
 }
 EXPORT_SYMBOL(blk_set_stacking_limits);
 
@@ -178,6 +190,45 @@  void blk_queue_max_discard_sectors(struct request_queue *q,
 }
 EXPORT_SYMBOL(blk_queue_max_discard_sectors);
 
+/**
+ * blk_queue_max_copy_sectors - set max sectors for a single copy payload
+ * @q:  the request queue for the device
+ * @max_copy_sectors: max sectors to copy, stored as both the hw and soft limit
+ **/
+void blk_queue_max_copy_sectors(struct request_queue *q,
+		unsigned int max_copy_sectors)
+{
+	q->limits.max_hw_copy_sectors = max_copy_sectors;
+	q->limits.max_copy_sectors = max_copy_sectors;
+}
+EXPORT_SYMBOL(blk_queue_max_copy_sectors);
+
+/**
+ * blk_queue_max_copy_range_sectors - set max sectors for a single range, in a copy payload
+ * @q:  the request queue for the device
+ * @max_copy_range_sectors: max sectors in a single range, stored as both the hw and soft limit
+ **/
+void blk_queue_max_copy_range_sectors(struct request_queue *q,
+		unsigned int max_copy_range_sectors)
+{
+	q->limits.max_hw_copy_range_sectors = max_copy_range_sectors;
+	q->limits.max_copy_range_sectors = max_copy_range_sectors;
+}
+EXPORT_SYMBOL(blk_queue_max_copy_range_sectors);
+
+/**
+ * blk_queue_max_copy_nr_ranges - set max number of ranges, in a copy payload
+ * @q:  the request queue for the device
+ * @max_copy_nr_ranges: max ranges, stored as both hw and soft limit (unsigned short fields)
+ **/
+void blk_queue_max_copy_nr_ranges(struct request_queue *q,
+		unsigned int max_copy_nr_ranges)
+{
+	q->limits.max_hw_copy_nr_ranges = max_copy_nr_ranges;
+	q->limits.max_copy_nr_ranges = max_copy_nr_ranges;
+}
+EXPORT_SYMBOL(blk_queue_max_copy_nr_ranges);
+
 /**
  * blk_queue_max_write_same_sectors - set max sectors for a single write same
  * @q:  the request queue for the device
@@ -541,6 +592,14 @@  int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
 	t->max_segment_size = min_not_zero(t->max_segment_size,
 					   b->max_segment_size);
 
+	t->max_copy_sectors = min(t->max_copy_sectors, b->max_copy_sectors);
+	t->max_hw_copy_sectors = min(t->max_hw_copy_sectors, b->max_hw_copy_sectors);
+	t->max_copy_range_sectors = min(t->max_copy_range_sectors, b->max_copy_range_sectors);
+	t->max_hw_copy_range_sectors = min(t->max_hw_copy_range_sectors,
+						b->max_hw_copy_range_sectors);
+	t->max_copy_nr_ranges = min(t->max_copy_nr_ranges, b->max_copy_nr_ranges);
+	t->max_hw_copy_nr_ranges = min(t->max_hw_copy_nr_ranges, b->max_hw_copy_nr_ranges);
+
 	t->misaligned |= b->misaligned;
 
 	alignment = queue_limit_alignment_offset(b, start);
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 9f32882ceb2f..9ddd07f142d9 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -212,6 +212,129 @@  static ssize_t queue_discard_zeroes_data_show(struct request_queue *q, char *pag
 	return queue_var_show(0, page);
 }
 
+static ssize_t queue_copy_offload_show(struct request_queue *q, char *page)
+{
+	return queue_var_show(blk_queue_copy(q), page);
+}
+
+static ssize_t queue_copy_offload_store(struct request_queue *q,
+				       const char *page, size_t count)
+{
+	unsigned long copy_offload;
+	ssize_t ret = queue_var_store(&copy_offload, page, count);
+
+	if (ret < 0)
+		return ret;
+
+	if (copy_offload && !q->limits.max_hw_copy_sectors)
+		return -EINVAL;
+
+	if (copy_offload)
+		blk_queue_flag_set(QUEUE_FLAG_COPY, q);
+	else
+		blk_queue_flag_clear(QUEUE_FLAG_COPY, q);
+
+	return ret;
+}
+
+static ssize_t queue_copy_max_hw_show(struct request_queue *q, char *page)
+{
+	return sprintf(page, "%llu\n",
+		(unsigned long long)q->limits.max_hw_copy_sectors << 9);
+}
+
+static ssize_t queue_copy_max_show(struct request_queue *q, char *page)
+{
+	return sprintf(page, "%llu\n",
+		(unsigned long long)q->limits.max_copy_sectors << 9);
+}
+
+static ssize_t queue_copy_max_store(struct request_queue *q,
+				       const char *page, size_t count)
+{
+	unsigned long max_copy;
+	ssize_t ret = queue_var_store(&max_copy, page, count);
+
+	if (ret < 0)
+		return ret;
+
+	if (max_copy & (queue_logical_block_size(q) - 1))
+		return -EINVAL;
+
+	max_copy >>= 9;
+	if (max_copy > q->limits.max_hw_copy_sectors)
+		max_copy = q->limits.max_hw_copy_sectors;
+
+	q->limits.max_copy_sectors = max_copy;
+	return ret;
+}
+
+static ssize_t queue_copy_range_max_hw_show(struct request_queue *q, char *page)
+{
+	return sprintf(page, "%llu\n",
+		(unsigned long long)q->limits.max_hw_copy_range_sectors << 9);
+}
+
+static ssize_t queue_copy_range_max_show(struct request_queue *q,
+		char *page)
+{
+	return sprintf(page, "%llu\n",
+		(unsigned long long)q->limits.max_copy_range_sectors << 9);
+}
+
+static ssize_t queue_copy_range_max_store(struct request_queue *q,
+				       const char *page, size_t count)
+{
+	unsigned long max_copy;
+	ssize_t ret = queue_var_store(&max_copy, page, count);
+
+	if (ret < 0)
+		return ret;
+
+	if (max_copy & (queue_logical_block_size(q) - 1))
+		return -EINVAL;
+
+	max_copy >>= 9;
+	if (max_copy > UINT_MAX)
+		return -EINVAL;
+
+	if (max_copy > q->limits.max_hw_copy_range_sectors)
+		max_copy = q->limits.max_hw_copy_range_sectors;
+
+	q->limits.max_copy_range_sectors = max_copy;
+	return ret;
+}
+
+static ssize_t queue_copy_nr_ranges_max_hw_show(struct request_queue *q, char *page)
+{
+	return queue_var_show(q->limits.max_hw_copy_nr_ranges, page);
+}
+
+static ssize_t queue_copy_nr_ranges_max_show(struct request_queue *q,
+		char *page)
+{
+	return queue_var_show(q->limits.max_copy_nr_ranges, page);
+}
+
+static ssize_t queue_copy_nr_ranges_max_store(struct request_queue *q,
+				       const char *page, size_t count)
+{
+	unsigned long max_nr;
+	ssize_t ret = queue_var_store(&max_nr, page, count);
+
+	if (ret < 0)
+		return ret;
+
+	if (max_nr > USHRT_MAX)
+		return -EINVAL;
+
+	if (max_nr > q->limits.max_hw_copy_nr_ranges)
+		max_nr = q->limits.max_hw_copy_nr_ranges;
+
+	q->limits.max_copy_nr_ranges = max_nr;
+	return ret;
+}
+
 static ssize_t queue_write_same_max_show(struct request_queue *q, char *page)
 {
 	return sprintf(page, "%llu\n",
@@ -597,6 +720,14 @@  QUEUE_RO_ENTRY(queue_nr_zones, "nr_zones");
 QUEUE_RO_ENTRY(queue_max_open_zones, "max_open_zones");
 QUEUE_RO_ENTRY(queue_max_active_zones, "max_active_zones");
 
+QUEUE_RW_ENTRY(queue_copy_offload, "copy_offload");
+QUEUE_RO_ENTRY(queue_copy_max_hw, "copy_max_hw_bytes");
+QUEUE_RW_ENTRY(queue_copy_max, "copy_max_bytes");
+QUEUE_RO_ENTRY(queue_copy_range_max_hw, "copy_max_range_hw_bytes");
+QUEUE_RW_ENTRY(queue_copy_range_max, "copy_max_range_bytes");
+QUEUE_RO_ENTRY(queue_copy_nr_ranges_max_hw, "copy_max_nr_ranges_hw");
+QUEUE_RW_ENTRY(queue_copy_nr_ranges_max, "copy_max_nr_ranges");
+
 QUEUE_RW_ENTRY(queue_nomerges, "nomerges");
 QUEUE_RW_ENTRY(queue_rq_affinity, "rq_affinity");
 QUEUE_RW_ENTRY(queue_poll, "io_poll");
@@ -643,6 +774,13 @@  static struct attribute *queue_attrs[] = {
 	&queue_discard_max_entry.attr,
 	&queue_discard_max_hw_entry.attr,
 	&queue_discard_zeroes_data_entry.attr,
+	&queue_copy_offload_entry.attr,
+	&queue_copy_max_hw_entry.attr,
+	&queue_copy_max_entry.attr,
+	&queue_copy_range_max_hw_entry.attr,
+	&queue_copy_range_max_entry.attr,
+	&queue_copy_nr_ranges_max_hw_entry.attr,
+	&queue_copy_nr_ranges_max_entry.attr,
 	&queue_write_same_max_entry.attr,
 	&queue_write_zeroes_max_entry.attr,
 	&queue_zone_append_max_entry.attr,
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index efed3820cbf7..792e6d556589 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -254,6 +254,13 @@  struct queue_limits {
 	unsigned int		discard_alignment;
 	unsigned int		zone_write_granularity;
 
+	unsigned long		max_hw_copy_sectors;
+	unsigned long		max_copy_sectors;
+	unsigned int		max_hw_copy_range_sectors;
+	unsigned int		max_copy_range_sectors;
+	unsigned short		max_hw_copy_nr_ranges;
+	unsigned short		max_copy_nr_ranges;
+
 	unsigned short		max_segments;
 	unsigned short		max_integrity_segments;
 	unsigned short		max_discard_segments;
@@ -562,6 +569,7 @@  struct request_queue {
 #define QUEUE_FLAG_RQ_ALLOC_TIME 27	/* record rq->alloc_time_ns */
 #define QUEUE_FLAG_HCTX_ACTIVE	28	/* at least one blk-mq hctx is active */
 #define QUEUE_FLAG_NOWAIT       29	/* device supports NOWAIT */
+#define QUEUE_FLAG_COPY		30	/* supports copy offload */
 
 #define QUEUE_FLAG_MQ_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) |		\
 				 (1 << QUEUE_FLAG_SAME_COMP) |		\
@@ -585,6 +593,7 @@  bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
 #define blk_queue_io_stat(q)	test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags)
 #define blk_queue_add_random(q)	test_bit(QUEUE_FLAG_ADD_RANDOM, &(q)->queue_flags)
 #define blk_queue_discard(q)	test_bit(QUEUE_FLAG_DISCARD, &(q)->queue_flags)
+#define blk_queue_copy(q)	test_bit(QUEUE_FLAG_COPY, &(q)->queue_flags)
 #define blk_queue_zone_resetall(q)	\
 	test_bit(QUEUE_FLAG_ZONE_RESETALL, &(q)->queue_flags)
 #define blk_queue_secure_erase(q) \
@@ -958,6 +967,10 @@  extern void blk_queue_max_discard_segments(struct request_queue *,
 extern void blk_queue_max_segment_size(struct request_queue *, unsigned int);
 extern void blk_queue_max_discard_sectors(struct request_queue *q,
 		unsigned int max_discard_sectors);
+extern void blk_queue_max_copy_sectors(struct request_queue *q, unsigned int max_copy_sectors);
+extern void blk_queue_max_copy_range_sectors(struct request_queue *q,
+		unsigned int max_copy_range_sectors);
+extern void blk_queue_max_copy_nr_ranges(struct request_queue *q, unsigned int max_copy_nr_ranges);
 extern void blk_queue_max_write_same_sectors(struct request_queue *q,
 		unsigned int max_write_same_sectors);
 extern void blk_queue_max_write_zeroes_sectors(struct request_queue *q,