diff mbox series

[v20,01/12] block: Introduce queue limits and sysfs for copy-offload support

Message ID 20240520102033.9361-2-nj.shetty@samsung.com (mailing list archive)
State New
Headers show
Series [v20,01/12] block: Introduce queue limits and sysfs for copy-offload support | expand

Commit Message

Nitesh Shetty May 20, 2024, 10:20 a.m. UTC
Add device limits as sysfs entries,
	- copy_max_bytes (RW)
	- copy_max_hw_bytes (RO)

Above limits help to split the copy payload in block layer.
copy_max_bytes: maximum total length of copy in single payload.
copy_max_hw_bytes: Reflects the device supported maximum limit.

Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com>
Signed-off-by: Kanchan Joshi <joshi.k@samsung.com>
Signed-off-by: Anuj Gupta <anuj20.g@samsung.com>
---
 Documentation/ABI/stable/sysfs-block | 23 +++++++++++++++
 block/blk-settings.c                 | 34 ++++++++++++++++++++--
 block/blk-sysfs.c                    | 43 ++++++++++++++++++++++++++++
 include/linux/blkdev.h               | 14 +++++++++
 4 files changed, 112 insertions(+), 2 deletions(-)

Comments

Damien Le Moal May 20, 2024, 2:33 p.m. UTC | #1
On 2024/05/20 12:20, Nitesh Shetty wrote:
> Add device limits as sysfs entries,
> 	- copy_max_bytes (RW)
> 	- copy_max_hw_bytes (RO)
> 
> Above limits help to split the copy payload in block layer.
> copy_max_bytes: maximum total length of copy in single payload.
> copy_max_hw_bytes: Reflects the device supported maximum limit.
> 
> Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com>
> Signed-off-by: Kanchan Joshi <joshi.k@samsung.com>
> Signed-off-by: Anuj Gupta <anuj20.g@samsung.com>
> ---
>  Documentation/ABI/stable/sysfs-block | 23 +++++++++++++++
>  block/blk-settings.c                 | 34 ++++++++++++++++++++--
>  block/blk-sysfs.c                    | 43 ++++++++++++++++++++++++++++
>  include/linux/blkdev.h               | 14 +++++++++
>  4 files changed, 112 insertions(+), 2 deletions(-)
> 
> diff --git a/Documentation/ABI/stable/sysfs-block b/Documentation/ABI/stable/sysfs-block
> index 831f19a32e08..52d8a253bf8e 100644
> --- a/Documentation/ABI/stable/sysfs-block
> +++ b/Documentation/ABI/stable/sysfs-block
> @@ -165,6 +165,29 @@ Description:
>  		last zone of the device which may be smaller.
>  
>  
> +What:		/sys/block/<disk>/queue/copy_max_bytes
> +Date:		May 2024
> +Contact:	linux-block@vger.kernel.org
> +Description:
> +		[RW] This is the maximum number of bytes that the block layer
> +		will allow for a copy request. This is always smaller or
> +		equal to the maximum size allowed by the hardware, indicated by
> +		'copy_max_hw_bytes'. An attempt to set a value higher than
> +		'copy_max_hw_bytes' will truncate this to 'copy_max_hw_bytes'.
> +		Writing '0' to this file will disable offloading copies for this
> +		device, instead copy is done via emulation.
> +
> +
> +What:		/sys/block/<disk>/queue/copy_max_hw_bytes
> +Date:		May 2024
> +Contact:	linux-block@vger.kernel.org
> +Description:
> +		[RO] This is the maximum number of bytes that the hardware
> +		will allow for single data copy request.
> +		A value of 0 means that the device does not support
> +		copy offload.
> +
> +
>  What:		/sys/block/<disk>/queue/crypto/
>  Date:		February 2022
>  Contact:	linux-block@vger.kernel.org
> diff --git a/block/blk-settings.c b/block/blk-settings.c
> index a7fe8e90240a..67010ed82422 100644
> --- a/block/blk-settings.c
> +++ b/block/blk-settings.c
> @@ -52,6 +52,9 @@ void blk_set_stacking_limits(struct queue_limits *lim)
>  	lim->max_write_zeroes_sectors = UINT_MAX;
>  	lim->max_zone_append_sectors = UINT_MAX;
>  	lim->max_user_discard_sectors = UINT_MAX;
> +	lim->max_copy_hw_sectors = UINT_MAX;
> +	lim->max_copy_sectors = UINT_MAX;
> +	lim->max_user_copy_sectors = UINT_MAX;
>  }
>  EXPORT_SYMBOL(blk_set_stacking_limits);
>  
> @@ -219,6 +222,9 @@ static int blk_validate_limits(struct queue_limits *lim)
>  		lim->misaligned = 0;
>  	}
>  
> +	lim->max_copy_sectors =
> +		min(lim->max_copy_hw_sectors, lim->max_user_copy_sectors);
> +
>  	return blk_validate_zoned_limits(lim);
>  }
>  
> @@ -231,10 +237,11 @@ int blk_set_default_limits(struct queue_limits *lim)
>  {
>  	/*
>  	 * Most defaults are set by capping the bounds in blk_validate_limits,
> -	 * but max_user_discard_sectors is special and needs an explicit
> -	 * initialization to the max value here.
> +	 * but max_user_discard_sectors and max_user_copy_sectors are special
> +	 * and needs an explicit initialization to the max value here.

s/needs/need

>  	 */
>  	lim->max_user_discard_sectors = UINT_MAX;
> +	lim->max_user_copy_sectors = UINT_MAX;
>  	return blk_validate_limits(lim);
>  }
>  
> @@ -316,6 +323,25 @@ void blk_queue_max_discard_sectors(struct request_queue *q,
>  }
>  EXPORT_SYMBOL(blk_queue_max_discard_sectors);
>  
> +/*
> + * blk_queue_max_copy_hw_sectors - set max sectors for a single copy payload
> + * @q:	the request queue for the device
> + * @max_copy_sectors: maximum number of sectors to copy
> + */
> +void blk_queue_max_copy_hw_sectors(struct request_queue *q,
> +				   unsigned int max_copy_sectors)
> +{
> +	struct queue_limits *lim = &q->limits;
> +
> +	if (max_copy_sectors > (BLK_COPY_MAX_BYTES >> SECTOR_SHIFT))
> +		max_copy_sectors = BLK_COPY_MAX_BYTES >> SECTOR_SHIFT;
> +
> +	lim->max_copy_hw_sectors = max_copy_sectors;
> +	lim->max_copy_sectors =
> +		min(max_copy_sectors, lim->max_user_copy_sectors);
> +}
> +EXPORT_SYMBOL_GPL(blk_queue_max_copy_hw_sectors);

Hmm... Such helper seems to not fit with Christoph's changes of the limits
initialization as that is not necessarily done using &q->limits but depending on
the driver, a different limit structure. So shouldn't this function be passed a
queue_limits struct pointer instead of the request queue pointer ?

> +
>  /**
>   * blk_queue_max_secure_erase_sectors - set max sectors for a secure erase
>   * @q:  the request queue for the device
> @@ -633,6 +659,10 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
>  	t->max_segment_size = min_not_zero(t->max_segment_size,
>  					   b->max_segment_size);
>  
> +	t->max_copy_sectors = min(t->max_copy_sectors, b->max_copy_sectors);
> +	t->max_copy_hw_sectors = min(t->max_copy_hw_sectors,
> +				     b->max_copy_hw_sectors);
> +
>  	t->misaligned |= b->misaligned;
>  
>  	alignment = queue_limit_alignment_offset(b, start);
> diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
> index f0f9314ab65c..805c2b6b0393 100644
> --- a/block/blk-sysfs.c
> +++ b/block/blk-sysfs.c
> @@ -205,6 +205,44 @@ static ssize_t queue_discard_zeroes_data_show(struct request_queue *q, char *pag
>  	return queue_var_show(0, page);
>  }
>  
> +static ssize_t queue_copy_hw_max_show(struct request_queue *q, char *page)
> +{
> +	return sprintf(page, "%llu\n", (unsigned long long)
> +		       q->limits.max_copy_hw_sectors << SECTOR_SHIFT);
> +}
> +
> +static ssize_t queue_copy_max_show(struct request_queue *q, char *page)
> +{
> +	return sprintf(page, "%llu\n", (unsigned long long)
> +		       q->limits.max_copy_sectors << SECTOR_SHIFT);
> +}

Given that you repeat the same pattern twice, may be add a queue_var64_show()
helper ? (naming can be changed).

> +
> +static ssize_t queue_copy_max_store(struct request_queue *q, const char *page,
> +				    size_t count)
> +{
> +	unsigned long max_copy_bytes;
> +	struct queue_limits lim;
> +	ssize_t ret;
> +	int err;
> +
> +	ret = queue_var_store(&max_copy_bytes, page, count);
> +	if (ret < 0)
> +		return ret;
> +
> +	if (max_copy_bytes & (queue_logical_block_size(q) - 1))
> +		return -EINVAL;
> +
> +	blk_mq_freeze_queue(q);
> +	lim = queue_limits_start_update(q);
> +	lim.max_user_copy_sectors = max_copy_bytes >> SECTOR_SHIFT;

max_copy_bytes is an unsigned long, so 64 bits on 64-bit arch and
max_user_copy_sectors is an unsigned int, so 32-bits. There are thus no
guarantees that this will not overflow. A check is needed.

> +	err = queue_limits_commit_update(q, &lim);
> +	blk_mq_unfreeze_queue(q);
> +
> +	if (err)

You can reuse ret here. No need for adding the err variable.

> +		return err;
> +	return count;
> +}
> +
>  static ssize_t queue_write_same_max_show(struct request_queue *q, char *page)
>  {
>  	return queue_var_show(0, page);
> @@ -505,6 +543,9 @@ QUEUE_RO_ENTRY(queue_nr_zones, "nr_zones");
>  QUEUE_RO_ENTRY(queue_max_open_zones, "max_open_zones");
>  QUEUE_RO_ENTRY(queue_max_active_zones, "max_active_zones");
>  
> +QUEUE_RO_ENTRY(queue_copy_hw_max, "copy_max_hw_bytes");
> +QUEUE_RW_ENTRY(queue_copy_max, "copy_max_bytes");
> +
>  QUEUE_RW_ENTRY(queue_nomerges, "nomerges");
>  QUEUE_RW_ENTRY(queue_rq_affinity, "rq_affinity");
>  QUEUE_RW_ENTRY(queue_poll, "io_poll");
> @@ -618,6 +659,8 @@ static struct attribute *queue_attrs[] = {
>  	&queue_discard_max_entry.attr,
>  	&queue_discard_max_hw_entry.attr,
>  	&queue_discard_zeroes_data_entry.attr,
> +	&queue_copy_hw_max_entry.attr,
> +	&queue_copy_max_entry.attr,
>  	&queue_write_same_max_entry.attr,
>  	&queue_write_zeroes_max_entry.attr,
>  	&queue_zone_append_max_entry.attr,
> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> index aefdda9f4ec7..109d9f905c3c 100644
> --- a/include/linux/blkdev.h
> +++ b/include/linux/blkdev.h
> @@ -309,6 +309,10 @@ struct queue_limits {
>  	unsigned int		discard_alignment;
>  	unsigned int		zone_write_granularity;
>  
> +	unsigned int		max_copy_hw_sectors;
> +	unsigned int		max_copy_sectors;
> +	unsigned int		max_user_copy_sectors;
> +
>  	unsigned short		max_segments;
>  	unsigned short		max_integrity_segments;
>  	unsigned short		max_discard_segments;
> @@ -933,6 +937,8 @@ void blk_queue_max_secure_erase_sectors(struct request_queue *q,
>  		unsigned int max_sectors);
>  extern void blk_queue_max_discard_sectors(struct request_queue *q,
>  		unsigned int max_discard_sectors);
> +extern void blk_queue_max_copy_hw_sectors(struct request_queue *q,
> +					  unsigned int max_copy_sectors);
>  extern void blk_queue_max_write_zeroes_sectors(struct request_queue *q,
>  		unsigned int max_write_same_sectors);
>  extern void blk_queue_logical_block_size(struct request_queue *, unsigned int);
> @@ -1271,6 +1277,14 @@ static inline unsigned int bdev_discard_granularity(struct block_device *bdev)
>  	return bdev_get_queue(bdev)->limits.discard_granularity;
>  }
>  
> +/* maximum copy offload length, this is set to 128MB based on current testing */

Current testing will not be current in a while... So may be simply say
"arbitrary" or something. Also please capitalize the first letter of the
comment. So something like:

/* Arbitrary absolute limit of 128 MB for copy offload. */

> +#define BLK_COPY_MAX_BYTES		(1 << 27)

Also, it is not clear from the name if this is a soft limit or a cap on the
hardware limit... So at least please adjust the comment to say which one it is.

> +
> +static inline unsigned int bdev_max_copy_sectors(struct block_device *bdev)
> +{
> +	return bdev_get_queue(bdev)->limits.max_copy_sectors;
> +}
> +
>  static inline unsigned int
>  bdev_max_secure_erase_sectors(struct block_device *bdev)
>  {
Bart Van Assche May 20, 2024, 10:42 p.m. UTC | #2
On 5/20/24 03:20, Nitesh Shetty wrote:
> +static ssize_t queue_copy_max_show(struct request_queue *q, char *page)
> +{
> +	return sprintf(page, "%llu\n", (unsigned long long)
> +		       q->limits.max_copy_sectors << SECTOR_SHIFT);
> +}
> +
> +static ssize_t queue_copy_max_store(struct request_queue *q, const char *page,
> +				    size_t count)
> +{
> +	unsigned long max_copy_bytes;
> +	struct queue_limits lim;
> +	ssize_t ret;
> +	int err;
> +
> +	ret = queue_var_store(&max_copy_bytes, page, count);
> +	if (ret < 0)
> +		return ret;
> +
> +	if (max_copy_bytes & (queue_logical_block_size(q) - 1))
> +		return -EINVAL;

Wouldn't it be more user-friendly if this check would be left out? Does any code
depend on max_copy_bytes being a multiple of the logical block size?

> +	blk_mq_freeze_queue(q);
> +	lim = queue_limits_start_update(q);
> +	lim.max_user_copy_sectors = max_copy_bytes >> SECTOR_SHIFT;
> +	err = queue_limits_commit_update(q, &lim);
> +	blk_mq_unfreeze_queue(q);
> +
> +	if (err)
> +		return err;
> +	return count;
> +}

queue_copy_max_show() shows max_copy_sectors while queue_copy_max_store()
modifies max_user_copy_sectors. Is that perhaps a bug?

> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> index aefdda9f4ec7..109d9f905c3c 100644
> --- a/include/linux/blkdev.h
> +++ b/include/linux/blkdev.h
> @@ -309,6 +309,10 @@ struct queue_limits {
>   	unsigned int		discard_alignment;
>   	unsigned int		zone_write_granularity;
>   
> +	unsigned int		max_copy_hw_sectors;
> +	unsigned int		max_copy_sectors;
> +	unsigned int		max_user_copy_sectors;

Two new limits are documented in Documentation/ABI/stable/sysfs-block while three
new parameters are added in struct queue_limits. Why three new limits instead of
two? Please add a comment above the new parameters that explains the role of the
new parameters.

> +/* maximum copy offload length, this is set to 128MB based on current testing */
> +#define BLK_COPY_MAX_BYTES		(1 << 27)

"current testing" sounds vague. Why is this limit required? Why to cap what the
driver reports instead of using the value reported by the driver without modifying it?

Additionally, since this constant is only used in source code that occurs in the
block/ directory, please move the definition of this constant into a source or header
file in the block/ directory.

Thanks,

Bart.
Hannes Reinecke May 21, 2024, 6:57 a.m. UTC | #3
On 5/20/24 12:20, Nitesh Shetty wrote:
> Add device limits as sysfs entries,
> 	- copy_max_bytes (RW)
> 	- copy_max_hw_bytes (RO)
> 
> Above limits help to split the copy payload in block layer.
> copy_max_bytes: maximum total length of copy in single payload.
> copy_max_hw_bytes: Reflects the device supported maximum limit.
> 
> Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com>
> Signed-off-by: Kanchan Joshi <joshi.k@samsung.com>
> Signed-off-by: Anuj Gupta <anuj20.g@samsung.com>
> ---
>   Documentation/ABI/stable/sysfs-block | 23 +++++++++++++++
>   block/blk-settings.c                 | 34 ++++++++++++++++++++--
>   block/blk-sysfs.c                    | 43 ++++++++++++++++++++++++++++
>   include/linux/blkdev.h               | 14 +++++++++
>   4 files changed, 112 insertions(+), 2 deletions(-)
> 
Reviewed-by: Hannes Reinecke <hare@suse.de>

Cheers,

Hannes
Nitesh Shetty May 21, 2024, 8:15 a.m. UTC | #4
On 20/05/24 04:33PM, Damien Le Moal wrote:
>On 2024/05/20 12:20, Nitesh Shetty wrote:
>> @@ -231,10 +237,11 @@ int blk_set_default_limits(struct queue_limits *lim)
>>  {
>>  	/*
>>  	 * Most defaults are set by capping the bounds in blk_validate_limits,
>> -	 * but max_user_discard_sectors is special and needs an explicit
>> -	 * initialization to the max value here.
>> +	 * but max_user_discard_sectors and max_user_copy_sectors are special
>> +	 * and needs an explicit initialization to the max value here.
>
>s/needs/need

acked
>
>>  	 */
>>  	lim->max_user_discard_sectors = UINT_MAX;
>> +	lim->max_user_copy_sectors = UINT_MAX;
>>  	return blk_validate_limits(lim);
>>  }
>>
>> @@ -316,6 +323,25 @@ void blk_queue_max_discard_sectors(struct request_queue *q,
>>  }
>>  EXPORT_SYMBOL(blk_queue_max_discard_sectors);
>>
>> +/*
>> + * blk_queue_max_copy_hw_sectors - set max sectors for a single copy payload
>> + * @q:	the request queue for the device
>> + * @max_copy_sectors: maximum number of sectors to copy
>> + */
>> +void blk_queue_max_copy_hw_sectors(struct request_queue *q,
>> +				   unsigned int max_copy_sectors)
>> +{
>> +	struct queue_limits *lim = &q->limits;
>> +
>> +	if (max_copy_sectors > (BLK_COPY_MAX_BYTES >> SECTOR_SHIFT))
>> +		max_copy_sectors = BLK_COPY_MAX_BYTES >> SECTOR_SHIFT;
>> +
>> +	lim->max_copy_hw_sectors = max_copy_sectors;
>> +	lim->max_copy_sectors =
>> +		min(max_copy_sectors, lim->max_user_copy_sectors);
>> +}
>> +EXPORT_SYMBOL_GPL(blk_queue_max_copy_hw_sectors);
>
>Hmm... Such helper seems to not fit with Christoph's changes of the limits
>initialization as that is not necessarily done using &q->limits but depending on
>the driver, a different limit structure. So shouldn't this function be passed a
>queue_limits struct pointer instead of the request queue pointer ?
>
Acked, we made a mistake, we are no longer using this function after moving
to atomic limits change. We will remove this function in next version.

>> +
>>  /**
>>   * blk_queue_max_secure_erase_sectors - set max sectors for a secure erase
>>   * @q:  the request queue for the device
>> @@ -633,6 +659,10 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
>>  	t->max_segment_size = min_not_zero(t->max_segment_size,
>>  					   b->max_segment_size);
>>
>> +	t->max_copy_sectors = min(t->max_copy_sectors, b->max_copy_sectors);
>> +	t->max_copy_hw_sectors = min(t->max_copy_hw_sectors,
>> +				     b->max_copy_hw_sectors);
>> +
>>  	t->misaligned |= b->misaligned;
>>
>>  	alignment = queue_limit_alignment_offset(b, start);
>> diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
>> index f0f9314ab65c..805c2b6b0393 100644
>> --- a/block/blk-sysfs.c
>> +++ b/block/blk-sysfs.c
>> @@ -205,6 +205,44 @@ static ssize_t queue_discard_zeroes_data_show(struct request_queue *q, char *pag
>>  	return queue_var_show(0, page);
>>  }
>>
>> +static ssize_t queue_copy_hw_max_show(struct request_queue *q, char *page)
>> +{
>> +	return sprintf(page, "%llu\n", (unsigned long long)
>> +		       q->limits.max_copy_hw_sectors << SECTOR_SHIFT);
>> +}
>> +
>> +static ssize_t queue_copy_max_show(struct request_queue *q, char *page)
>> +{
>> +	return sprintf(page, "%llu\n", (unsigned long long)
>> +		       q->limits.max_copy_sectors << SECTOR_SHIFT);
>> +}
>
>Given that you repeat the same pattern twice, may be add a queue_var64_show()
>helper ? (naming can be changed).
>
Acked

>> +
>> +static ssize_t queue_copy_max_store(struct request_queue *q, const char *page,
>> +				    size_t count)
>> +{
>> +	unsigned long max_copy_bytes;
>> +	struct queue_limits lim;
>> +	ssize_t ret;
>> +	int err;
>> +
>> +	ret = queue_var_store(&max_copy_bytes, page, count);
>> +	if (ret < 0)
>> +		return ret;
>> +
>> +	if (max_copy_bytes & (queue_logical_block_size(q) - 1))
>> +		return -EINVAL;
>> +
>> +	blk_mq_freeze_queue(q);
>> +	lim = queue_limits_start_update(q);
>> +	lim.max_user_copy_sectors = max_copy_bytes >> SECTOR_SHIFT;
>
>max_copy_bytes is an unsigned long, so 64 bits on 64-bit arch and
>max_user_copy_sectors is an unsigned int, so 32-bits. There are thus no
>guarantees that this will not overflow. A check is needed.
>
Acked

>> +	err = queue_limits_commit_update(q, &lim);
>> +	blk_mq_unfreeze_queue(q);
>> +
>> +	if (err)
>
>You can reuse ret here. No need for adding the err variable.
Acked

>
>> +		return err;
>> +	return count;
>> +}
>> +
>>  static ssize_t queue_write_same_max_show(struct request_queue *q, char *page)
>>  {
>>  	return queue_var_show(0, page);
>> @@ -505,6 +543,9 @@ QUEUE_RO_ENTRY(queue_nr_zones, "nr_zones");
>>  QUEUE_RO_ENTRY(queue_max_open_zones, "max_open_zones");
>>  QUEUE_RO_ENTRY(queue_max_active_zones, "max_active_zones");
>>
>> +QUEUE_RO_ENTRY(queue_copy_hw_max, "copy_max_hw_bytes");
>> +QUEUE_RW_ENTRY(queue_copy_max, "copy_max_bytes");
>> +
>>  QUEUE_RW_ENTRY(queue_nomerges, "nomerges");
>>  QUEUE_RW_ENTRY(queue_rq_affinity, "rq_affinity");
>>  QUEUE_RW_ENTRY(queue_poll, "io_poll");
>> @@ -618,6 +659,8 @@ static struct attribute *queue_attrs[] = {
>>  	&queue_discard_max_entry.attr,
>>  	&queue_discard_max_hw_entry.attr,
>>  	&queue_discard_zeroes_data_entry.attr,
>> +	&queue_copy_hw_max_entry.attr,
>> +	&queue_copy_max_entry.attr,
>>  	&queue_write_same_max_entry.attr,
>>  	&queue_write_zeroes_max_entry.attr,
>>  	&queue_zone_append_max_entry.attr,
>> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
>> index aefdda9f4ec7..109d9f905c3c 100644
>> --- a/include/linux/blkdev.h
>> +++ b/include/linux/blkdev.h
>> @@ -309,6 +309,10 @@ struct queue_limits {
>>  	unsigned int		discard_alignment;
>>  	unsigned int		zone_write_granularity;
>>
>> +	unsigned int		max_copy_hw_sectors;
>> +	unsigned int		max_copy_sectors;
>> +	unsigned int		max_user_copy_sectors;
>> +
>>  	unsigned short		max_segments;
>>  	unsigned short		max_integrity_segments;
>>  	unsigned short		max_discard_segments;
>> @@ -933,6 +937,8 @@ void blk_queue_max_secure_erase_sectors(struct request_queue *q,
>>  		unsigned int max_sectors);
>>  extern void blk_queue_max_discard_sectors(struct request_queue *q,
>>  		unsigned int max_discard_sectors);
>> +extern void blk_queue_max_copy_hw_sectors(struct request_queue *q,
>> +					  unsigned int max_copy_sectors);
>>  extern void blk_queue_max_write_zeroes_sectors(struct request_queue *q,
>>  		unsigned int max_write_same_sectors);
>>  extern void blk_queue_logical_block_size(struct request_queue *, unsigned int);
>> @@ -1271,6 +1277,14 @@ static inline unsigned int bdev_discard_granularity(struct block_device *bdev)
>>  	return bdev_get_queue(bdev)->limits.discard_granularity;
>>  }
>>
>> +/* maximum copy offload length, this is set to 128MB based on current testing */
>
>Current testing will not be current in a while... So may be simply say
>"arbitrary" or something. Also please capitalize the first letter of the
>comment. So something like:
>
>/* Arbitrary absolute limit of 128 MB for copy offload. */
>
>> +#define BLK_COPY_MAX_BYTES		(1 << 27)
>
>Also, it is not clear from the name if this is a soft limit or a cap on the
>hardware limit... So at least please adjust the comment to say which one it is.
>
Acked, it is a soft limit.

Thank You,
Nitesh Shetty
Nitesh Shetty May 21, 2024, 2:25 p.m. UTC | #5
On 20/05/24 03:42PM, Bart Van Assche wrote:
>On 5/20/24 03:20, Nitesh Shetty wrote:
>>+static ssize_t queue_copy_max_show(struct request_queue *q, char *page)
>>+{
>>+	return sprintf(page, "%llu\n", (unsigned long long)
>>+		       q->limits.max_copy_sectors << SECTOR_SHIFT);
>>+}
>>+
>>+static ssize_t queue_copy_max_store(struct request_queue *q, const char *page,
>>+				    size_t count)
>>+{
>>+	unsigned long max_copy_bytes;
>>+	struct queue_limits lim;
>>+	ssize_t ret;
>>+	int err;
>>+
>>+	ret = queue_var_store(&max_copy_bytes, page, count);
>>+	if (ret < 0)
>>+		return ret;
>>+
>>+	if (max_copy_bytes & (queue_logical_block_size(q) - 1))
>>+		return -EINVAL;
>
>Wouldn't it be more user-friendly if this check would be left out? Does any code
>depend on max_copy_bytes being a multiple of the logical block size?
>
In block layer, we use max_copy_bytes to split larger copy into
device supported copy size.
Simple copy spec requires length to be logical block size aligned.
Hence this check.

>>+	blk_mq_freeze_queue(q);
>>+	lim = queue_limits_start_update(q);
>>+	lim.max_user_copy_sectors = max_copy_bytes >> SECTOR_SHIFT;
>>+	err = queue_limits_commit_update(q, &lim);
>>+	blk_mq_unfreeze_queue(q);
>>+
>>+	if (err)
>>+		return err;
>>+	return count;
>>+}
>
>queue_copy_max_show() shows max_copy_sectors while queue_copy_max_store()
>modifies max_user_copy_sectors. Is that perhaps a bug?
>
This follows discard implementaion[1].
max_copy_sectors gets updated in queue_limits_commits_update.

[1] https://lore.kernel.org/linux-block/20240213073425.1621680-7-hch@lst.de/

>>diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
>>index aefdda9f4ec7..109d9f905c3c 100644
>>--- a/include/linux/blkdev.h
>>+++ b/include/linux/blkdev.h
>>@@ -309,6 +309,10 @@ struct queue_limits {
>>  	unsigned int		discard_alignment;
>>  	unsigned int		zone_write_granularity;
>>+	unsigned int		max_copy_hw_sectors;
>>+	unsigned int		max_copy_sectors;
>>+	unsigned int		max_user_copy_sectors;
>
>Two new limits are documented in Documentation/ABI/stable/sysfs-block while three
>new parameters are added in struct queue_limits. Why three new limits instead of
>two? Please add a comment above the new parameters that explains the role of the
>new parameters.
>
Similar to discard, only 2 limits are exposed to user.

>>+/* maximum copy offload length, this is set to 128MB based on current testing */
>>+#define BLK_COPY_MAX_BYTES		(1 << 27)
>
>"current testing" sounds vague. Why is this limit required? Why to cap what the
>driver reports instead of using the value reported by the driver without modifying it?
>
Here we are expecting BLK_COPY_MAX_BYTES >= driver supported limit.

We do support copy length larger than device supported limit.
In block later(blkdev_copy_offload), we split larger copy into device
supported limit and send down.
We added this check to make sure userspace doesn't consume all the
kernel resources[2].
We can remove/expand this arbitary limit moving forward.

[2] https://lore.kernel.org/linux-block/YRu1WFImFulfpk7s@kroah.com/

>Additionally, since this constant is only used in source code that occurs in the
>block/ directory, please move the definition of this constant into a source or header
>file in the block/ directory.
>
We are using this in null block driver as well, so we need to keep it
here.

Thank You,
Nitesh Shetty
Bart Van Assche May 22, 2024, 5:49 p.m. UTC | #6
On 5/21/24 07:25, Nitesh Shetty wrote:
> On 20/05/24 03:42PM, Bart Van Assche wrote:
>> On 5/20/24 03:20, Nitesh Shetty wrote:
>>> +    if (max_copy_bytes & (queue_logical_block_size(q) - 1))
>>> +        return -EINVAL;
>>
>> Wouldn't it be more user-friendly if this check would be left out? Does any code
>> depend on max_copy_bytes being a multiple of the logical block size?
>>
> In block layer, we use max_copy_bytes to split larger copy into
> device supported copy size.
> Simple copy spec requires length to be logical block size aligned.
> Hence this check.

Will blkdev_copy_sanity_check() reject invalid copy requests even if this
check is left out?

Thanks,

Bart.
Nitesh Shetty May 23, 2024, 7:05 a.m. UTC | #7
On 22/05/24 10:49AM, Bart Van Assche wrote:
>On 5/21/24 07:25, Nitesh Shetty wrote:
>>On 20/05/24 03:42PM, Bart Van Assche wrote:
>>>On 5/20/24 03:20, Nitesh Shetty wrote:
>>>>+    if (max_copy_bytes & (queue_logical_block_size(q) - 1))
>>>>+        return -EINVAL;
>>>
>>>Wouldn't it be more user-friendly if this check would be left out? Does any code
>>>depend on max_copy_bytes being a multiple of the logical block size?
>>>
>>In block layer, we use max_copy_bytes to split larger copy into
>>device supported copy size.
>>Simple copy spec requires length to be logical block size aligned.
>>Hence this check.
>
>Will blkdev_copy_sanity_check() reject invalid copy requests even if this
>check is left out?
>
Yes, you are right. We have checks both places.
We will remove checks in one of the places.

Thank you,
Nitesh Shetty
Christoph Hellwig June 1, 2024, 5:53 a.m. UTC | #8
On Mon, May 20, 2024 at 03:50:14PM +0530, Nitesh Shetty wrote:
> Add device limits as sysfs entries,
> 	- copy_max_bytes (RW)
> 	- copy_max_hw_bytes (RO)
> 
> Above limits help to split the copy payload in block layer.
> copy_max_bytes: maximum total length of copy in single payload.
> copy_max_hw_bytes: Reflects the device supported maximum limit.

That's a bit of a weird way to phrase the commit log as the queue_limits
are the main thing (and there are three of them as required for the
scheme to work).  The sysfs attributes really are just an artifact.

> @@ -231,10 +237,11 @@ int blk_set_default_limits(struct queue_limits *lim)
>  {
>  	/*
>  	 * Most defaults are set by capping the bounds in blk_validate_limits,
> -	 * but max_user_discard_sectors is special and needs an explicit
> -	 * initialization to the max value here.
> +	 * but max_user_discard_sectors and max_user_copy_sectors are special
> +	 * and needs an explicit initialization to the max value here.

s/needs/need/

> +/*
> + * blk_queue_max_copy_hw_sectors - set max sectors for a single copy payload
> + * @q:	the request queue for the device
> + * @max_copy_sectors: maximum number of sectors to copy
> + */
> +void blk_queue_max_copy_hw_sectors(struct request_queue *q,
> +				   unsigned int max_copy_sectors)
> +{
> +	struct queue_limits *lim = &q->limits;
> +
> +	if (max_copy_sectors > (BLK_COPY_MAX_BYTES >> SECTOR_SHIFT))
> +		max_copy_sectors = BLK_COPY_MAX_BYTES >> SECTOR_SHIFT;
> +
> +	lim->max_copy_hw_sectors = max_copy_sectors;
> +	lim->max_copy_sectors =
> +		min(max_copy_sectors, lim->max_user_copy_sectors);
> +}
> +EXPORT_SYMBOL_GPL(blk_queue_max_copy_hw_sectors);

Please don't add new blk_queue_* helpers, everything should go through
the atomic queue limits API now.  Also capping the hardware limit
here looks odd.

> +	if (max_copy_bytes & (queue_logical_block_size(q) - 1))
> +		return -EINVAL;

This should probably go into blk_validate_limits and just round down.

Also most block limits are in kb.  Not that I really know why we are
doing that, but is there a good reason to deviate from that scheme?
Nitesh Shetty June 3, 2024, 6:43 a.m. UTC | #9
On 01/06/24 07:53AM, Christoph Hellwig wrote:
>On Mon, May 20, 2024 at 03:50:14PM +0530, Nitesh Shetty wrote:
>> Add device limits as sysfs entries,
>> 	- copy_max_bytes (RW)
>> 	- copy_max_hw_bytes (RO)
>>
>> Above limits help to split the copy payload in block layer.
>> copy_max_bytes: maximum total length of copy in single payload.
>> copy_max_hw_bytes: Reflects the device supported maximum limit.
>
>That's a bit of a weird way to phrase the commit log as the queue_limits
>are the main thing (and there are three of them as required for the
>scheme to work).  The sysfs attributes really are just an artifact.
>
Acked, we will update commit log.

>> @@ -231,10 +237,11 @@ int blk_set_default_limits(struct queue_limits *lim)
>>  {
>>  	/*
>>  	 * Most defaults are set by capping the bounds in blk_validate_limits,
>> -	 * but max_user_discard_sectors is special and needs an explicit
>> -	 * initialization to the max value here.
>> +	 * but max_user_discard_sectors and max_user_copy_sectors are special
>> +	 * and needs an explicit initialization to the max value here.
>
>s/needs/need/
>
Acked.

>> +/*
>> + * blk_queue_max_copy_hw_sectors - set max sectors for a single copy payload
>> + * @q:	the request queue for the device
>> + * @max_copy_sectors: maximum number of sectors to copy
>> + */
>> +void blk_queue_max_copy_hw_sectors(struct request_queue *q,
>> +				   unsigned int max_copy_sectors)
>> +{
>> +	struct queue_limits *lim = &q->limits;
>> +
>> +	if (max_copy_sectors > (BLK_COPY_MAX_BYTES >> SECTOR_SHIFT))
>> +		max_copy_sectors = BLK_COPY_MAX_BYTES >> SECTOR_SHIFT;
>> +
>> +	lim->max_copy_hw_sectors = max_copy_sectors;
>> +	lim->max_copy_sectors =
>> +		min(max_copy_sectors, lim->max_user_copy_sectors);
>> +}
>> +EXPORT_SYMBOL_GPL(blk_queue_max_copy_hw_sectors);
>
>Please don't add new blk_queue_* helpers, everything should go through
>the atomic queue limits API now.  Also capping the hardware limit
>here looks odd.
>
This was a mistake, we are not using this function. We will remove this
function in next version.

>> +	if (max_copy_bytes & (queue_logical_block_size(q) - 1))
>> +		return -EINVAL;
>
>This should probably go into blk_validate_limits and just round down.
>
Bart also pointed out similar thing. We do same check before issuing
copy offload, so we will remove this check.

>Also most block limits are in kb.  Not that I really know why we are
>doing that, but is there a good reason to deviate from that scheme?
>
We followed discard as a reference, but we can move to kb, if that helps
with overall readability.

Thank you,
Nitesh Shetty
diff mbox series

Patch

diff --git a/Documentation/ABI/stable/sysfs-block b/Documentation/ABI/stable/sysfs-block
index 831f19a32e08..52d8a253bf8e 100644
--- a/Documentation/ABI/stable/sysfs-block
+++ b/Documentation/ABI/stable/sysfs-block
@@ -165,6 +165,29 @@  Description:
 		last zone of the device which may be smaller.
 
 
+What:		/sys/block/<disk>/queue/copy_max_bytes
+Date:		May 2024
+Contact:	linux-block@vger.kernel.org
+Description:
+		[RW] This is the maximum number of bytes that the block layer
+		will allow for a copy request. This is always smaller or
+		equal to the maximum size allowed by the hardware, indicated by
+		'copy_max_hw_bytes'. An attempt to set a value higher than
+		'copy_max_hw_bytes' will truncate this to 'copy_max_hw_bytes'.
+		Writing '0' to this file will disable offloading copies for this
+		device, instead copy is done via emulation.
+
+
+What:		/sys/block/<disk>/queue/copy_max_hw_bytes
+Date:		May 2024
+Contact:	linux-block@vger.kernel.org
+Description:
+		[RO] This is the maximum number of bytes that the hardware
+		will allow for single data copy request.
+		A value of 0 means that the device does not support
+		copy offload.
+
+
 What:		/sys/block/<disk>/queue/crypto/
 Date:		February 2022
 Contact:	linux-block@vger.kernel.org
diff --git a/block/blk-settings.c b/block/blk-settings.c
index a7fe8e90240a..67010ed82422 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -52,6 +52,9 @@  void blk_set_stacking_limits(struct queue_limits *lim)
 	lim->max_write_zeroes_sectors = UINT_MAX;
 	lim->max_zone_append_sectors = UINT_MAX;
 	lim->max_user_discard_sectors = UINT_MAX;
+	lim->max_copy_hw_sectors = UINT_MAX;
+	lim->max_copy_sectors = UINT_MAX;
+	lim->max_user_copy_sectors = UINT_MAX;
 }
 EXPORT_SYMBOL(blk_set_stacking_limits);
 
@@ -219,6 +222,9 @@  static int blk_validate_limits(struct queue_limits *lim)
 		lim->misaligned = 0;
 	}
 
+	lim->max_copy_sectors =
+		min(lim->max_copy_hw_sectors, lim->max_user_copy_sectors);
+
 	return blk_validate_zoned_limits(lim);
 }
 
@@ -231,10 +237,11 @@  int blk_set_default_limits(struct queue_limits *lim)
 {
 	/*
 	 * Most defaults are set by capping the bounds in blk_validate_limits,
-	 * but max_user_discard_sectors is special and needs an explicit
-	 * initialization to the max value here.
+	 * but max_user_discard_sectors and max_user_copy_sectors are special
+	 * and needs an explicit initialization to the max value here.
 	 */
 	lim->max_user_discard_sectors = UINT_MAX;
+	lim->max_user_copy_sectors = UINT_MAX;
 	return blk_validate_limits(lim);
 }
 
@@ -316,6 +323,25 @@  void blk_queue_max_discard_sectors(struct request_queue *q,
 }
 EXPORT_SYMBOL(blk_queue_max_discard_sectors);
 
+/*
+ * blk_queue_max_copy_hw_sectors - set max sectors for a single copy payload
+ * @q:	the request queue for the device
+ * @max_copy_sectors: maximum number of sectors to copy
+ */
+void blk_queue_max_copy_hw_sectors(struct request_queue *q,
+				   unsigned int max_copy_sectors)
+{
+	struct queue_limits *lim = &q->limits;
+
+	if (max_copy_sectors > (BLK_COPY_MAX_BYTES >> SECTOR_SHIFT))
+		max_copy_sectors = BLK_COPY_MAX_BYTES >> SECTOR_SHIFT;
+
+	lim->max_copy_hw_sectors = max_copy_sectors;
+	lim->max_copy_sectors =
+		min(max_copy_sectors, lim->max_user_copy_sectors);
+}
+EXPORT_SYMBOL_GPL(blk_queue_max_copy_hw_sectors);
+
 /**
  * blk_queue_max_secure_erase_sectors - set max sectors for a secure erase
  * @q:  the request queue for the device
@@ -633,6 +659,10 @@  int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
 	t->max_segment_size = min_not_zero(t->max_segment_size,
 					   b->max_segment_size);
 
+	t->max_copy_sectors = min(t->max_copy_sectors, b->max_copy_sectors);
+	t->max_copy_hw_sectors = min(t->max_copy_hw_sectors,
+				     b->max_copy_hw_sectors);
+
 	t->misaligned |= b->misaligned;
 
 	alignment = queue_limit_alignment_offset(b, start);
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index f0f9314ab65c..805c2b6b0393 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -205,6 +205,44 @@  static ssize_t queue_discard_zeroes_data_show(struct request_queue *q, char *pag
 	return queue_var_show(0, page);
 }
 
+static ssize_t queue_copy_hw_max_show(struct request_queue *q, char *page)
+{
+	return sprintf(page, "%llu\n", (unsigned long long)
+		       q->limits.max_copy_hw_sectors << SECTOR_SHIFT);
+}
+
+static ssize_t queue_copy_max_show(struct request_queue *q, char *page)
+{
+	return sprintf(page, "%llu\n", (unsigned long long)
+		       q->limits.max_copy_sectors << SECTOR_SHIFT);
+}
+
+static ssize_t queue_copy_max_store(struct request_queue *q, const char *page,
+				    size_t count)
+{
+	unsigned long max_copy_bytes;
+	struct queue_limits lim;
+	ssize_t ret;
+	int err;
+
+	ret = queue_var_store(&max_copy_bytes, page, count);
+	if (ret < 0)
+		return ret;
+
+	if (max_copy_bytes & (queue_logical_block_size(q) - 1))
+		return -EINVAL;
+
+	blk_mq_freeze_queue(q);
+	lim = queue_limits_start_update(q);
+	lim.max_user_copy_sectors = max_copy_bytes >> SECTOR_SHIFT;
+	err = queue_limits_commit_update(q, &lim);
+	blk_mq_unfreeze_queue(q);
+
+	if (err)
+		return err;
+	return count;
+}
+
 static ssize_t queue_write_same_max_show(struct request_queue *q, char *page)
 {
 	return queue_var_show(0, page);
@@ -505,6 +543,9 @@  QUEUE_RO_ENTRY(queue_nr_zones, "nr_zones");
 QUEUE_RO_ENTRY(queue_max_open_zones, "max_open_zones");
 QUEUE_RO_ENTRY(queue_max_active_zones, "max_active_zones");
 
+QUEUE_RO_ENTRY(queue_copy_hw_max, "copy_max_hw_bytes");
+QUEUE_RW_ENTRY(queue_copy_max, "copy_max_bytes");
+
 QUEUE_RW_ENTRY(queue_nomerges, "nomerges");
 QUEUE_RW_ENTRY(queue_rq_affinity, "rq_affinity");
 QUEUE_RW_ENTRY(queue_poll, "io_poll");
@@ -618,6 +659,8 @@  static struct attribute *queue_attrs[] = {
 	&queue_discard_max_entry.attr,
 	&queue_discard_max_hw_entry.attr,
 	&queue_discard_zeroes_data_entry.attr,
+	&queue_copy_hw_max_entry.attr,
+	&queue_copy_max_entry.attr,
 	&queue_write_same_max_entry.attr,
 	&queue_write_zeroes_max_entry.attr,
 	&queue_zone_append_max_entry.attr,
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index aefdda9f4ec7..109d9f905c3c 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -309,6 +309,10 @@  struct queue_limits {
 	unsigned int		discard_alignment;
 	unsigned int		zone_write_granularity;
 
+	unsigned int		max_copy_hw_sectors;
+	unsigned int		max_copy_sectors;
+	unsigned int		max_user_copy_sectors;
+
 	unsigned short		max_segments;
 	unsigned short		max_integrity_segments;
 	unsigned short		max_discard_segments;
@@ -933,6 +937,8 @@  void blk_queue_max_secure_erase_sectors(struct request_queue *q,
 		unsigned int max_sectors);
 extern void blk_queue_max_discard_sectors(struct request_queue *q,
 		unsigned int max_discard_sectors);
+extern void blk_queue_max_copy_hw_sectors(struct request_queue *q,
+					  unsigned int max_copy_sectors);
 extern void blk_queue_max_write_zeroes_sectors(struct request_queue *q,
 		unsigned int max_write_same_sectors);
 extern void blk_queue_logical_block_size(struct request_queue *, unsigned int);
@@ -1271,6 +1277,14 @@  static inline unsigned int bdev_discard_granularity(struct block_device *bdev)
 	return bdev_get_queue(bdev)->limits.discard_granularity;
 }
 
+/* maximum copy offload length, this is set to 128MB based on current testing */
+#define BLK_COPY_MAX_BYTES		(1 << 27)
+
+static inline unsigned int bdev_max_copy_sectors(struct block_device *bdev)
+{
+	return bdev_get_queue(bdev)->limits.max_copy_sectors;
+}
+
 static inline unsigned int
 bdev_max_secure_erase_sectors(struct block_device *bdev)
 {