
[v2] block: fix queue limits checks in blk_rq_map_user_bvec for real

Message ID 20241028090840.446180-1-hch@lst.de (mailing list archive)
State New

Commit Message

Christoph Hellwig Oct. 28, 2024, 9:07 a.m. UTC
blk_rq_map_user_bvec currently only has ad-hoc checks for queue limits,
and the last fix to it enabled valid NVMe I/O to pass, but also allowed
invalid I/O for drivers that set a max_segment_size or seg_boundary
limit.

Fix this once and for all by using the bio_split_rw_at helper from the I/O
path, which indicates if and where a bio would have to be split to
adhere to the queue limits, and if it returns a positive value, turn
that into -EREMOTEIO to retry using the copy path.

Fixes: 2ff949441802 ("block: fix sanity checks in blk_rq_map_user_bvec")
Signed-off-by: Christoph Hellwig <hch@lst.de>
---

Changes since v1:
 - correctly shift the max_hw_sectors value (see the sketch below).
   Tested by verifying that I/Os exceeding max_hw_sectors fail properly
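
   A minimal illustration of the unit conversion (example values only,
   not taken from the patch): max_hw_sectors is expressed in 512-byte
   sectors, so converting the limit to bytes needs a left shift by
   SECTOR_SHIFT (9):

	unsigned int max_hw_sectors = 2048;	/* assumed example value */
	unsigned int max_bytes = max_hw_sectors << SECTOR_SHIFT; /* 1 MiB */

	/*
	 * Note the direction: a right shift here (2048 >> 9 == 4) would
	 * shrink the limit to 4 bytes and reject almost any valid I/O.
	 */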

 block/blk-map.c | 56 +++++++++++++++----------------------------------
 1 file changed, 17 insertions(+), 39 deletions(-)
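
For context, the -EREMOTEIO convention relies on the caller falling back
to a bounce-buffer copy when it sees that error.  A rough sketch of the
caller-side pattern (paraphrased for illustration, not the verbatim
blk_rq_map_user_iov code):

	if (iov_iter_is_bvec(iter)) {
		ret = blk_rq_map_user_bvec(rq, iter);
		if (!ret)
			return 0;	/* mapped zero-copy */
		if (ret != -EREMOTEIO)
			return ret;	/* hard error */
		copy = true;		/* limits mismatch: bounce-copy instead */
	}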

Comments

John Garry Oct. 28, 2024, 10:26 a.m. UTC | #1
On 28/10/2024 09:07, Christoph Hellwig wrote:
> blk_rq_map_user_bvec currently only has ad-hoc checks for queue limits,
> and the last fix to it enabled valid NVMe I/O to pass, but also allowed
> invalid one for drivers that set a max_segment_size or seg_boundary
> limit.
> 
> Fix it once for all by using the bio_split_rw_at helper from the I/O
> path that indicates if and where a bio would be have to be split to
> adhere to the queue limits, and it it returns a positive value, turn

super nit: check "it it "

> that into -EREMOTEIO to retry using the copy path.
> 
> Fixes: 2ff949441802 ("block: fix sanity checks in blk_rq_map_user_bvec")
> Signed-off-by: Christoph Hellwig <hch@lst.de>

FWIW,

Reviewed-by: John Garry <john.g.garry@oracle.com>

> ---
> 
> Changes since v1:
>   - correctly shift the max_hw_sectors value.  Tested by actually
>     checking for max_hw_sectors exceeding I/Os to fail properly
> 
>   block/blk-map.c | 56 +++++++++++++++----------------------------------
>   1 file changed, 17 insertions(+), 39 deletions(-)
> 
> diff --git a/block/blk-map.c b/block/blk-map.c
> index 6ef2ec1f7d78..b5fd1d857461 100644
> --- a/block/blk-map.c
> +++ b/block/blk-map.c
> @@ -561,55 +561,33 @@ EXPORT_SYMBOL(blk_rq_append_bio);
>   /* Prepare bio for passthrough IO given ITER_BVEC iter */
>   static int blk_rq_map_user_bvec(struct request *rq, const struct iov_iter *iter)
>   {
> -	struct request_queue *q = rq->q;
> -	size_t nr_iter = iov_iter_count(iter);
> -	size_t nr_segs = iter->nr_segs;
> -	struct bio_vec *bvecs, *bvprvp = NULL;
> -	const struct queue_limits *lim = &q->limits;
> -	unsigned int nsegs = 0, bytes = 0;
> +	const struct queue_limits *lim = &rq->q->limits;
> +	unsigned int max_bytes = lim->max_hw_sectors << SECTOR_SHIFT;
> +	unsigned int nsegs;
>   	struct bio *bio;
> -	size_t i;
> +	int ret;
>   
> -	if (!nr_iter || (nr_iter >> SECTOR_SHIFT) > queue_max_hw_sectors(q))
> -		return -EINVAL;
> -	if (nr_segs > queue_max_segments(q))
> +	if (!iov_iter_count(iter) || iov_iter_count(iter) > max_bytes)
>   		return -EINVAL;
>   
> -	/* no iovecs to alloc, as we already have a BVEC iterator */
> +	/* reuse the bvecs from the iterator instead of allocating new ones */
>   	bio = blk_rq_map_bio_alloc(rq, 0, GFP_KERNEL);
> -	if (bio == NULL)
> +	if (!bio)
>   		return -ENOMEM;
> -
>   	bio_iov_bvec_set(bio, (struct iov_iter *)iter);

totally separate comment to this patch, but I think that 
bio_iov_bvec_set() could be changed to accept const struct iov_iter * 
(and so we could drop the casting here)
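
Roughly, that would just move the cast from the call sites into the
helper; a sketch of the idea (not a tested change):

	/* hypothetical constified prototype */
	void bio_iov_bvec_set(struct bio *bio, const struct iov_iter *iter);

	/* ... which would let the call site above drop the cast: */
	bio_iov_bvec_set(bio, iter);

(the helper would then cast away const internally when it stores the
iterator's bvec array in the bio, instead of every caller doing it)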

> -	blk_rq_bio_prep(rq, bio, nr_segs);
> -
> -	/* loop to perform a bunch of sanity checks */
> -	bvecs = (struct bio_vec *)iter->bvec;
> -	for (i = 0; i < nr_segs; i++) {
> -		struct bio_vec *bv = &bvecs[i];
> -
> -		/*
> -		 * If the queue doesn't support SG gaps and adding this
> -		 * offset would create a gap, fallback to copy.
> -		 */
> -		if (bvprvp && bvec_gap_to_prev(lim, bvprvp, bv->bv_offset)) {
> -			blk_mq_map_bio_put(bio);
> -			return -EREMOTEIO;
> -		}
> -		/* check full condition */
> -		if (nsegs >= nr_segs || bytes > UINT_MAX - bv->bv_len)
> -			goto put_bio;
> -		if (bytes + bv->bv_len > nr_iter)
> -			break;
>   
> -		nsegs++;
> -		bytes += bv->bv_len;
> -		bvprvp = bv;
> +	/* check that the data layout matches the hardware restrictions */
> +	ret = bio_split_rw_at(bio, lim, &nsegs, max_bytes);
> +	if (ret) {
> +		/* if we would have to split the bio, copy instead */
> +		if (ret > 0)
> +			ret = -EREMOTEIO;
> +		blk_mq_map_bio_put(bio);
> +		return ret;
>   	}
> +
> +	blk_rq_bio_prep(rq, bio, nsegs);
>   	return 0;
> -put_bio:
> -	blk_mq_map_bio_put(bio);
> -	return -EINVAL;
>   }
>   
>   /**
Jens Axboe Oct. 28, 2024, 6:35 p.m. UTC | #2
On Mon, 28 Oct 2024 10:07:48 +0100, Christoph Hellwig wrote:
> blk_rq_map_user_bvec currently only has ad-hoc checks for queue limits,
> and the last fix to it enabled valid NVMe I/O to pass, but also allowed
> invalid one for drivers that set a max_segment_size or seg_boundary
> limit.
> 
> Fix it once for all by using the bio_split_rw_at helper from the I/O
> path that indicates if and where a bio would be have to be split to
> adhere to the queue limits, and it it returns a positive value, turn
> that into -EREMOTEIO to retry using the copy path.
> 
> [...]

Applied, thanks!

[1/1] block: fix queue limits checks in blk_rq_map_user_bvec for real
      commit: be0e822bb3f5259c7f9424ba97e8175211288813

Best regards,
Jens Axboe

Patch

diff --git a/block/blk-map.c b/block/blk-map.c
index 6ef2ec1f7d78..b5fd1d857461 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -561,55 +561,33 @@  EXPORT_SYMBOL(blk_rq_append_bio);
 /* Prepare bio for passthrough IO given ITER_BVEC iter */
 static int blk_rq_map_user_bvec(struct request *rq, const struct iov_iter *iter)
 {
-	struct request_queue *q = rq->q;
-	size_t nr_iter = iov_iter_count(iter);
-	size_t nr_segs = iter->nr_segs;
-	struct bio_vec *bvecs, *bvprvp = NULL;
-	const struct queue_limits *lim = &q->limits;
-	unsigned int nsegs = 0, bytes = 0;
+	const struct queue_limits *lim = &rq->q->limits;
+	unsigned int max_bytes = lim->max_hw_sectors << SECTOR_SHIFT;
+	unsigned int nsegs;
 	struct bio *bio;
-	size_t i;
+	int ret;
 
-	if (!nr_iter || (nr_iter >> SECTOR_SHIFT) > queue_max_hw_sectors(q))
-		return -EINVAL;
-	if (nr_segs > queue_max_segments(q))
+	if (!iov_iter_count(iter) || iov_iter_count(iter) > max_bytes)
 		return -EINVAL;
 
-	/* no iovecs to alloc, as we already have a BVEC iterator */
+	/* reuse the bvecs from the iterator instead of allocating new ones */
 	bio = blk_rq_map_bio_alloc(rq, 0, GFP_KERNEL);
-	if (bio == NULL)
+	if (!bio)
 		return -ENOMEM;
-
 	bio_iov_bvec_set(bio, (struct iov_iter *)iter);
-	blk_rq_bio_prep(rq, bio, nr_segs);
-
-	/* loop to perform a bunch of sanity checks */
-	bvecs = (struct bio_vec *)iter->bvec;
-	for (i = 0; i < nr_segs; i++) {
-		struct bio_vec *bv = &bvecs[i];
-
-		/*
-		 * If the queue doesn't support SG gaps and adding this
-		 * offset would create a gap, fallback to copy.
-		 */
-		if (bvprvp && bvec_gap_to_prev(lim, bvprvp, bv->bv_offset)) {
-			blk_mq_map_bio_put(bio);
-			return -EREMOTEIO;
-		}
-		/* check full condition */
-		if (nsegs >= nr_segs || bytes > UINT_MAX - bv->bv_len)
-			goto put_bio;
-		if (bytes + bv->bv_len > nr_iter)
-			break;
 
-		nsegs++;
-		bytes += bv->bv_len;
-		bvprvp = bv;
+	/* check that the data layout matches the hardware restrictions */
+	ret = bio_split_rw_at(bio, lim, &nsegs, max_bytes);
+	if (ret) {
+		/* if we would have to split the bio, copy instead */
+		if (ret > 0)
+			ret = -EREMOTEIO;
+		blk_mq_map_bio_put(bio);
+		return ret;
 	}
+
+	blk_rq_bio_prep(rq, bio, nsegs);
 	return 0;
-put_bio:
-	blk_mq_map_bio_put(bio);
-	return -EINVAL;
 }
 
 /**