diff mbox series

blk-mq: enforce op-specific segment limits in blk_insert_cloned_request

Message ID 20230215201507.494152-1-ushankar@purestorage.com (mailing list archive)
State New, archived
Headers show
Series blk-mq: enforce op-specific segment limits in blk_insert_cloned_request | expand

Commit Message

Uday Shankar Feb. 15, 2023, 8:15 p.m. UTC
The block layer might merge together discard requests up until the
max_discard_segments limit is hit, but blk_insert_cloned_request checks
the segment count against max_segments regardless of the req op. This
can result in errors like the following when discards are issued through
a DM device and max_discard_segments exceeds max_segments for the queue
of the chosen underlying device.

blk_insert_cloned_request: over max segments limit. (256 > 129)

Fix this by looking at the req_op and enforcing the appropriate segment
limit - max_discard_segments for REQ_OP_DISCARDs and max_segments for
everything else.

Signed-off-by: Uday Shankar <ushankar@purestorage.com>
---
 block/blk-merge.c | 4 +---
 block/blk-mq.c    | 5 +++--
 block/blk.h       | 8 ++++++++
 3 files changed, 12 insertions(+), 5 deletions(-)


base-commit: 6bea9ac7c6481c09eb2b61d7cd844fc64a526e3e

Comments

Christoph Hellwig Feb. 16, 2023, 6:09 a.m. UTC | #1
On Wed, Feb 15, 2023 at 01:15:08PM -0700, Uday Shankar wrote:
> The block layer might merge together discard requests up until the
> max_discard_segments limit is hit, but blk_insert_cloned_request checks
> the segment count against max_segments regardless of the req op. This
> can result in errors like the following when discards are issued through
> a DM device and max_discard_segments exceeds max_segments for the queue
> of the chosen underlying device.
> 
> blk_insert_cloned_request: over max segments limit. (256 > 129)
> 
> Fix this by looking at the req_op and enforcing the appropriate segment
> limit - max_discard_segments for REQ_OP_DISCARDs and max_segments for
> everything else.

I'd just remove the debug check entirely.
kernel test robot Feb. 16, 2023, 1:06 p.m. UTC | #2
Hi Uday,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on 6bea9ac7c6481c09eb2b61d7cd844fc64a526e3e]

url:    https://github.com/intel-lab-lkp/linux/commits/Uday-Shankar/blk-mq-enforce-op-specific-segment-limits-in-blk_insert_cloned_request/20230216-041718
base:   6bea9ac7c6481c09eb2b61d7cd844fc64a526e3e
patch link:    https://lore.kernel.org/r/20230215201507.494152-1-ushankar%40purestorage.com
patch subject: [PATCH] blk-mq: enforce op-specific segment limits in blk_insert_cloned_request
config: i386-randconfig-a004-20230213 (https://download.01.org/0day-ci/archive/20230216/202302162040.FaI25ul2-lkp@intel.com/config)
compiler: clang version 14.0.6 (https://github.com/llvm/llvm-project f28c006a5895fc0e329fe15fead81e37457cb1d1)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/2cd958b09698bedea1a5a5f6298f0d25ec522bf9
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Uday-Shankar/blk-mq-enforce-op-specific-segment-limits-in-blk_insert_cloned_request/20230216-041718
        git checkout 2cd958b09698bedea1a5a5f6298f0d25ec522bf9
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=i386 olddefconfig
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=i386 SHELL=/bin/bash

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>
| Link: https://lore.kernel.org/oe-kbuild-all/202302162040.FaI25ul2-lkp@intel.com/

All warnings (new ones prefixed by >>):

>> block/blk-mq.c:3032:36: warning: format specifies type 'unsigned short' but the argument has type 'unsigned int' [-Wformat]
                           __func__, rq->nr_phys_segments, max_segments);
                                                           ^~~~~~~~~~~~
   include/linux/printk.h:457:60: note: expanded from macro 'printk'
   #define printk(fmt, ...) printk_index_wrap(_printk, fmt, ##__VA_ARGS__)
                                                       ~~~    ^~~~~~~~~~~
   include/linux/printk.h:429:19: note: expanded from macro 'printk_index_wrap'
                   _p_func(_fmt, ##__VA_ARGS__);                           \
                           ~~~~    ^~~~~~~~~~~
   1 warning generated.


vim +3032 block/blk-mq.c

  2993	
  2994	#ifdef CONFIG_BLK_MQ_STACKING
  2995	/**
  2996	 * blk_insert_cloned_request - Helper for stacking drivers to submit a request
  2997	 * @rq: the request being queued
  2998	 */
  2999	blk_status_t blk_insert_cloned_request(struct request *rq)
  3000	{
  3001		struct request_queue *q = rq->q;
  3002		unsigned int max_sectors = blk_queue_get_max_sectors(q, req_op(rq));
  3003		unsigned int max_segments = blk_queue_get_max_segments(q, req_op(rq));
  3004		blk_status_t ret;
  3005	
  3006		if (blk_rq_sectors(rq) > max_sectors) {
  3007			/*
  3008			 * SCSI device does not have a good way to return if
  3009			 * Write Same/Zero is actually supported. If a device rejects
  3010			 * a non-read/write command (discard, write same, etc.) the
  3011			 * low-level device driver will set the relevant queue limit to
  3012			 * 0 to prevent blk-lib from issuing more of the offending
  3013			 * operations. Commands queued prior to the queue limit being
  3014			 * reset need to be completed with BLK_STS_NOTSUPP to avoid I/O
  3015			 * errors being propagated to upper layers.
  3016			 */
  3017			if (max_sectors == 0)
  3018			return BLK_STS_NOTSUPP;
  3019	
  3020			printk(KERN_ERR "%s: over max size limit. (%u > %u)\n",
  3021				__func__, blk_rq_sectors(rq), max_sectors);
  3022			return BLK_STS_IOERR;
  3023		}
  3024	
  3025		/*
  3026		 * The queue settings related to segment counting may differ from the
  3027		 * original queue.
  3028		 */
  3029		rq->nr_phys_segments = blk_recalc_rq_segments(rq);
  3030		if (rq->nr_phys_segments > max_segments) {
  3031		printk(KERN_ERR "%s: over max segments limit. (%u > %u)\n",
> 3032			__func__, rq->nr_phys_segments, max_segments);
  3033		return BLK_STS_IOERR;
  3034		}
  3035	
  3036		if (q->disk && should_fail_request(q->disk->part0, blk_rq_bytes(rq)))
  3037			return BLK_STS_IOERR;
  3038	
  3039		if (blk_crypto_insert_cloned_request(rq))
  3040			return BLK_STS_IOERR;
  3041	
  3042		blk_account_io_start(rq);
  3043	
  3044		/*
  3045		 * Since we have a scheduler attached on the top device,
  3046		 * bypass a potential scheduler on the bottom device for
  3047		 * insert.
  3048		 */
  3049		blk_mq_run_dispatch_ops(q,
  3050			ret = blk_mq_request_issue_directly(rq, true));
  3051		if (ret)
  3052			blk_account_io_done(rq, ktime_get_ns());
  3053		return ret;
  3054	}
  3055	EXPORT_SYMBOL_GPL(blk_insert_cloned_request);
  3056
Uday Shankar Feb. 16, 2023, 7:27 p.m. UTC | #3
On Wed, Feb 15, 2023 at 10:09:36PM -0800, Christoph Hellwig wrote:
> I'd just remove the debug check entirely

Older kernels have these checks in a separate function called
blk_cloned_rq_check_limits, which carries the following comment:

/**
 * blk_cloned_rq_check_limits - Helper function to check a cloned request
 *                              for the new queue limits
 * @q:  the queue
 * @rq: the request being checked
 *
 * Description:
 *    @rq may have been made based on weaker limitations of upper-level queues
 *    in request stacking drivers, and it may violate the limitation of @q.
 *    Since the block layer and the underlying device driver trust @rq
 *    after it is inserted to @q, it should be checked against @q before
 *    the insertion using this generic function.
 *
 *    Request stacking drivers like request-based dm may change the queue
 *    limits when retrying requests on other queues. Those requests need
 *    to be checked against the new queue limits again during dispatch.
 */.

Is this concern no longer relevant?
Mike Snitzer Feb. 17, 2023, 12:13 a.m. UTC | #4
On Thu, Feb 16 2023 at  2:27P -0500,
Uday Shankar <ushankar@purestorage.com> wrote:

> On Wed, Feb 15, 2023 at 10:09:36PM -0800, Christoph Hellwig wrote:
> > I'd just remove the debug check entirely
> 
> Older kernels have these checks in a separate function called
> blk_cloned_rq_check_limits, which carries the following comment:
> 
> /**
>  * blk_cloned_rq_check_limits - Helper function to check a cloned request
>  *                              for the new queue limits
>  * @q:  the queue
>  * @rq: the request being checked
>  *
>  * Description:
>  *    @rq may have been made based on weaker limitations of upper-level queues
>  *    in request stacking drivers, and it may violate the limitation of @q.
>  *    Since the block layer and the underlying device driver trust @rq
>  *    after it is inserted to @q, it should be checked against @q before
>  *    the insertion using this generic function.
>  *
>  *    Request stacking drivers like request-based dm may change the queue
>  *    limits when retrying requests on other queues. Those requests need
>  *    to be checked against the new queue limits again during dispatch.
>  */.
> 
> Is this concern no longer relevant?

Still relevant.
Christoph Hellwig Feb. 17, 2023, 4:16 p.m. UTC | #5
On Thu, Feb 16, 2023 at 12:27:02PM -0700, Uday Shankar wrote:
>  * Description:
>  *    @rq may have been made based on weaker limitations of upper-level queues
>  *    in request stacking drivers, and it may violate the limitation of @q.
>  *    Since the block layer and the underlying device driver trust @rq
>  *    after it is inserted to @q, it should be checked against @q before
>  *    the insertion using this generic function.
>  *
>  *    Request stacking drivers like request-based dm may change the queue
>  *    limits when retrying requests on other queues. Those requests need
>  *    to be checked against the new queue limits again during dispatch.
>  */.
> 
> Is this concern no longer relevant?

The concern is still valid, but it does not refer to the debug check.
It refers to recalculating nr_phys_segments using
blk_recalc_rq_segments, and the fact that any driver using this
interface needs to stack its limits properly.
Mike Snitzer Feb. 17, 2023, 4:43 p.m. UTC | #6
On Fri, Feb 17 2023 at 11:16P -0500,
Christoph Hellwig <hch@infradead.org> wrote:

> On Thu, Feb 16, 2023 at 12:27:02PM -0700, Uday Shankar wrote:
> >  * Description:
> >  *    @rq may have been made based on weaker limitations of upper-level queues
> >  *    in request stacking drivers, and it may violate the limitation of @q.
> >  *    Since the block layer and the underlying device driver trust @rq
> >  *    after it is inserted to @q, it should be checked against @q before
> >  *    the insertion using this generic function.
> >  *
> >  *    Request stacking drivers like request-based dm may change the queue
> >  *    limits when retrying requests on other queues. Those requests need
> >  *    to be checked against the new queue limits again during dispatch.
> >  */.
> > 
> > Is this concern no longer relevant?
> 
> The concern is still valid, but it does not refer to the debug check.
> It refers to recalculating nr_phys_segments using
> blk_recalc_rq_segments, and the fact that any driver using this
> interface needs to stack its limits properly.

It is a negative check that you're calling a debug check.

Much easier to address a bug in a driver when seeing this message than
figuring it out after more elaborate hunting. Not seeing the downside
of preserving/fixing given it is a quick limit check. *shrug*
Uday Shankar Feb. 17, 2023, 9:41 p.m. UTC | #7
> Much easier to address a bug in a driver when seeing this message than
> figuring it out after more elaborate hunting. Not seeing the downside
> of preserving/fixing given it is a quick limit check. *shrug*

I tend to agree - considering the check has been here for a while, I
don't know if lower levels will have particularly enlightening logging
(if any at all) for these errors.
diff mbox series

Patch

diff --git a/block/blk-merge.c b/block/blk-merge.c
index b7c193d67..7f663c2d3 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -588,9 +588,7 @@  EXPORT_SYMBOL(__blk_rq_map_sg);
 
 static inline unsigned int blk_rq_get_max_segments(struct request *rq)
 {
-	if (req_op(rq) == REQ_OP_DISCARD)
-		return queue_max_discard_segments(rq->q);
-	return queue_max_segments(rq->q);
+	return blk_queue_get_max_segments(rq->q, req_op(rq));
 }
 
 static inline unsigned int blk_rq_get_max_sectors(struct request *rq,
diff --git a/block/blk-mq.c b/block/blk-mq.c
index d3494a796..121b20230 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -3000,6 +3000,7 @@  blk_status_t blk_insert_cloned_request(struct request *rq)
 {
 	struct request_queue *q = rq->q;
 	unsigned int max_sectors = blk_queue_get_max_sectors(q, req_op(rq));
+	unsigned int max_segments = blk_queue_get_max_segments(q, req_op(rq));
 	blk_status_t ret;
 
 	if (blk_rq_sectors(rq) > max_sectors) {
@@ -3026,9 +3027,9 @@  blk_status_t blk_insert_cloned_request(struct request *rq)
 	 * original queue.
 	 */
 	rq->nr_phys_segments = blk_recalc_rq_segments(rq);
-	if (rq->nr_phys_segments > queue_max_segments(q)) {
+	if (rq->nr_phys_segments > max_segments) {
 		printk(KERN_ERR "%s: over max segments limit. (%hu > %hu)\n",
-			__func__, rq->nr_phys_segments, queue_max_segments(q));
+			__func__, rq->nr_phys_segments, max_segments);
 		return BLK_STS_IOERR;
 	}
 
diff --git a/block/blk.h b/block/blk.h
index f02381405..8d705c13a 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -169,6 +169,14 @@  static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q,
 	return q->limits.max_sectors;
 }
 
+static inline unsigned int blk_queue_get_max_segments(struct request_queue *q,
+						      enum req_op op)
+{
+	if (op == REQ_OP_DISCARD)
+		return queue_max_discard_segments(q);
+	return queue_max_segments(q);
+}
+
 #ifdef CONFIG_BLK_DEV_INTEGRITY
 void blk_flush_integrity(void);
 bool __bio_integrity_endio(struct bio *);