[03/47] block: defer timeouts to a workqueue

Message ID	1448037342-18384-4-git-send-email-hch@lst.de (mailing list archive)
State	New, archived
Headers	show Return-Path: <linux-block-owner@kernel.org> From: Christoph Hellwig <hch@lst.de> To: keith.busch@intel.com, axboe@fb.com Cc: linux-nvme@lists.infradead.org, linux-block@vger.kernel.org Subject: [PATCH 03/47] block: defer timeouts to a workqueue Date: Fri, 20 Nov 2015 17:34:58 +0100 Message-Id: <1448037342-18384-4-git-send-email-hch@lst.de> In-Reply-To: <1448037342-18384-1-git-send-email-hch@lst.de> References: <1448037342-18384-1-git-send-email-hch@lst.de> Sender: linux-block-owner@vger.kernel.org Precedence: bulk

Message ID

1448037342-18384-4-git-send-email-hch@lst.de (mailing list archive)

State

New, archived

Headers

From: Christoph Hellwig <hch@lst.de>
To: keith.busch@intel.com, axboe@fb.com
Cc: linux-nvme@lists.infradead.org, linux-block@vger.kernel.org
Subject: [PATCH 03/47] block: defer timeouts to a workqueue
Date: Fri, 20 Nov 2015 17:34:58 +0100
Message-Id: <1448037342-18384-4-git-send-email-hch@lst.de>
In-Reply-To: <1448037342-18384-1-git-send-email-hch@lst.de>
References: <1448037342-18384-1-git-send-email-hch@lst.de>
Sender: linux-block-owner@vger.kernel.org
Precedence: bulk

Commit Message

Christoph Hellwig Nov. 20, 2015, 4:34 p.m. UTC

Timer context is not very useful for drivers to perform any meaningful abort
action from.  So instead of calling the driver from this useless context,
defer it to a workqueue as soon as possible.

Note that while a delayed_work item would seem the right thing here, I didn't
dare to use it due to the magic in blk_add_timer that pokes deep into timer
internals.  But maybe this encourages Tejun to add a sensible API for that to
the workqueue API and we'll all be fine in the end :)

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 block/blk-core.c       | 8 ++++++++
 block/blk-mq.c         | 8 +++++---
 block/blk-timeout.c    | 5 +++--
 block/blk.h            | 2 +-
 include/linux/blkdev.h | 1 +
 5 files changed, 18 insertions(+), 6 deletions(-)

Comments

Jeff Moyer Nov. 23, 2015, 8:31 p.m. UTC | #1

Christoph Hellwig <hch@lst.de> writes:

> Timer context is not very useful for drivers to perform any meaningful abort
> action from.  So instead of calling the driver from this useless context
> defer it to a workqueue as soon as possible.
>
> Note that while a delayed_work item would seem the right thing here I didn't
> dare to use it due to the magic in blk_add_timer that pokes deep into timer
> internals.  But maybe this encourages Tejun to add a sensible API for that to
> the workqueue API and we'll all be fine in the end :)

I don't see where the blk-mq timeout work is ever scheduled.  You
removed the call to setup_timer for the mq case, so what causes the mq
timeout work to run?  I must be missing something.

-Jeff

>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  block/blk-core.c       | 8 ++++++++
>  block/blk-mq.c         | 8 +++++---
>  block/blk-timeout.c    | 5 +++--
>  block/blk.h            | 2 +-
>  include/linux/blkdev.h | 1 +
>  5 files changed, 18 insertions(+), 6 deletions(-)
>
> diff --git a/block/blk-core.c b/block/blk-core.c
> index 5131993b..1de0974 100644
> --- a/block/blk-core.c
> +++ b/block/blk-core.c
> @@ -664,6 +664,13 @@ static void blk_queue_usage_counter_release(struct percpu_ref *ref)
>  	wake_up_all(&q->mq_freeze_wq);
>  }
>  
> +static void blk_rq_timed_out_timer(unsigned long data)
> +{
> +	struct request_queue *q = (struct request_queue *)data;
> +
> +	kblockd_schedule_work(&q->timeout_work);
> +}
> +
>  struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
>  {
>  	struct request_queue *q;
> @@ -825,6 +832,7 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
>  	if (blk_init_rl(&q->root_rl, q, GFP_KERNEL))
>  		goto fail;
>  
> +	INIT_WORK(&q->timeout_work, blk_timeout_work);
>  	q->request_fn		= rfn;
>  	q->prep_rq_fn		= NULL;
>  	q->unprep_rq_fn		= NULL;
> diff --git a/block/blk-mq.c b/block/blk-mq.c
> index 3ae09de..8354601 100644
> --- a/block/blk-mq.c
> +++ b/block/blk-mq.c
> @@ -85,6 +85,7 @@ void blk_mq_freeze_queue_start(struct request_queue *q)
>  	freeze_depth = atomic_inc_return(&q->mq_freeze_depth);
>  	if (freeze_depth == 1) {
>  		percpu_ref_kill(&q->q_usage_counter);
> +		cancel_work_sync(&q->timeout_work);
>  		blk_mq_run_hw_queues(q, false);
>  	}
>  }
> @@ -617,9 +618,10 @@ static void blk_mq_check_expired(struct blk_mq_hw_ctx *hctx,
>  	}
>  }
>  
> -static void blk_mq_rq_timer(unsigned long priv)
> +static void blk_mq_timeout_work(struct work_struct *work)
>  {
> -	struct request_queue *q = (struct request_queue *)priv;
> +	struct request_queue *q =
> +		container_of(work, struct request_queue, timeout_work);
>  	struct blk_mq_timeout_data data = {
>  		.next		= 0,
>  		.next_set	= 0,
> @@ -2015,7 +2017,7 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
>  		hctxs[i]->queue_num = i;
>  	}
>  
> -	setup_timer(&q->timeout, blk_mq_rq_timer, (unsigned long) q);
> +	INIT_WORK(&q->timeout_work, blk_mq_timeout_work);
>  	blk_queue_rq_timeout(q, set->timeout ? set->timeout : 30 * HZ);
>  
>  	q->nr_queues = nr_cpu_ids;
> diff --git a/block/blk-timeout.c b/block/blk-timeout.c
> index aa40aa9..aedd128 100644
> --- a/block/blk-timeout.c
> +++ b/block/blk-timeout.c
> @@ -127,9 +127,10 @@ static void blk_rq_check_expired(struct request *rq, unsigned long *next_timeout
>  	}
>  }
>  
> -void blk_rq_timed_out_timer(unsigned long data)
> +void blk_timeout_work(struct work_struct *work)
>  {
> -	struct request_queue *q = (struct request_queue *) data;
> +	struct request_queue *q =
> +		container_of(work, struct request_queue, timeout_work);
>  	unsigned long flags, next = 0;
>  	struct request *rq, *tmp;
>  	int next_set = 0;
> diff --git a/block/blk.h b/block/blk.h
> index da722eb..37b9165 100644
> --- a/block/blk.h
> +++ b/block/blk.h
> @@ -95,7 +95,7 @@ static inline void blk_flush_integrity(void)
>  }
>  #endif
>  
> -void blk_rq_timed_out_timer(unsigned long data);
> +void blk_timeout_work(struct work_struct *work);
>  unsigned long blk_rq_timeout(unsigned long timeout);
>  void blk_add_timer(struct request *req);
>  void blk_delete_timer(struct request *);
> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> index 3fe27f8..9a8424a 100644
> --- a/include/linux/blkdev.h
> +++ b/include/linux/blkdev.h
> @@ -407,6 +407,7 @@ struct request_queue {
>  
>  	unsigned int		rq_timeout;
>  	struct timer_list	timeout;
> +	struct work_struct	timeout_work;
>  	struct list_head	timeout_list;
>  
>  	struct list_head	icq_list;
--
To unsubscribe from this list: send the line "unsubscribe linux-block" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Christoph Hellwig Nov. 23, 2015, 8:48 p.m. UTC | #2

On Mon, Nov 23, 2015 at 03:31:32PM -0500, Jeff Moyer wrote:
> I don't see where the blk-mq timeout work is ever scheduled.  You
> removed the call to setup_timer for the mq case, so what causes the mq
> timeout work to run?  I must be missing something.

No, we use the timer which we initialize in blk_alloc_queue_node instead
of overwriting it later in the blk-mq case.  The timer gets added in
blk_add_timer either way.
--
To unsubscribe from this list: send the line "unsubscribe linux-block" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Jeff Moyer Nov. 23, 2015, 8:59 p.m. UTC | #3

Christoph Hellwig <hch@lst.de> writes:

> On Mon, Nov 23, 2015 at 03:31:32PM -0500, Jeff Moyer wrote:
>> I don't see where the blk-mq timeout work is ever scheduled.  You
>> removed the call to setup_timer for the mq case, so what causes the mq
>> timeout work to run?  I must be missing something.
>
> No we use the timer which we initialize in blk_alloc_queue_node instead
> of overwriting it later in the blk-mq case.  The timer gets added in
> blk_add_timer either way.

Ah, I see.  I missed the call to blk_alloc_queue_node from
blk_mq_init_queue.  Thanks.

The patch looks good to me.  Everything will still be running in the
same context (irqs off).

Reviewed-by: Jeff Moyer <jmoyer@redhat.com>
--
To unsubscribe from this list: send the line "unsubscribe linux-block" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

diff --git a/block/blk-core.c b/block/blk-core.c
index 5131993b..1de0974 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -664,6 +664,13 @@  static void blk_queue_usage_counter_release(struct percpu_ref *ref)
 	wake_up_all(&q->mq_freeze_wq);
 }
 
+static void blk_rq_timed_out_timer(unsigned long data)
+{
+	struct request_queue *q = (struct request_queue *)data;
+
+	kblockd_schedule_work(&q->timeout_work);
+}
+
 struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 {
 	struct request_queue *q;
@@ -825,6 +832,7 @@  blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
 	if (blk_init_rl(&q->root_rl, q, GFP_KERNEL))
 		goto fail;
 
+	INIT_WORK(&q->timeout_work, blk_timeout_work);
 	q->request_fn		= rfn;
 	q->prep_rq_fn		= NULL;
 	q->unprep_rq_fn		= NULL;
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 3ae09de..8354601 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -85,6 +85,7 @@  void blk_mq_freeze_queue_start(struct request_queue *q)
 	freeze_depth = atomic_inc_return(&q->mq_freeze_depth);
 	if (freeze_depth == 1) {
 		percpu_ref_kill(&q->q_usage_counter);
+		cancel_work_sync(&q->timeout_work);
 		blk_mq_run_hw_queues(q, false);
 	}
 }
@@ -617,9 +618,10 @@  static void blk_mq_check_expired(struct blk_mq_hw_ctx *hctx,
 	}
 }
 
-static void blk_mq_rq_timer(unsigned long priv)
+static void blk_mq_timeout_work(struct work_struct *work)
 {
-	struct request_queue *q = (struct request_queue *)priv;
+	struct request_queue *q =
+		container_of(work, struct request_queue, timeout_work);
 	struct blk_mq_timeout_data data = {
 		.next		= 0,
 		.next_set	= 0,
@@ -2015,7 +2017,7 @@  struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
 		hctxs[i]->queue_num = i;
 	}
 
-	setup_timer(&q->timeout, blk_mq_rq_timer, (unsigned long) q);
+	INIT_WORK(&q->timeout_work, blk_mq_timeout_work);
 	blk_queue_rq_timeout(q, set->timeout ? set->timeout : 30 * HZ);
 
 	q->nr_queues = nr_cpu_ids;
diff --git a/block/blk-timeout.c b/block/blk-timeout.c
index aa40aa9..aedd128 100644
--- a/block/blk-timeout.c
+++ b/block/blk-timeout.c
@@ -127,9 +127,10 @@  static void blk_rq_check_expired(struct request *rq, unsigned long *next_timeout
 	}
 }
 
-void blk_rq_timed_out_timer(unsigned long data)
+void blk_timeout_work(struct work_struct *work)
 {
-	struct request_queue *q = (struct request_queue *) data;
+	struct request_queue *q =
+		container_of(work, struct request_queue, timeout_work);
 	unsigned long flags, next = 0;
 	struct request *rq, *tmp;
 	int next_set = 0;
diff --git a/block/blk.h b/block/blk.h
index da722eb..37b9165 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -95,7 +95,7 @@  static inline void blk_flush_integrity(void)
 }
 #endif
 
-void blk_rq_timed_out_timer(unsigned long data);
+void blk_timeout_work(struct work_struct *work);
 unsigned long blk_rq_timeout(unsigned long timeout);
 void blk_add_timer(struct request *req);
 void blk_delete_timer(struct request *);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 3fe27f8..9a8424a 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -407,6 +407,7 @@  struct request_queue {
 
 	unsigned int		rq_timeout;
 	struct timer_list	timeout;
+	struct work_struct	timeout_work;
 	struct list_head	timeout_list;
 
 	struct list_head	icq_list;

[03/47] block: defer timeouts to a workqueue

Commit Message

Comments

Patch