diff mbox series

[02/11] target: add workqueue cmd submission helper

Message ID 20210204113513.93204-3-michael.christie@oracle.com (mailing list archive)
State New, archived
Headers show
Series target: fix cmd plugging and completion | expand

Commit Message

Mike Christie Feb. 4, 2021, 11:35 a.m. UTC
loop and vhost-scsi do their target cmd submission from driver
workqueues. This allows them to avoid an issue where the backend may
block waiting for resources like tags/requests, mem/locks, etc
and that ends up blocking their entire submission path and for the
case of vhost-scsi both the submission and completion path.

This patch adds a helper these drivers can use to submit from the
lio workqueue. This code will then be extended in the next patches
to fix the plugging of backend devices.

Signed-off-by: Mike Christie <michael.christie@oracle.com>
---
 drivers/target/target_core_transport.c | 102 ++++++++++++++++++++++++-
 include/target/target_core_base.h      |  10 ++-
 include/target/target_core_fabric.h    |   3 +
 3 files changed, 111 insertions(+), 4 deletions(-)

Comments

Chaitanya Kulkarni Feb. 4, 2021, 11:13 p.m. UTC | #1
On 2/4/21 03:41, Mike Christie wrote:
> loop and vhost-scsi do their target cmd submission from driver
> workqueues. This allows them to avoid an issue where the backend may
> block waiting for resources like tags/requests, mem/locks, etc
> and that ends up blocking their entire submission path and for the
> case of vhost-scsi both the submission and completion path.
>
> This patch adds a helper these drivers can use to submit from the
> lio workqueue. This code will then be extended in the next patches
> to fix the plugging of backend devices.
>
> Signed-off-by: Mike Christie <michael.christie@oracle.com>
> ---
>  drivers/target/target_core_transport.c | 102 ++++++++++++++++++++++++-
>  include/target/target_core_base.h      |  10 ++-
>  include/target/target_core_fabric.h    |   3 +
>  3 files changed, 111 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
> index 7c5d37bac561..dec89e911348 100644
> --- a/drivers/target/target_core_transport.c
> +++ b/drivers/target/target_core_transport.c
> @@ -41,6 +41,7 @@
>  #include <trace/events/target.h>
>  
>  static struct workqueue_struct *target_completion_wq;
> +static struct workqueue_struct *target_submission_wq;
>  static struct kmem_cache *se_sess_cache;
>  struct kmem_cache *se_ua_cache;
>  struct kmem_cache *t10_pr_reg_cache;
> @@ -129,8 +130,15 @@ int init_se_kmem_caches(void)
>  	if (!target_completion_wq)
>  		goto out_free_lba_map_mem_cache;
>  
> +	target_submission_wq = alloc_workqueue("target_submission",
> +					       WQ_MEM_RECLAIM, 0);
> +	if (!target_submission_wq)
> +		goto out_free_completion_wq;
> +
>  	return 0;
>  
> +out_free_completion_wq:
> +	destroy_workqueue(target_completion_wq);
>  out_free_lba_map_mem_cache:
>  	kmem_cache_destroy(t10_alua_lba_map_mem_cache);
>  out_free_lba_map_cache:
> @@ -153,6 +161,7 @@ int init_se_kmem_caches(void)
>  
>  void release_se_kmem_caches(void)
>  {
> +	destroy_workqueue(target_submission_wq);
>  	destroy_workqueue(target_completion_wq);
>  	kmem_cache_destroy(se_sess_cache);
>  	kmem_cache_destroy(se_ua_cache);
> @@ -218,6 +227,69 @@ static void target_release_sess_cmd_refcnt(struct percpu_ref *ref)
>  	wake_up(&sess->cmd_count_wq);
>  }
>  
> +static void target_queued_submit_work(struct work_struct *work)
> +{
> +	struct se_sess_cmd_queue *sq =
> +				container_of(work, struct se_sess_cmd_queue,
> +					     work);
> +	struct se_session *se_sess = sq->se_sess;
> +	struct se_cmd *se_cmd, *next_cmd;
> +	struct llist_node *cmd_list;
> +
> +	cmd_list = llist_del_all(&sq->cmd_list);
> +	if (!cmd_list)
> +		/* Previous call took what we were queued to submit */
> +		return;
> +
> +	cmd_list = llist_reverse_order(cmd_list);
> +	llist_for_each_entry_safe(se_cmd, next_cmd, cmd_list, se_cmd_list)
> +		se_sess->tfo->submit_queued_cmd(se_cmd);
> +}
> +
> +static void target_queue_cmd_work(struct se_sess_cmd_queue *q,
> +				  struct se_cmd *se_cmd, int cpu)
> +{
> +	llist_add(&se_cmd->se_cmd_list, &q->cmd_list);
> +	queue_work_on(cpu, target_submission_wq, &q->work);
> +}
> +
> +/**
> + * target_queue_cmd_submit - queue a se_cmd to be executed from the lio wq
> + * @se_sess: cmd's session
> + * @se_cmd: cmd to queue
> + */
> +void target_queue_cmd_submit(struct se_session *se_sess, struct se_cmd *se_cmd)
> +{
> +	int cpu = smp_processor_id();
> +
> +	target_queue_cmd_work(&se_sess->sq[cpu], se_cmd, cpu);
> +}
> +EXPORT_SYMBOL_GPL(target_queue_cmd_submit);
> +
> +static void target_flush_queued_cmds(struct se_session *se_sess)
> +{
> +	int i;
> +
> +	if (!se_sess->sq)
> +		return;
> +
> +	for (i = 0; i < se_sess->q_cnt; i++)
> +		cancel_work_sync(&se_sess->sq[i].work);
> +}
> +
> +static void target_init_sess_cmd_queues(struct se_session *se_sess,
> +					struct se_sess_cmd_queue *q,
> +					void (*work_fn)(struct work_struct *work))
> +{
> +	int i;
> +
> +	for (i = 0; i < se_sess->q_cnt; i++) {
> +		init_llist_head(&q[i].cmd_list);
> +		INIT_WORK(&q[i].work, work_fn);
> +		q[i].se_sess = se_sess;
> +	}
> +}
> +
Can we opencode above function if there is only one caller ?
unless there is a specific reason to have it on its own which I failed to
understand.
>  /**
>   * transport_init_session - initialize a session object
>   * @tfo: target core fabric ops
> @@ -228,6 +300,8 @@ static void target_release_sess_cmd_refcnt(struct percpu_ref *ref)
>  int transport_init_session(const struct target_core_fabric_ops *tfo,
>  			   struct se_session *se_sess)
>  {
> +	int rc;
> +
>  	INIT_LIST_HEAD(&se_sess->sess_list);
>  	INIT_LIST_HEAD(&se_sess->sess_acl_list);
>  	spin_lock_init(&se_sess->sess_cmd_lock);
> @@ -235,13 +309,34 @@ int transport_init_session(const struct target_core_fabric_ops *tfo,
>  	init_completion(&se_sess->stop_done);
>  	atomic_set(&se_sess->stopped, 0);
>  	se_sess->tfo = tfo;
> -	return percpu_ref_init(&se_sess->cmd_count,
> -			       target_release_sess_cmd_refcnt, 0, GFP_KERNEL);
> +
> +	if (tfo->submit_queued_cmd) {
> +		se_sess->sq = kcalloc(nr_cpu_ids, sizeof(*se_sess->sq),
> +				      GFP_KERNEL);
> +		if (!se_sess->sq)
> +			return -ENOMEM;
> +
> +		se_sess->q_cnt = nr_cpu_ids;
> +		target_init_sess_cmd_queues(se_sess, se_sess->sq,
> +					    target_queued_submit_work);
> +	}
> +
> +	rc = percpu_ref_init(&se_sess->cmd_count,
> +			     target_release_sess_cmd_refcnt, 0, GFP_KERNEL);
> +	if (rc)
> +		goto free_sq;
> +
> +	return 0;
> +
> +free_sq:
> +	kfree(se_sess->sq);
> +	return rc;
>  }
>  EXPORT_SYMBOL(transport_init_session);
>  
>  void transport_uninit_session(struct se_session *se_sess)
>  {
> +	kfree(se_sess->sq);
>  	/*
>  	 * Drivers like iscsi and loop do not call target_stop_session
>  	 * during session shutdown so we have to drop the ref taken at init
> @@ -1385,7 +1480,6 @@ void transport_init_se_cmd(
>  {
>  	INIT_LIST_HEAD(&cmd->se_delayed_node);
>  	INIT_LIST_HEAD(&cmd->se_qf_node);
> -	INIT_LIST_HEAD(&cmd->se_cmd_list);
>  	INIT_LIST_HEAD(&cmd->state_list);
>  	init_completion(&cmd->t_transport_stop_comp);
>  	cmd->free_compl = NULL;
> @@ -2968,6 +3062,8 @@ void target_wait_for_sess_cmds(struct se_session *se_sess)
>  {
>  	int ret;
>  
> +	target_flush_queued_cmds(se_sess);
> +
>  	WARN_ON_ONCE(!atomic_read(&se_sess->stopped));
>  
>  	do {
> diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
> index 50103a22b0e2..97138bff14d1 100644
> --- a/include/target/target_core_base.h
> +++ b/include/target/target_core_base.h
> @@ -488,7 +488,7 @@ struct se_cmd {
>  	/* Only used for internal passthrough and legacy TCM fabric modules */
>  	struct se_session	*se_sess;
>  	struct se_tmr_req	*se_tmr_req;
> -	struct list_head	se_cmd_list;
> +	struct llist_node	se_cmd_list;
>  	struct completion	*free_compl;
>  	struct completion	*abrt_compl;
>  	const struct target_core_fabric_ops *se_tfo;
> @@ -612,6 +612,12 @@ static inline struct se_node_acl *fabric_stat_to_nacl(struct config_item *item)
>  			acl_fabric_stat_group);
>  }
>  
> +struct se_sess_cmd_queue {
> +	struct llist_head	cmd_list;
> +	struct work_struct	work;
> +	struct se_session	*se_sess;
> +};
> +
>  struct se_session {
>  	atomic_t		stopped;
>  	u64			sess_bin_isid;
> @@ -629,6 +635,8 @@ struct se_session {
>  	void			*sess_cmd_map;
>  	struct sbitmap_queue	sess_tag_pool;
>  	const struct target_core_fabric_ops *tfo;
> +	struct se_sess_cmd_queue *sq;
> +	int			q_cnt;
>  };
>  
>  struct se_device;
> diff --git a/include/target/target_core_fabric.h b/include/target/target_core_fabric.h
> index cdf610838ba5..899948967a65 100644
> --- a/include/target/target_core_fabric.h
> +++ b/include/target/target_core_fabric.h
> @@ -80,6 +80,7 @@ struct target_core_fabric_ops {
>  	int (*queue_status)(struct se_cmd *);
>  	void (*queue_tm_rsp)(struct se_cmd *);
>  	void (*aborted_task)(struct se_cmd *);
> +	void (*submit_queued_cmd)(struct se_cmd *);
>  	/*
>  	 * fabric module calls for target_core_fabric_configfs.c
>  	 */
> @@ -166,6 +167,8 @@ int	target_submit_tmr(struct se_cmd *se_cmd, struct se_session *se_sess,
>  		unsigned char *sense, u64 unpacked_lun,
>  		void *fabric_tmr_ptr, unsigned char tm_type,
>  		gfp_t, u64, int);
> +void	target_queue_cmd_submit(struct se_session *se_sess,
> +				struct se_cmd *se_cmd);
>  int	transport_handle_cdb_direct(struct se_cmd *);
>  sense_reason_t	transport_generic_new_cmd(struct se_cmd *);
>
Mike Christie Feb. 5, 2021, 12:43 a.m. UTC | #2
On 2/4/21 5:13 PM, Chaitanya Kulkarni wrote:
> On 2/4/21 03:41, Mike Christie wrote:
>> loop and vhost-scsi do their target cmd submission from driver
>> workqueues. This allows them to avoid an issue where the backend may
>> block waiting for resources like tags/requests, mem/locks, etc
>> and that ends up blocking their entire submission path and for the
>> case of vhost-scsi both the submission and completion path.
>>
>> This patch adds a helper these drivers can use to submit from the
>> lio workqueue. This code will then be extended in the next patches
>> to fix the plugging of backend devices.
>>
>> Signed-off-by: Mike Christie <michael.christie@oracle.com>
>> ---
>>   drivers/target/target_core_transport.c | 102 ++++++++++++++++++++++++-
>>   include/target/target_core_base.h      |  10 ++-
>>   include/target/target_core_fabric.h    |   3 +
>>   3 files changed, 111 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
>> index 7c5d37bac561..dec89e911348 100644
>> --- a/drivers/target/target_core_transport.c
>> +++ b/drivers/target/target_core_transport.c
>> @@ -41,6 +41,7 @@
>>   #include <trace/events/target.h>
>>   
>>   static struct workqueue_struct *target_completion_wq;
>> +static struct workqueue_struct *target_submission_wq;
>>   static struct kmem_cache *se_sess_cache;
>>   struct kmem_cache *se_ua_cache;
>>   struct kmem_cache *t10_pr_reg_cache;
>> @@ -129,8 +130,15 @@ int init_se_kmem_caches(void)
>>   	if (!target_completion_wq)
>>   		goto out_free_lba_map_mem_cache;
>>   
>> +	target_submission_wq = alloc_workqueue("target_submission",
>> +					       WQ_MEM_RECLAIM, 0);
>> +	if (!target_submission_wq)
>> +		goto out_free_completion_wq;
>> +
>>   	return 0;
>>   
>> +out_free_completion_wq:
>> +	destroy_workqueue(target_completion_wq);
>>   out_free_lba_map_mem_cache:
>>   	kmem_cache_destroy(t10_alua_lba_map_mem_cache);
>>   out_free_lba_map_cache:
>> @@ -153,6 +161,7 @@ int init_se_kmem_caches(void)
>>   
>>   void release_se_kmem_caches(void)
>>   {
>> +	destroy_workqueue(target_submission_wq);
>>   	destroy_workqueue(target_completion_wq);
>>   	kmem_cache_destroy(se_sess_cache);
>>   	kmem_cache_destroy(se_ua_cache);
>> @@ -218,6 +227,69 @@ static void target_release_sess_cmd_refcnt(struct percpu_ref *ref)
>>   	wake_up(&sess->cmd_count_wq);
>>   }
>>   
>> +static void target_queued_submit_work(struct work_struct *work)
>> +{
>> +	struct se_sess_cmd_queue *sq =
>> +				container_of(work, struct se_sess_cmd_queue,
>> +					     work);
>> +	struct se_session *se_sess = sq->se_sess;
>> +	struct se_cmd *se_cmd, *next_cmd;
>> +	struct llist_node *cmd_list;
>> +
>> +	cmd_list = llist_del_all(&sq->cmd_list);
>> +	if (!cmd_list)
>> +		/* Previous call took what we were queued to submit */
>> +		return;
>> +
>> +	cmd_list = llist_reverse_order(cmd_list);
>> +	llist_for_each_entry_safe(se_cmd, next_cmd, cmd_list, se_cmd_list)
>> +		se_sess->tfo->submit_queued_cmd(se_cmd);
>> +}
>> +
>> +static void target_queue_cmd_work(struct se_sess_cmd_queue *q,
>> +				  struct se_cmd *se_cmd, int cpu)
>> +{
>> +	llist_add(&se_cmd->se_cmd_list, &q->cmd_list);
>> +	queue_work_on(cpu, target_submission_wq, &q->work);
>> +}
>> +
>> +/**
>> + * target_queue_cmd_submit - queue a se_cmd to be executed from the lio wq
>> + * @se_sess: cmd's session
>> + * @se_cmd: cmd to queue
>> + */
>> +void target_queue_cmd_submit(struct se_session *se_sess, struct se_cmd *se_cmd)
>> +{
>> +	int cpu = smp_processor_id();
>> +
>> +	target_queue_cmd_work(&se_sess->sq[cpu], se_cmd, cpu);
>> +}
>> +EXPORT_SYMBOL_GPL(target_queue_cmd_submit);
>> +
>> +static void target_flush_queued_cmds(struct se_session *se_sess)
>> +{
>> +	int i;
>> +
>> +	if (!se_sess->sq)
>> +		return;
>> +
>> +	for (i = 0; i < se_sess->q_cnt; i++)
>> +		cancel_work_sync(&se_sess->sq[i].work);
>> +}
>> +
>> +static void target_init_sess_cmd_queues(struct se_session *se_sess,
>> +					struct se_sess_cmd_queue *q,
>> +					void (*work_fn)(struct work_struct *work))
>> +{
>> +	int i;
>> +
>> +	for (i = 0; i < se_sess->q_cnt; i++) {
>> +		init_llist_head(&q[i].cmd_list);
>> +		INIT_WORK(&q[i].work, work_fn);
>> +		q[i].se_sess = se_sess;
>> +	}
>> +}
>> +
> Can we opencode above function if there is only one caller ?
> unless there is a specific reason to have it on its own which I failed to
> understand.

Patch 10 also calls it. I tried to say that at the end of the patch 
description but it was not too clear now that I read it again.

I couldn't decide if I should do it now or later. I selected now since 
it made the 10th patch smaller.
Chaitanya Kulkarni Feb. 5, 2021, 1:50 a.m. UTC | #3
On 2/4/21 16:44, michael.christie@oracle.com wrote:
>>> +
>> Can we opencode above function if there is only one caller ?
>> unless there is a specific reason to have it on its own which I failed to
>> understand.
> Patch 10 also calls it. I tried to say that in the end of the patch 
> description but it was not too clear now that I read it again.
>
> I couldn't decide if I should do it now or later. I selected now since 
> it made the 10th patch smaller.
>
if it does then fine.
diff mbox series

Patch

diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index 7c5d37bac561..dec89e911348 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -41,6 +41,7 @@ 
 #include <trace/events/target.h>
 
 static struct workqueue_struct *target_completion_wq;
+static struct workqueue_struct *target_submission_wq;
 static struct kmem_cache *se_sess_cache;
 struct kmem_cache *se_ua_cache;
 struct kmem_cache *t10_pr_reg_cache;
@@ -129,8 +130,15 @@  int init_se_kmem_caches(void)
 	if (!target_completion_wq)
 		goto out_free_lba_map_mem_cache;
 
+	target_submission_wq = alloc_workqueue("target_submission",
+					       WQ_MEM_RECLAIM, 0);
+	if (!target_submission_wq)
+		goto out_free_completion_wq;
+
 	return 0;
 
+out_free_completion_wq:
+	destroy_workqueue(target_completion_wq);
 out_free_lba_map_mem_cache:
 	kmem_cache_destroy(t10_alua_lba_map_mem_cache);
 out_free_lba_map_cache:
@@ -153,6 +161,7 @@  int init_se_kmem_caches(void)
 
 void release_se_kmem_caches(void)
 {
+	destroy_workqueue(target_submission_wq);
 	destroy_workqueue(target_completion_wq);
 	kmem_cache_destroy(se_sess_cache);
 	kmem_cache_destroy(se_ua_cache);
@@ -218,6 +227,69 @@  static void target_release_sess_cmd_refcnt(struct percpu_ref *ref)
 	wake_up(&sess->cmd_count_wq);
 }
 
+static void target_queued_submit_work(struct work_struct *work)
+{
+	struct se_sess_cmd_queue *sq =
+				container_of(work, struct se_sess_cmd_queue,
+					     work);
+	struct se_session *se_sess = sq->se_sess;
+	struct se_cmd *se_cmd, *next_cmd;
+	struct llist_node *cmd_list;
+
+	cmd_list = llist_del_all(&sq->cmd_list);
+	if (!cmd_list)
+		/* Previous call took what we were queued to submit */
+		return;
+
+	cmd_list = llist_reverse_order(cmd_list);
+	llist_for_each_entry_safe(se_cmd, next_cmd, cmd_list, se_cmd_list)
+		se_sess->tfo->submit_queued_cmd(se_cmd);
+}
+
+static void target_queue_cmd_work(struct se_sess_cmd_queue *q,
+				  struct se_cmd *se_cmd, int cpu)
+{
+	llist_add(&se_cmd->se_cmd_list, &q->cmd_list);
+	queue_work_on(cpu, target_submission_wq, &q->work);
+}
+
+/**
+ * target_queue_cmd_submit - queue a se_cmd to be executed from the lio wq
+ * @se_sess: cmd's session
+ * @se_cmd: cmd to queue
+ */
+void target_queue_cmd_submit(struct se_session *se_sess, struct se_cmd *se_cmd)
+{
+	int cpu = smp_processor_id();
+
+	target_queue_cmd_work(&se_sess->sq[cpu], se_cmd, cpu);
+}
+EXPORT_SYMBOL_GPL(target_queue_cmd_submit);
+
+static void target_flush_queued_cmds(struct se_session *se_sess)
+{
+	int i;
+
+	if (!se_sess->sq)
+		return;
+
+	for (i = 0; i < se_sess->q_cnt; i++)
+		cancel_work_sync(&se_sess->sq[i].work);
+}
+
+static void target_init_sess_cmd_queues(struct se_session *se_sess,
+					struct se_sess_cmd_queue *q,
+					void (*work_fn)(struct work_struct *work))
+{
+	int i;
+
+	for (i = 0; i < se_sess->q_cnt; i++) {
+		init_llist_head(&q[i].cmd_list);
+		INIT_WORK(&q[i].work, work_fn);
+		q[i].se_sess = se_sess;
+	}
+}
+
 /**
  * transport_init_session - initialize a session object
  * @tfo: target core fabric ops
@@ -228,6 +300,8 @@  static void target_release_sess_cmd_refcnt(struct percpu_ref *ref)
 int transport_init_session(const struct target_core_fabric_ops *tfo,
 			   struct se_session *se_sess)
 {
+	int rc;
+
 	INIT_LIST_HEAD(&se_sess->sess_list);
 	INIT_LIST_HEAD(&se_sess->sess_acl_list);
 	spin_lock_init(&se_sess->sess_cmd_lock);
@@ -235,13 +309,34 @@  int transport_init_session(const struct target_core_fabric_ops *tfo,
 	init_completion(&se_sess->stop_done);
 	atomic_set(&se_sess->stopped, 0);
 	se_sess->tfo = tfo;
-	return percpu_ref_init(&se_sess->cmd_count,
-			       target_release_sess_cmd_refcnt, 0, GFP_KERNEL);
+
+	if (tfo->submit_queued_cmd) {
+		se_sess->sq = kcalloc(nr_cpu_ids, sizeof(*se_sess->sq),
+				      GFP_KERNEL);
+		if (!se_sess->sq)
+			return -ENOMEM;
+
+		se_sess->q_cnt = nr_cpu_ids;
+		target_init_sess_cmd_queues(se_sess, se_sess->sq,
+					    target_queued_submit_work);
+	}
+
+	rc = percpu_ref_init(&se_sess->cmd_count,
+			     target_release_sess_cmd_refcnt, 0, GFP_KERNEL);
+	if (rc)
+		goto free_sq;
+
+	return 0;
+
+free_sq:
+	kfree(se_sess->sq);
+	return rc;
 }
 EXPORT_SYMBOL(transport_init_session);
 
 void transport_uninit_session(struct se_session *se_sess)
 {
+	kfree(se_sess->sq);
 	/*
 	 * Drivers like iscsi and loop do not call target_stop_session
 	 * during session shutdown so we have to drop the ref taken at init
@@ -1385,7 +1480,6 @@  void transport_init_se_cmd(
 {
 	INIT_LIST_HEAD(&cmd->se_delayed_node);
 	INIT_LIST_HEAD(&cmd->se_qf_node);
-	INIT_LIST_HEAD(&cmd->se_cmd_list);
 	INIT_LIST_HEAD(&cmd->state_list);
 	init_completion(&cmd->t_transport_stop_comp);
 	cmd->free_compl = NULL;
@@ -2968,6 +3062,8 @@  void target_wait_for_sess_cmds(struct se_session *se_sess)
 {
 	int ret;
 
+	target_flush_queued_cmds(se_sess);
+
 	WARN_ON_ONCE(!atomic_read(&se_sess->stopped));
 
 	do {
diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
index 50103a22b0e2..97138bff14d1 100644
--- a/include/target/target_core_base.h
+++ b/include/target/target_core_base.h
@@ -488,7 +488,7 @@  struct se_cmd {
 	/* Only used for internal passthrough and legacy TCM fabric modules */
 	struct se_session	*se_sess;
 	struct se_tmr_req	*se_tmr_req;
-	struct list_head	se_cmd_list;
+	struct llist_node	se_cmd_list;
 	struct completion	*free_compl;
 	struct completion	*abrt_compl;
 	const struct target_core_fabric_ops *se_tfo;
@@ -612,6 +612,12 @@  static inline struct se_node_acl *fabric_stat_to_nacl(struct config_item *item)
 			acl_fabric_stat_group);
 }
 
+struct se_sess_cmd_queue {
+	struct llist_head	cmd_list;
+	struct work_struct	work;
+	struct se_session	*se_sess;
+};
+
 struct se_session {
 	atomic_t		stopped;
 	u64			sess_bin_isid;
@@ -629,6 +635,8 @@  struct se_session {
 	void			*sess_cmd_map;
 	struct sbitmap_queue	sess_tag_pool;
 	const struct target_core_fabric_ops *tfo;
+	struct se_sess_cmd_queue *sq;
+	int			q_cnt;
 };
 
 struct se_device;
diff --git a/include/target/target_core_fabric.h b/include/target/target_core_fabric.h
index cdf610838ba5..899948967a65 100644
--- a/include/target/target_core_fabric.h
+++ b/include/target/target_core_fabric.h
@@ -80,6 +80,7 @@  struct target_core_fabric_ops {
 	int (*queue_status)(struct se_cmd *);
 	void (*queue_tm_rsp)(struct se_cmd *);
 	void (*aborted_task)(struct se_cmd *);
+	void (*submit_queued_cmd)(struct se_cmd *);
 	/*
 	 * fabric module calls for target_core_fabric_configfs.c
 	 */
@@ -166,6 +167,8 @@  int	target_submit_tmr(struct se_cmd *se_cmd, struct se_session *se_sess,
 		unsigned char *sense, u64 unpacked_lun,
 		void *fabric_tmr_ptr, unsigned char tm_type,
 		gfp_t, u64, int);
+void	target_queue_cmd_submit(struct se_session *se_sess,
+				struct se_cmd *se_cmd);
 int	transport_handle_cdb_direct(struct se_cmd *);
 sense_reason_t	transport_generic_new_cmd(struct se_cmd *);