| Message ID | 20210204113513.93204-3-michael.christie@oracle.com (mailing list archive) |
|---|---|
| State | New, archived |
| Series | target: fix cmd plugging and completion |
On 2/4/21 03:41, Mike Christie wrote:
> loop and vhost-scsi do their target cmd submission from driver
> workqueues. This allows them to avoid an issue where the backend may
> block waiting for resources like tags/requests, mem/locks, etc.,
> which would end up blocking their entire submission path, and in the
> case of vhost-scsi both the submission and completion paths.
>
> This patch adds a helper these drivers can use to submit from the
> lio workqueue. This code will then be extended in the next patches
> to fix the plugging of backend devices.
>
> Signed-off-by: Mike Christie <michael.christie@oracle.com>
> ---
>  drivers/target/target_core_transport.c | 102 ++++++++++++++++++++++++-
>  include/target/target_core_base.h      |  10 ++-
>  include/target/target_core_fabric.h    |   3 +
>  3 files changed, 111 insertions(+), 4 deletions(-)

[...]

> +static void target_init_sess_cmd_queues(struct se_session *se_sess,
> +					struct se_sess_cmd_queue *q,
> +					void (*work_fn)(struct work_struct *work))
> +{
> +	int i;
> +
> +	for (i = 0; i < se_sess->q_cnt; i++) {
> +		init_llist_head(&q[i].cmd_list);
> +		INIT_WORK(&q[i].work, work_fn);
> +		q[i].se_sess = se_sess;
> +	}
> +}
> +

Can we open-code the above function if there is only one caller, unless
there is a specific reason to have it on its own that I failed to
understand?
On 2/4/21 5:13 PM, Chaitanya Kulkarni wrote:
> On 2/4/21 03:41, Mike Christie wrote:
[...]
>> +static void target_init_sess_cmd_queues(struct se_session *se_sess,
>> +					struct se_sess_cmd_queue *q,
>> +					void (*work_fn)(struct work_struct *work))
>> +{
>> +	int i;
>> +
>> +	for (i = 0; i < se_sess->q_cnt; i++) {
>> +		init_llist_head(&q[i].cmd_list);
>> +		INIT_WORK(&q[i].work, work_fn);
>> +		q[i].se_sess = se_sess;
>> +	}
>> +}
>> +
> Can we open-code the above function if there is only one caller, unless
> there is a specific reason to have it on its own that I failed to
> understand?

Patch 10 also calls it. I tried to say that at the end of the patch
description but it was not too clear now that I read it again.

I couldn't decide if I should do it now or later. I selected now since
it made the 10th patch smaller.
On 2/4/21 16:44, michael.christie@oracle.com wrote:
>> Can we open-code the above function if there is only one caller, unless
>> there is a specific reason to have it on its own that I failed to
>> understand?
> Patch 10 also calls it. I tried to say that at the end of the patch
> description but it was not too clear now that I read it again.
>
> I couldn't decide if I should do it now or later. I selected now since
> it made the 10th patch smaller.

If it does then fine.
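For context alongside the thread above, here is a minimal sketch of the
lockless-list pattern that target_queued_submit_work() in the patch
(reproduced in full below) builds on. llist_add() pushes new nodes onto
the head of the list, so a consumer that wants submission (FIFO) order
must reverse the batch it takes with llist_del_all(). The struct item
type and the produce()/consume() names are illustrative only; they are
not part of the patch.

	#include <linux/llist.h>
	#include <linux/slab.h>

	struct item {
		struct llist_node node;
	};

	static LLIST_HEAD(queue);

	/* Producer: lockless, safe to call from any context. */
	static void produce(struct item *it)
	{
		llist_add(&it->node, &queue);
	}

	/* Consumer: take the whole batch at once, then restore FIFO order. */
	static void consume(void)
	{
		struct llist_node *batch;
		struct item *it, *next;

		batch = llist_del_all(&queue);
		if (!batch)
			return;	/* another invocation already drained the list */

		batch = llist_reverse_order(batch);
		llist_for_each_entry_safe(it, next, batch, node)
			kfree(it);	/* stand-in for handing the entry onward */
	}

The _safe iterator is used in the same way as in the patch, presumably
because an entry may be freed as a side effect of processing it, so the
next pointer has to be fetched before the callback runs.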
loop and vhost-scsi do their target cmd submission from driver
workqueues. This allows them to avoid an issue where the backend may
block waiting for resources like tags/requests, mem/locks, etc.,
which would end up blocking their entire submission path, and in the
case of vhost-scsi both the submission and completion paths.

This patch adds a helper these drivers can use to submit from the
lio workqueue. This code will then be extended in the next patches
to fix the plugging of backend devices.

Signed-off-by: Mike Christie <michael.christie@oracle.com>
---
 drivers/target/target_core_transport.c | 102 ++++++++++++++++++++++++-
 include/target/target_core_base.h      |  10 ++-
 include/target/target_core_fabric.h    |   3 +
 3 files changed, 111 insertions(+), 4 deletions(-)

diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index 7c5d37bac561..dec89e911348 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -41,6 +41,7 @@
 #include <trace/events/target.h>
 
 static struct workqueue_struct *target_completion_wq;
+static struct workqueue_struct *target_submission_wq;
 static struct kmem_cache *se_sess_cache;
 struct kmem_cache *se_ua_cache;
 struct kmem_cache *t10_pr_reg_cache;
@@ -129,8 +130,15 @@ int init_se_kmem_caches(void)
 	if (!target_completion_wq)
 		goto out_free_lba_map_mem_cache;
 
+	target_submission_wq = alloc_workqueue("target_submission",
+					       WQ_MEM_RECLAIM, 0);
+	if (!target_submission_wq)
+		goto out_free_completion_wq;
+
 	return 0;
 
+out_free_completion_wq:
+	destroy_workqueue(target_completion_wq);
 out_free_lba_map_mem_cache:
 	kmem_cache_destroy(t10_alua_lba_map_mem_cache);
 out_free_lba_map_cache:
@@ -153,6 +161,7 @@ int init_se_kmem_caches(void)
 
 void release_se_kmem_caches(void)
 {
+	destroy_workqueue(target_submission_wq);
 	destroy_workqueue(target_completion_wq);
 	kmem_cache_destroy(se_sess_cache);
 	kmem_cache_destroy(se_ua_cache);
@@ -218,6 +227,69 @@ static void target_release_sess_cmd_refcnt(struct percpu_ref *ref)
 	wake_up(&sess->cmd_count_wq);
 }
 
+static void target_queued_submit_work(struct work_struct *work)
+{
+	struct se_sess_cmd_queue *sq =
+				container_of(work, struct se_sess_cmd_queue,
+					     work);
+	struct se_session *se_sess = sq->se_sess;
+	struct se_cmd *se_cmd, *next_cmd;
+	struct llist_node *cmd_list;
+
+	cmd_list = llist_del_all(&sq->cmd_list);
+	if (!cmd_list)
+		/* Previous call took what we were queued to submit */
+		return;
+
+	cmd_list = llist_reverse_order(cmd_list);
+	llist_for_each_entry_safe(se_cmd, next_cmd, cmd_list, se_cmd_list)
+		se_sess->tfo->submit_queued_cmd(se_cmd);
+}
+
+static void target_queue_cmd_work(struct se_sess_cmd_queue *q,
+				  struct se_cmd *se_cmd, int cpu)
+{
+	llist_add(&se_cmd->se_cmd_list, &q->cmd_list);
+	queue_work_on(cpu, target_submission_wq, &q->work);
+}
+
+/**
+ * target_queue_cmd_submit - queue a se_cmd to be executed from the lio wq
+ * @se_sess: cmd's session
+ * @cmd_list: cmd to queue
+ */
+void target_queue_cmd_submit(struct se_session *se_sess, struct se_cmd *se_cmd)
+{
+	int cpu = smp_processor_id();
+
+	target_queue_cmd_work(&se_sess->sq[cpu], se_cmd, cpu);
+}
+EXPORT_SYMBOL_GPL(target_queue_cmd_submit);
+
+static void target_flush_queued_cmds(struct se_session *se_sess)
+{
+	int i;
+
+	if (!se_sess->sq)
+		return;
+
+	for (i = 0; i < se_sess->q_cnt; i++)
+		cancel_work_sync(&se_sess->sq[i].work);
+}
+
+static void target_init_sess_cmd_queues(struct se_session *se_sess,
+					struct se_sess_cmd_queue *q,
+					void (*work_fn)(struct work_struct *work))
+{
+	int i;
+
+	for (i = 0; i < se_sess->q_cnt; i++) {
+		init_llist_head(&q[i].cmd_list);
+		INIT_WORK(&q[i].work, work_fn);
+		q[i].se_sess = se_sess;
+	}
+}
+
 /**
  * transport_init_session - initialize a session object
  * @tfo: target core fabric ops
@@ -228,6 +300,8 @@ static void target_release_sess_cmd_refcnt(struct percpu_ref *ref)
 int transport_init_session(const struct target_core_fabric_ops *tfo,
 			   struct se_session *se_sess)
 {
+	int rc;
+
 	INIT_LIST_HEAD(&se_sess->sess_list);
 	INIT_LIST_HEAD(&se_sess->sess_acl_list);
 	spin_lock_init(&se_sess->sess_cmd_lock);
@@ -235,13 +309,34 @@ int transport_init_session(const struct target_core_fabric_ops *tfo,
 	init_completion(&se_sess->stop_done);
 	atomic_set(&se_sess->stopped, 0);
 	se_sess->tfo = tfo;
-	return percpu_ref_init(&se_sess->cmd_count,
-			       target_release_sess_cmd_refcnt, 0, GFP_KERNEL);
+
+	if (tfo->submit_queued_cmd) {
+		se_sess->sq = kcalloc(nr_cpu_ids, sizeof(*se_sess->sq),
+				      GFP_KERNEL);
+		if (!se_sess->sq)
+			return -ENOMEM;
+
+		se_sess->q_cnt = nr_cpu_ids;
+		target_init_sess_cmd_queues(se_sess, se_sess->sq,
+					    target_queued_submit_work);
+	}
+
+	rc = percpu_ref_init(&se_sess->cmd_count,
+			     target_release_sess_cmd_refcnt, 0, GFP_KERNEL);
+	if (rc)
+		goto free_sq;
+
+	return 0;
+
+free_sq:
+	kfree(se_sess->sq);
+	return rc;
 }
 EXPORT_SYMBOL(transport_init_session);
 
 void transport_uninit_session(struct se_session *se_sess)
 {
+	kfree(se_sess->sq);
 	/*
 	 * Drivers like iscsi and loop do not call target_stop_session
 	 * during session shutdown so we have to drop the ref taken at init
@@ -1385,7 +1480,6 @@ void transport_init_se_cmd(
 {
 	INIT_LIST_HEAD(&cmd->se_delayed_node);
 	INIT_LIST_HEAD(&cmd->se_qf_node);
-	INIT_LIST_HEAD(&cmd->se_cmd_list);
 	INIT_LIST_HEAD(&cmd->state_list);
 	init_completion(&cmd->t_transport_stop_comp);
 	cmd->free_compl = NULL;
@@ -2968,6 +3062,8 @@ void target_wait_for_sess_cmds(struct se_session *se_sess)
 {
 	int ret;
 
+	target_flush_queued_cmds(se_sess);
+
 	WARN_ON_ONCE(!atomic_read(&se_sess->stopped));
 
 	do {
diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
index 50103a22b0e2..97138bff14d1 100644
--- a/include/target/target_core_base.h
+++ b/include/target/target_core_base.h
@@ -488,7 +488,7 @@ struct se_cmd {
 	/* Only used for internal passthrough and legacy TCM fabric modules */
 	struct se_session	*se_sess;
 	struct se_tmr_req	*se_tmr_req;
-	struct list_head	se_cmd_list;
+	struct llist_node	se_cmd_list;
 	struct completion	*free_compl;
 	struct completion	*abrt_compl;
 	const struct target_core_fabric_ops *se_tfo;
@@ -612,6 +612,12 @@ static inline struct se_node_acl *fabric_stat_to_nacl(struct config_item *item)
 			acl_fabric_stat_group);
 }
 
+struct se_sess_cmd_queue {
+	struct llist_head	cmd_list;
+	struct work_struct	work;
+	struct se_session	*se_sess;
+};
+
 struct se_session {
 	atomic_t		stopped;
 	u64			sess_bin_isid;
@@ -629,6 +635,8 @@ struct se_session {
 	void			*sess_cmd_map;
 	struct sbitmap_queue	sess_tag_pool;
 	const struct target_core_fabric_ops *tfo;
+	struct se_sess_cmd_queue *sq;
+	int			q_cnt;
 };
 
 struct se_device;
diff --git a/include/target/target_core_fabric.h b/include/target/target_core_fabric.h
index cdf610838ba5..899948967a65 100644
--- a/include/target/target_core_fabric.h
+++ b/include/target/target_core_fabric.h
@@ -80,6 +80,7 @@ struct target_core_fabric_ops {
 	int (*queue_status)(struct se_cmd *);
 	void (*queue_tm_rsp)(struct se_cmd *);
 	void (*aborted_task)(struct se_cmd *);
+	void (*submit_queued_cmd)(struct se_cmd *);
 	/*
 	 * fabric module calls for target_core_fabric_configfs.c
 	 */
@@ -166,6 +167,8 @@ int target_submit_tmr(struct se_cmd *se_cmd, struct se_session *se_sess,
 		unsigned char *sense, u64 unpacked_lun,
 		void *fabric_tmr_ptr, unsigned char tm_type,
 		gfp_t, u64, int);
+void target_queue_cmd_submit(struct se_session *se_sess,
+			     struct se_cmd *se_cmd);
 int transport_handle_cdb_direct(struct se_cmd *);
 sense_reason_t transport_generic_new_cmd(struct se_cmd *);
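To make the intended call flow concrete, here is a minimal sketch of how
a fabric module might opt in to the new helper. The my_fabric_* names
are hypothetical; only submit_queued_cmd() and target_queue_cmd_submit()
come from this patch. Note that transport_init_session() allocates the
per-CPU queue array only when the fabric sets ->submit_queued_cmd.

	#include <target/target_core_base.h>
	#include <target/target_core_fabric.h>

	/*
	 * Runs on the lio submission workqueue, on the CPU that queued the
	 * command, so blocking here on backend tags/requests or memory no
	 * longer stalls the fabric's own submission (or, for vhost-scsi,
	 * completion) path.
	 */
	static void my_fabric_submit_queued_cmd(struct se_cmd *se_cmd)
	{
		/* parse and hand se_cmd to the target core here */
	}

	static const struct target_core_fabric_ops my_fabric_ops = {
		/* ... the fabric's other callbacks ... */
		/*
		 * Setting this makes transport_init_session() allocate the
		 * per-CPU se_sess_cmd_queue array (se_sess->sq).
		 */
		.submit_queued_cmd	= my_fabric_submit_queued_cmd,
	};

	/* Fabric fast path: defer instead of submitting inline. */
	static void my_fabric_handle_cmd(struct se_session *se_sess,
					 struct se_cmd *se_cmd)
	{
		target_queue_cmd_submit(se_sess, se_cmd);
	}

Because target_queue_cmd_submit() picks the queue with smp_processor_id()
and queue_work_on() targets the same CPU, commands queued from one CPU
stay on that CPU's llist and are drained in FIFO order by its work item.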