@@ -55,7 +55,7 @@ static void transport_complete_task_attr(struct se_cmd *cmd);
static void translate_sense_reason(struct se_cmd *cmd, sense_reason_t reason);
static void transport_handle_queue_full(struct se_cmd *cmd,
struct se_device *dev, int err, bool write_pending);
-static void target_complete_ok_work(struct work_struct *work);
+static void target_queued_compl_work(struct work_struct *work);
int init_se_kmem_caches(void)
{
@@ -295,10 +295,20 @@ static void target_queued_submit_work(struct work_struct *work)
}
static void target_queue_cmd_work(struct se_sess_cmd_queue *q,
- struct se_cmd *se_cmd, int cpu)
+ struct se_cmd *se_cmd, int cpu,
+ struct workqueue_struct *wq)
{
llist_add(&se_cmd->se_cmd_list, &q->cmd_list);
- queue_work_on(cpu, target_submission_wq, &q->work);
+ queue_work_on(cpu, wq, &q->work);
+}
+
+static void target_queue_cmd_compl(struct se_cmd *se_cmd)
+{
+ struct se_session *se_sess = se_cmd->se_sess; /* NOTE(review): assumes se_sess is set and its cq array was allocated - confirm for every caller */
+ int cpu = se_cmd->cpuid; /* indexes se_sess->cq[] and is the CPU handed to queue_work_on() */
+
+ target_queue_cmd_work(&se_sess->cq[cpu], se_cmd, cpu,
+ target_completion_wq); /* add the cmd to that queue's list and kick its work on target_completion_wq */
}
/**
@@ -310,7 +320,8 @@ void target_queue_cmd_submit(struct se_session *se_sess, struct se_cmd *se_cmd)
{
int cpu = smp_processor_id();
- target_queue_cmd_work(&se_sess->sq[cpu], se_cmd, cpu);
+ target_queue_cmd_work(&se_sess->sq[cpu], se_cmd, cpu,
+ target_submission_wq);
}
EXPORT_SYMBOL_GPL(target_queue_cmd_submit);
@@ -318,11 +329,13 @@ static void target_flush_queued_cmds(struct se_session *se_sess)
{
int i;
- if (!se_sess->sq)
- return;
+ if (se_sess->sq) {
+ for (i = 0; i < se_sess->q_cnt; i++)
+ cancel_work_sync(&se_sess->sq[i].work);
+ }
for (i = 0; i < se_sess->q_cnt; i++)
- cancel_work_sync(&se_sess->sq[i].work);
+ cancel_work_sync(&se_sess->cq[i].work);
}
static void target_init_sess_cmd_queues(struct se_session *se_sess,
@@ -359,13 +372,21 @@ int transport_init_session(const struct target_core_fabric_ops *tfo,
atomic_set(&se_sess->stopped, 0);
se_sess->tfo = tfo;
+ se_sess->cq = kcalloc(nr_cpu_ids, sizeof(*se_sess->cq), GFP_KERNEL);
+ if (!se_sess->cq)
+ return -ENOMEM;
+ se_sess->q_cnt = nr_cpu_ids;
+ target_init_sess_cmd_queues(se_sess, se_sess->cq,
+ target_queued_compl_work);
+
if (tfo->submit_queued_cmd) {
se_sess->sq = kcalloc(nr_cpu_ids, sizeof(*se_sess->sq),
GFP_KERNEL);
- if (!se_sess->sq)
- return -ENOMEM;
+ if (!se_sess->sq) {
+ rc = -ENOMEM;
+ goto free_cq;
+ }
- se_sess->q_cnt = nr_cpu_ids;
target_init_sess_cmd_queues(se_sess, se_sess->sq,
target_queued_submit_work);
}
@@ -379,12 +400,15 @@ int transport_init_session(const struct target_core_fabric_ops *tfo,
free_sq:
kfree(se_sess->sq);
+free_cq:
+ kfree(se_sess->cq);
return rc;
}
EXPORT_SYMBOL(transport_init_session);
void transport_uninit_session(struct se_session *se_sess)
{
+ kfree(se_sess->cq);
kfree(se_sess->sq);
/*
* Drivers like iscsi and loop do not call target_stop_session
@@ -877,14 +901,6 @@ static void transport_lun_remove_cmd(struct se_cmd *cmd)
percpu_ref_put(&lun->lun_ref);
}
-static void target_complete_failure_work(struct work_struct *work)
-{
- struct se_cmd *cmd = container_of(work, struct se_cmd, work);
-
- transport_generic_request_failure(cmd,
- TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE);
-}
-
/*
* Used when asking transport to copy Sense Data from the underlying
* Linux/SCSI struct scsi_cmnd
@@ -972,13 +988,6 @@ static void target_handle_abort(struct se_cmd *cmd)
transport_cmd_check_stop_to_fabric(cmd);
}
-static void target_abort_work(struct work_struct *work)
-{
- struct se_cmd *cmd = container_of(work, struct se_cmd, work);
-
- target_handle_abort(cmd);
-}
-
static bool target_cmd_interrupted(struct se_cmd *cmd)
{
int post_ret;
@@ -986,8 +995,8 @@ static bool target_cmd_interrupted(struct se_cmd *cmd)
if (cmd->transport_state & CMD_T_ABORTED) {
if (cmd->transport_complete_callback)
cmd->transport_complete_callback(cmd, false, &post_ret);
- INIT_WORK(&cmd->work, target_abort_work);
- queue_work(target_completion_wq, &cmd->work);
+
+ target_queue_cmd_compl(cmd);
return true;
} else if (cmd->transport_state & CMD_T_STOP) {
if (cmd->transport_complete_callback)
@@ -1002,7 +1011,6 @@ static bool target_cmd_interrupted(struct se_cmd *cmd)
/* May be called from interrupt context so must not sleep. */
void target_complete_cmd(struct se_cmd *cmd, u8 scsi_status)
{
- int success;
unsigned long flags;
if (target_cmd_interrupted(cmd))
@@ -1011,25 +1019,11 @@ void target_complete_cmd(struct se_cmd *cmd, u8 scsi_status)
cmd->scsi_status = scsi_status;
spin_lock_irqsave(&cmd->t_state_lock, flags);
- switch (cmd->scsi_status) {
- case SAM_STAT_CHECK_CONDITION:
- if (cmd->se_cmd_flags & SCF_TRANSPORT_TASK_SENSE)
- success = 1;
- else
- success = 0;
- break;
- default:
- success = 1;
- break;
- }
-
cmd->t_state = TRANSPORT_COMPLETE;
cmd->transport_state |= (CMD_T_COMPLETE | CMD_T_ACTIVE);
spin_unlock_irqrestore(&cmd->t_state_lock, flags);
- INIT_WORK(&cmd->work, success ? target_complete_ok_work :
- target_complete_failure_work);
- queue_work_on(cmd->cpuid, target_completion_wq, &cmd->work);
+ target_queue_cmd_compl(cmd);
}
EXPORT_SYMBOL(target_complete_cmd);
@@ -2006,8 +2000,7 @@ void transport_generic_request_failure(struct se_cmd *cmd,
cmd->transport_complete_callback(cmd, false, &post_ret);
if (cmd->transport_state & CMD_T_ABORTED) {
- INIT_WORK(&cmd->work, target_abort_work);
- queue_work(target_completion_wq, &cmd->work);
+ target_queue_cmd_compl(cmd);
return;
}
@@ -2433,10 +2426,32 @@ static bool target_read_prot_action(struct se_cmd *cmd)
return false;
}
-static void target_complete_ok_work(struct work_struct *work)
+static void target_complete_cmd_work(struct se_cmd *cmd)
{
- struct se_cmd *cmd = container_of(work, struct se_cmd, work);
- int ret;
+ int ret, success;
+
+ if (cmd->transport_state & CMD_T_ABORTED) {
+ target_handle_abort(cmd);
+ return;
+ }
+
+ switch (cmd->scsi_status) {
+ case SAM_STAT_CHECK_CONDITION:
+ if (cmd->se_cmd_flags & SCF_TRANSPORT_TASK_SENSE)
+ success = 1;
+ else
+ success = 0;
+ break;
+ default:
+ success = 1;
+ break;
+ }
+
+ if (!success) {
+ transport_generic_request_failure(cmd,
+ TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE);
+ return;
+ }
/*
* Check if we need to move delayed/dormant tasks from cmds on the
@@ -2578,6 +2593,19 @@ static void target_complete_ok_work(struct work_struct *work)
transport_handle_queue_full(cmd, cmd->se_dev, ret, false);
}
+static void target_queued_compl_work(struct work_struct *work)
+{
+ struct se_sess_cmd_queue *cq =
+ container_of(work, struct se_sess_cmd_queue,
+ work);
+ struct se_cmd *se_cmd, *next_cmd;
+ struct llist_node *cmd_list;
+
+ cmd_list = llist_del_all(&cq->cmd_list); /* detach everything queued so far; cmds added later queue the work again */
+ llist_for_each_entry_safe(se_cmd, next_cmd, cmd_list, se_cmd_list) /* NOTE(review): llist_add() inserts at the head, so this presumably runs newest-first - confirm ordering is acceptable */
+ target_complete_cmd_work(se_cmd); /* handles abort, failure and normal completion for one cmd */
+}
+
void target_free_sgl(struct scatterlist *sgl, int nents)
{
sgl_free_n_order(sgl, nents, 0);
@@ -643,6 +643,7 @@ struct se_session {
void *sess_cmd_map;
struct sbitmap_queue sess_tag_pool;
const struct target_core_fabric_ops *tfo;
+ struct se_sess_cmd_queue *cq;
struct se_sess_cmd_queue *sq;
int q_cnt;
};
Queuing a separate work item per command can lead to lots of threads
being created. This patch replaces the per-command completion work with
a list of commands that a single queued work item processes. Combined
with the first patches, this allows tcm loop with higher-performance
initiators like iser to go from around 700K IOPS to 1000K IOPS, and it
reduces the number of threads that get created when the system is under
heavy load and hitting the initiator drivers' tagging limits.

Signed-off-by: Mike Christie <michael.christie@oracle.com>
---
 drivers/target/target_core_transport.c | 124 +++++++++++++++----------
 include/target/target_core_base.h      |   1 +
 2 files changed, 77 insertions(+), 48 deletions(-)