@@ -122,7 +122,7 @@ void retransmit_timer(struct timer_list *t)
spin_lock_irqsave(&qp->state_lock, flags);
if (qp->valid) {
qp->comp.timeout = 1;
- rxe_sched_task(&qp->comp.task);
+ rxe_sched_task(&qp->send_task);
}
spin_unlock_irqrestore(&qp->state_lock, flags);
}
@@ -133,14 +133,14 @@ void rxe_comp_queue_pkt(struct rxe_qp *qp, struct sk_buff *skb)
must_sched = skb_queue_len(&qp->resp_pkts) > 0;
if (must_sched != 0)
- rxe_counter_inc(SKB_TO_PKT(skb)->rxe, RXE_CNT_COMPLETER_SCHED);
+ rxe_counter_inc(SKB_TO_PKT(skb)->rxe, RXE_CNT_SENDER_SCHED);
skb_queue_tail(&qp->resp_pkts, skb);
if (must_sched)
- rxe_sched_task(&qp->comp.task);
+ rxe_sched_task(&qp->send_task);
else
- rxe_run_task(&qp->comp.task);
+ rxe_run_task(&qp->send_task);
}
static inline enum comp_state get_wqe(struct rxe_qp *qp,
@@ -325,7 +325,7 @@ static inline enum comp_state check_ack(struct rxe_qp *qp,
qp->comp.psn = pkt->psn;
if (qp->req.wait_psn) {
qp->req.wait_psn = 0;
- rxe_sched_task(&qp->req.task);
+ rxe_sched_task(&qp->send_task);
}
}
return COMPST_ERROR_RETRY;
@@ -476,7 +476,7 @@ static void do_complete(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
*/
if (qp->req.wait_fence) {
qp->req.wait_fence = 0;
- rxe_sched_task(&qp->req.task);
+ rxe_sched_task(&qp->send_task);
}
}
@@ -515,7 +515,7 @@ static inline enum comp_state complete_ack(struct rxe_qp *qp,
if (qp->req.need_rd_atomic) {
qp->comp.timeout_retry = 0;
qp->req.need_rd_atomic = 0;
- rxe_sched_task(&qp->req.task);
+ rxe_sched_task(&qp->send_task);
}
}
@@ -541,7 +541,7 @@ static inline enum comp_state complete_wqe(struct rxe_qp *qp,
if (qp->req.wait_psn) {
qp->req.wait_psn = 0;
- rxe_sched_task(&qp->req.task);
+ rxe_sched_task(&qp->send_task);
}
}
@@ -737,7 +737,7 @@ int rxe_completer(struct rxe_qp *qp)
if (qp->req.wait_psn) {
qp->req.wait_psn = 0;
- rxe_sched_task(&qp->req.task);
+ rxe_sched_task(&qp->send_task);
}
state = COMPST_DONE;
@@ -792,7 +792,7 @@ int rxe_completer(struct rxe_qp *qp)
RXE_CNT_COMP_RETRY);
qp->req.need_retry = 1;
qp->comp.started_retry = 1;
- rxe_sched_task(&qp->req.task);
+ rxe_sched_task(&qp->send_task);
}
goto done;
@@ -14,7 +14,7 @@ static const struct rdma_stat_desc rxe_counter_descs[] = {
[RXE_CNT_RCV_RNR].name = "rcvd_rnr_err",
[RXE_CNT_SND_RNR].name = "send_rnr_err",
[RXE_CNT_RCV_SEQ_ERR].name = "rcvd_seq_err",
- [RXE_CNT_COMPLETER_SCHED].name = "ack_deferred",
+ [RXE_CNT_SENDER_SCHED].name = "ack_deferred",
[RXE_CNT_RETRY_EXCEEDED].name = "retry_exceeded_err",
[RXE_CNT_RNR_RETRY_EXCEEDED].name = "retry_rnr_exceeded_err",
[RXE_CNT_COMP_RETRY].name = "completer_retry_err",
@@ -18,7 +18,7 @@ enum rxe_counters {
RXE_CNT_RCV_RNR,
RXE_CNT_SND_RNR,
RXE_CNT_RCV_SEQ_ERR,
- RXE_CNT_COMPLETER_SCHED,
+ RXE_CNT_SENDER_SCHED,
RXE_CNT_RETRY_EXCEEDED,
RXE_CNT_RNR_RETRY_EXCEEDED,
RXE_CNT_COMP_RETRY,
@@ -164,7 +164,8 @@ void rxe_dealloc(struct ib_device *ib_dev);
int rxe_completer(struct rxe_qp *qp);
int rxe_requester(struct rxe_qp *qp);
-int rxe_responder(struct rxe_qp *qp);
+int rxe_sender(struct rxe_qp *qp);
+int rxe_receiver(struct rxe_qp *qp);
/* rxe_icrc.c */
int rxe_icrc_init(struct rxe_dev *rxe);
@@ -351,7 +351,7 @@ static void rxe_skb_tx_dtor(struct sk_buff *skb)
if (unlikely(qp->need_req_skb &&
skb_out < RXE_INFLIGHT_SKBS_PER_QP_LOW))
- rxe_sched_task(&qp->req.task);
+ rxe_sched_task(&qp->send_task);
rxe_put(qp);
}
@@ -443,7 +443,7 @@ int rxe_xmit_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
if ((qp_type(qp) != IB_QPT_RC) &&
(pkt->mask & RXE_END_MASK)) {
pkt->wqe->state = wqe_state_done;
- rxe_sched_task(&qp->comp.task);
+ rxe_sched_task(&qp->send_task);
}
rxe_counter_inc(rxe, RXE_CNT_SENT_PKTS);
@@ -265,8 +265,7 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp,
qp->req.opcode = -1;
qp->comp.opcode = -1;
- rxe_init_task(&qp->req.task, qp, rxe_requester);
- rxe_init_task(&qp->comp.task, qp, rxe_completer);
+ rxe_init_task(&qp->send_task, qp, rxe_sender);
qp->qp_timeout_jiffies = 0; /* Can't be set for UD/UC in modify_qp */
if (init->qp_type == IB_QPT_RC) {
@@ -337,7 +336,7 @@ static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp,
return err;
}
- rxe_init_task(&qp->resp.task, qp, rxe_responder);
+ rxe_init_task(&qp->recv_task, qp, rxe_receiver);
qp->resp.opcode = OPCODE_NONE;
qp->resp.msn = 0;
@@ -514,14 +513,12 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp,
static void rxe_qp_reset(struct rxe_qp *qp)
{
/* stop tasks from running */
- rxe_disable_task(&qp->resp.task);
- rxe_disable_task(&qp->comp.task);
- rxe_disable_task(&qp->req.task);
+ rxe_disable_task(&qp->recv_task);
+ rxe_disable_task(&qp->send_task);
/* drain work and packet queuesc */
- rxe_requester(qp);
- rxe_completer(qp);
- rxe_responder(qp);
+ rxe_sender(qp);
+ rxe_receiver(qp);
if (qp->rq.queue)
rxe_queue_reset(qp->rq.queue);
@@ -548,9 +545,8 @@ static void rxe_qp_reset(struct rxe_qp *qp)
cleanup_rd_atomic_resources(qp);
/* reenable tasks */
- rxe_enable_task(&qp->resp.task);
- rxe_enable_task(&qp->comp.task);
- rxe_enable_task(&qp->req.task);
+ rxe_enable_task(&qp->recv_task);
+ rxe_enable_task(&qp->send_task);
}
/* move the qp to the error state */
@@ -562,9 +558,8 @@ void rxe_qp_error(struct rxe_qp *qp)
qp->attr.qp_state = IB_QPS_ERR;
/* drain work and packet queues */
- rxe_sched_task(&qp->resp.task);
- rxe_sched_task(&qp->comp.task);
- rxe_sched_task(&qp->req.task);
+ rxe_sched_task(&qp->recv_task);
+ rxe_sched_task(&qp->send_task);
spin_unlock_irqrestore(&qp->state_lock, flags);
}
@@ -575,8 +570,7 @@ static void rxe_qp_sqd(struct rxe_qp *qp, struct ib_qp_attr *attr,
spin_lock_irqsave(&qp->state_lock, flags);
qp->attr.sq_draining = 1;
- rxe_sched_task(&qp->comp.task);
- rxe_sched_task(&qp->req.task);
+ rxe_sched_task(&qp->send_task);
spin_unlock_irqrestore(&qp->state_lock, flags);
}
@@ -821,19 +815,15 @@ static void rxe_qp_do_cleanup(struct work_struct *work)
del_timer_sync(&qp->rnr_nak_timer);
}
- if (qp->resp.task.func)
- rxe_cleanup_task(&qp->resp.task);
+ if (qp->recv_task.func)
+ rxe_cleanup_task(&qp->recv_task);
- if (qp->req.task.func)
- rxe_cleanup_task(&qp->req.task);
-
- if (qp->comp.task.func)
- rxe_cleanup_task(&qp->comp.task);
+ if (qp->send_task.func)
+ rxe_cleanup_task(&qp->send_task);
/* flush out any receive wr's or pending requests */
- rxe_requester(qp);
- rxe_completer(qp);
- rxe_responder(qp);
+ rxe_sender(qp);
+ rxe_receiver(qp);
if (qp->sq.queue)
rxe_queue_cleanup(qp->sq.queue);
@@ -108,7 +108,7 @@ void rnr_nak_timer(struct timer_list *t)
/* request a send queue retry */
qp->req.need_retry = 1;
qp->req.wait_for_rnr_timer = 0;
- rxe_sched_task(&qp->req.task);
+ rxe_sched_task(&qp->send_task);
}
spin_unlock_irqrestore(&qp->state_lock, flags);
}
@@ -659,7 +659,7 @@ static int rxe_do_local_ops(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
* which can lead to a deadlock. So go ahead and complete
* it now.
*/
- rxe_sched_task(&qp->comp.task);
+ rxe_sched_task(&qp->send_task);
return 0;
}
@@ -786,7 +786,7 @@ int rxe_requester(struct rxe_qp *qp)
qp->req.wqe_index);
wqe->state = wqe_state_done;
wqe->status = IB_WC_SUCCESS;
- rxe_sched_task(&qp->comp.task);
+ rxe_sched_task(&qp->send_task);
goto done;
}
payload = mtu;
@@ -855,7 +855,7 @@ int rxe_requester(struct rxe_qp *qp)
*/
qp->need_req_skb = 1;
- rxe_sched_task(&qp->req.task);
+ rxe_sched_task(&qp->send_task);
goto exit;
}
@@ -878,3 +878,20 @@ int rxe_requester(struct rxe_qp *qp)
out:
return ret;
}
+
+int rxe_sender(struct rxe_qp *qp)
+{
+ int req_ret;
+ int comp_ret;
+
+ /* process the send queue */
+ req_ret = rxe_requester(qp);
+
+ /* process the response queue */
+ comp_ret = rxe_completer(qp);
+
+ /* exit the task loop if both requester and completer
+ * are ready
+ */
+ return (req_ret && comp_ret) ? -EAGAIN : 0;
+}
@@ -58,9 +58,9 @@ void rxe_resp_queue_pkt(struct rxe_qp *qp, struct sk_buff *skb)
(skb_queue_len(&qp->req_pkts) > 1);
if (must_sched)
- rxe_sched_task(&qp->resp.task);
+ rxe_sched_task(&qp->recv_task);
else
- rxe_run_task(&qp->resp.task);
+ rxe_run_task(&qp->recv_task);
}
static inline enum resp_states get_req(struct rxe_qp *qp,
@@ -1485,7 +1485,7 @@ static void flush_recv_queue(struct rxe_qp *qp, bool notify)
qp->resp.wqe = NULL;
}
-int rxe_responder(struct rxe_qp *qp)
+int rxe_receiver(struct rxe_qp *qp)
{
struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
enum resp_states state;
@@ -905,7 +905,7 @@ static int rxe_post_send_kernel(struct rxe_qp *qp,
/* kickoff processing of any posted wqes */
if (good)
- rxe_sched_task(&qp->req.task);
+ rxe_sched_task(&qp->send_task);
return err;
}
@@ -935,7 +935,7 @@ static int rxe_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
if (qp->is_user) {
/* Utilize process context to do protocol processing */
- rxe_run_task(&qp->req.task);
+ rxe_run_task(&qp->send_task);
} else {
err = rxe_post_send_kernel(qp, wr, bad_wr);
if (err)
@@ -1045,7 +1045,7 @@ static int rxe_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
spin_lock_irqsave(&qp->state_lock, flags);
if (qp_state(qp) == IB_QPS_ERR)
- rxe_sched_task(&qp->resp.task);
+ rxe_sched_task(&qp->recv_task);
spin_unlock_irqrestore(&qp->state_lock, flags);
return err;
@@ -113,7 +113,6 @@ struct rxe_req_info {
int need_retry;
int wait_for_rnr_timer;
int noack_pkts;
- struct rxe_task task;
};
struct rxe_comp_info {
@@ -124,7 +123,6 @@ struct rxe_comp_info {
int started_retry;
u32 retry_cnt;
u32 rnr_retry;
- struct rxe_task task;
};
enum rdatm_res_state {
@@ -196,7 +194,6 @@ struct rxe_resp_info {
unsigned int res_head;
unsigned int res_tail;
struct resp_res *res;
- struct rxe_task task;
};
struct rxe_qp {
@@ -229,6 +226,9 @@ struct rxe_qp {
struct sk_buff_head req_pkts;
struct sk_buff_head resp_pkts;
+ struct rxe_task send_task;
+ struct rxe_task recv_task;
+
struct rxe_req_info req;
struct rxe_comp_info comp;
struct rxe_resp_info resp;
Currently the rxe driver has three work queue tasks per qp. These are the req.task, comp.task and resp.task which call rxe_requester(), rxe_completer() and rxe_responder() respectively directly or on work queues. Each of these subroutines checks to see if there is work to be performed on the send queue or on the response packet queue or the request packet queue and will run until there is no work remaining or yield the cpu and reschedule itself until there is no work remaining. This commit combines the req.task and comp.task into a single send.task and renames the resp.task to the recv.task. The combined send.task calls rxe_requester() and rxe_completer() serially and continues until all work on both the send queue and the response packet queue are done. In various benchmarks the performance is either improved or left the same. At high scale there is a significant reduction in the load on the cpu. This is the first step in combining these two tasks. Once they are serialized cross rescheduling of req.task and comp.task can be more efficiently handled by just letting the send.task continue to run. This will be done in the next several patches. Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com> --- drivers/infiniband/sw/rxe/rxe_comp.c | 20 +++++----- drivers/infiniband/sw/rxe/rxe_hw_counters.c | 2 +- drivers/infiniband/sw/rxe/rxe_hw_counters.h | 2 +- drivers/infiniband/sw/rxe/rxe_loc.h | 3 +- drivers/infiniband/sw/rxe/rxe_net.c | 4 +- drivers/infiniband/sw/rxe/rxe_qp.c | 44 ++++++++------------- drivers/infiniband/sw/rxe/rxe_req.c | 25 ++++++++++-- drivers/infiniband/sw/rxe/rxe_resp.c | 6 +-- drivers/infiniband/sw/rxe/rxe_verbs.c | 6 +-- drivers/infiniband/sw/rxe/rxe_verbs.h | 6 +-- 10 files changed, 63 insertions(+), 55 deletions(-)