Message ID | 20180313152515.15236-1-sergeygo@mellanox.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Hi Sagi, can you please review this one (you're the only active maintainer for iSER) ? We'll fix that soon :) -Max. On 3/13/2018 5:25 PM, Sergey Gorenko wrote: > Some users complain about RNR errors on the target, when heavy > high-priority tasks run on the initiator. After the > investigation, we found out that the receive WRs were exhausted, > because the initiator could not post them on time. > > Receive work reqeusts are posted in chunks of min_posted_rx to > reduce the number of hits to the HCA. The WRs are posted in the > receive completion handler when the number of free receive buffers > reaches min_posted_rx. But on a high-loaded host, receive CQEs > processing can be delayed and all receive WRs will be exhausted. > In this case, the target will get an RNR error. > > To avoid this, we post receive WR, as soon as at least one receive > buffer is freed. This increases the number of hits to the HCA, but > test results show that performance degradation is not significant. > > Performance results running fio (8 jobs, 64 iodepth) using ramdisk > (w/w.o patch): > > bs IOPS(randread) IOPS(randwrite) > ------ --------------- --------------- > 512 329.4K / 340.3K 379.1K / 387.7K > 1K 333.4K / 337.6K 364.2K / 370.0K > 2K 321.1K / 323.5K 334.2K / 337.8K > 4K 300.7K / 302.9K 290.2K / 291.6K > 8K 235.9K / 237.1K 228.2K / 228.8K > 16K 176.3K / 177.0K 126.3K / 125.9K > 32K 115.2K / 115.4K 82.2K / 82.0K > 64K 70.7K / 70.7K 47.8K / 47.6K > 128K 38.5K / 38.6K 25.8K / 25.7K > 256K 20.7K / 20.7K 13.7K / 13.6K > 512K 10.0K / 10.0K 7.0K / 7.0K > > Signed-off-by: Sergey Gorenko <sergeygo@mellanox.com> > Signed-off-by: Vladimir Neyelov <vladimirn@mellanox.com> > Reviewed-by: Max Gurtovoy <maxg@mellanox.com> > --- > drivers/infiniband/ulp/iser/iscsi_iser.h | 14 +-------- > drivers/infiniband/ulp/iser/iser_initiator.c | 47 ++++++++++------------------ > drivers/infiniband/ulp/iser/iser_verbs.c | 39 +++++++---------------- > 3 files changed, 29 insertions(+), 71 deletions(-) > > diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h > index c1ae4aeae2f9..925996ddcf9f 100644 > --- a/drivers/infiniband/ulp/iser/iscsi_iser.h > +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h > @@ -123,8 +123,6 @@ > > #define ISER_QP_MAX_RECV_DTOS (ISER_DEF_XMIT_CMDS_MAX) > > -#define ISER_MIN_POSTED_RX (ISER_DEF_XMIT_CMDS_MAX >> 2) > - > /* the max TX (send) WR supported by the iSER QP is defined by * > * max_send_wr = T * (1 + D) + C ; D is how many inflight dataouts we expect * > * to have at max for SCSI command. The tx posting & completion handling code * > @@ -452,9 +450,7 @@ struct iser_fr_pool { > * > * @cma_id: rdma_cm connection maneger handle > * @qp: Connection Queue-pair > - * @post_recv_buf_count: post receive counter > * @sig_count: send work request signal count > - * @rx_wr: receive work request for batch posts > * @device: reference to iser device > * @comp: iser completion context > * @fr_pool: connection fast registration poool > @@ -463,9 +459,7 @@ struct iser_fr_pool { > struct ib_conn { > struct rdma_cm_id *cma_id; > struct ib_qp *qp; > - int post_recv_buf_count; > u8 sig_count; > - struct ib_recv_wr rx_wr[ISER_MIN_POSTED_RX]; > struct iser_device *device; > struct iser_comp *comp; > struct iser_fr_pool fr_pool; > @@ -482,8 +476,6 @@ struct ib_conn { > * @state: connection logical state > * @qp_max_recv_dtos: maximum number of data outs, corresponds > * to max number of post recvs > - * @qp_max_recv_dtos_mask: (qp_max_recv_dtos - 1) > - * @min_posted_rx: (qp_max_recv_dtos >> 2) > * @max_cmds: maximum cmds allowed for this connection > * @name: connection peer portal > * @release_work: deffered work for release job > @@ -494,7 +486,6 @@ struct ib_conn { > * (state is ISER_CONN_UP) > * @conn_list: entry in ig conn list > * @login_desc: login descriptor > - * @rx_desc_head: head of rx_descs cyclic buffer > * @rx_descs: rx buffers array (cyclic buffer) > * @num_rx_descs: number of rx descriptors > * @scsi_sg_tablesize: scsi host sg_tablesize > @@ -505,8 +496,6 @@ struct iser_conn { > struct iscsi_endpoint *ep; > enum iser_conn_state state; > unsigned qp_max_recv_dtos; > - unsigned qp_max_recv_dtos_mask; > - unsigned min_posted_rx; > u16 max_cmds; > char name[ISER_OBJECT_NAME_SIZE]; > struct work_struct release_work; > @@ -516,7 +505,6 @@ struct iser_conn { > struct completion up_completion; > struct list_head conn_list; > struct iser_login_desc login_desc; > - unsigned int rx_desc_head; > struct iser_rx_desc *rx_descs; > u32 num_rx_descs; > unsigned short scsi_sg_tablesize; > @@ -638,7 +626,7 @@ void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task, > enum iser_data_dir cmd_dir); > > int iser_post_recvl(struct iser_conn *iser_conn); > -int iser_post_recvm(struct iser_conn *iser_conn, int count); > +int iser_post_recvm(struct iser_conn *iser_conn, struct iser_rx_desc *rx_desc); > int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc, > bool signal); > > diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c > index df49c4eb67f7..7d4cb480db75 100644 > --- a/drivers/infiniband/ulp/iser/iser_initiator.c > +++ b/drivers/infiniband/ulp/iser/iser_initiator.c > @@ -247,8 +247,6 @@ int iser_alloc_rx_descriptors(struct iser_conn *iser_conn, > struct iser_device *device = ib_conn->device; > > iser_conn->qp_max_recv_dtos = session->cmds_max; > - iser_conn->qp_max_recv_dtos_mask = session->cmds_max - 1; /* cmds_max is 2^N */ > - iser_conn->min_posted_rx = iser_conn->qp_max_recv_dtos >> 2; > > if (device->reg_ops->alloc_reg_res(ib_conn, session->scsi_cmds_max, > iser_conn->scsi_sg_tablesize)) > @@ -279,7 +277,6 @@ int iser_alloc_rx_descriptors(struct iser_conn *iser_conn, > rx_sg->lkey = device->pd->local_dma_lkey; > } > > - iser_conn->rx_desc_head = 0; > return 0; > > rx_desc_dma_map_failed: > @@ -322,30 +319,28 @@ void iser_free_rx_descriptors(struct iser_conn *iser_conn) > static int iser_post_rx_bufs(struct iscsi_conn *conn, struct iscsi_hdr *req) > { > struct iser_conn *iser_conn = conn->dd_data; > - struct ib_conn *ib_conn = &iser_conn->ib_conn; > struct iscsi_session *session = conn->session; > + /* There is one posted recv buffer (for the last login response). */ > + int already_posted = 1; > + int i; > > iser_dbg("req op %x flags %x\n", req->opcode, req->flags); > /* check if this is the last login - going to full feature phase */ > if ((req->flags & ISCSI_FULL_FEATURE_PHASE) != ISCSI_FULL_FEATURE_PHASE) > return 0; > > - /* > - * Check that there is one posted recv buffer > - * (for the last login response). > - */ > - WARN_ON(ib_conn->post_recv_buf_count != 1); > - > if (session->discovery_sess) { > iser_info("Discovery session, re-using login RX buffer\n"); > return 0; > } else > iser_info("Normal session, posting batch of RX %d buffers\n", > - iser_conn->min_posted_rx); > + iser_conn->qp_max_recv_dtos - already_posted); > > /* Initial post receive buffers */ > - if (iser_post_recvm(iser_conn, iser_conn->min_posted_rx)) > - return -ENOMEM; > + for (i = already_posted; i < iser_conn->qp_max_recv_dtos; i++) { > + if (iser_post_recvm(iser_conn, &iser_conn->rx_descs[i])) > + return -ENOMEM; > + } > > return 0; > } > @@ -585,7 +580,11 @@ void iser_login_rsp(struct ib_cq *cq, struct ib_wc *wc) > desc->rsp_dma, ISER_RX_LOGIN_SIZE, > DMA_FROM_DEVICE); > > - ib_conn->post_recv_buf_count--; > + if (iser_conn->iscsi_conn->session->discovery_sess) > + return; > + > + /* Post the first RX buffer that is skipped in iser_post_rx_bufs() */ > + iser_post_recvm(iser_conn, iser_conn->rx_descs); > } > > static inline void > @@ -645,8 +644,7 @@ void iser_task_rsp(struct ib_cq *cq, struct ib_wc *wc) > struct iser_conn *iser_conn = to_iser_conn(ib_conn); > struct iser_rx_desc *desc = iser_rx(wc->wr_cqe); > struct iscsi_hdr *hdr; > - int length; > - int outstanding, count, err; > + int length, err; > > if (unlikely(wc->status != IB_WC_SUCCESS)) { > iser_err_comp(wc, "task_rsp"); > @@ -675,20 +673,9 @@ void iser_task_rsp(struct ib_cq *cq, struct ib_wc *wc) > desc->dma_addr, ISER_RX_PAYLOAD_SIZE, > DMA_FROM_DEVICE); > > - /* decrementing conn->post_recv_buf_count only --after-- freeing the * > - * task eliminates the need to worry on tasks which are completed in * > - * parallel to the execution of iser_conn_term. So the code that waits * > - * for the posted rx bufs refcount to become zero handles everything */ > - ib_conn->post_recv_buf_count--; > - > - outstanding = ib_conn->post_recv_buf_count; > - if (outstanding + iser_conn->min_posted_rx <= iser_conn->qp_max_recv_dtos) { > - count = min(iser_conn->qp_max_recv_dtos - outstanding, > - iser_conn->min_posted_rx); > - err = iser_post_recvm(iser_conn, count); > - if (err) > - iser_err("posting %d rx bufs err %d\n", count, err); > - } > + err = iser_post_recvm(iser_conn, desc); > + if (err) > + iser_err("posting rx buffer err %d\n", err); > } > > void iser_cmd_comp(struct ib_cq *cq, struct ib_wc *wc) > diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c > index 56b7240a3fc3..2019500a2425 100644 > --- a/drivers/infiniband/ulp/iser/iser_verbs.c > +++ b/drivers/infiniband/ulp/iser/iser_verbs.c > @@ -936,7 +936,6 @@ void iser_conn_init(struct iser_conn *iser_conn) > INIT_LIST_HEAD(&iser_conn->conn_list); > mutex_init(&iser_conn->state_mutex); > > - ib_conn->post_recv_buf_count = 0; > ib_conn->reg_cqe.done = iser_reg_comp; > } > > @@ -1020,44 +1019,28 @@ int iser_post_recvl(struct iser_conn *iser_conn) > wr.num_sge = 1; > wr.next = NULL; > > - ib_conn->post_recv_buf_count++; > ib_ret = ib_post_recv(ib_conn->qp, &wr, &wr_failed); > - if (ib_ret) { > + if (ib_ret) > iser_err("ib_post_recv failed ret=%d\n", ib_ret); > - ib_conn->post_recv_buf_count--; > - } > > return ib_ret; > } > > -int iser_post_recvm(struct iser_conn *iser_conn, int count) > +int iser_post_recvm(struct iser_conn *iser_conn, struct iser_rx_desc *rx_desc) > { > struct ib_conn *ib_conn = &iser_conn->ib_conn; > - unsigned int my_rx_head = iser_conn->rx_desc_head; > - struct iser_rx_desc *rx_desc; > - struct ib_recv_wr *wr, *wr_failed; > - int i, ib_ret; > - > - for (wr = ib_conn->rx_wr, i = 0; i < count; i++, wr++) { > - rx_desc = &iser_conn->rx_descs[my_rx_head]; > - rx_desc->cqe.done = iser_task_rsp; > - wr->wr_cqe = &rx_desc->cqe; > - wr->sg_list = &rx_desc->rx_sg; > - wr->num_sge = 1; > - wr->next = wr + 1; > - my_rx_head = (my_rx_head + 1) & iser_conn->qp_max_recv_dtos_mask; > - } > + struct ib_recv_wr wr, *wr_failed; > + int ib_ret; > > - wr--; > - wr->next = NULL; /* mark end of work requests list */ > + rx_desc->cqe.done = iser_task_rsp; > + wr.wr_cqe = &rx_desc->cqe; > + wr.sg_list = &rx_desc->rx_sg; > + wr.num_sge = 1; > + wr.next = NULL; > > - ib_conn->post_recv_buf_count += count; > - ib_ret = ib_post_recv(ib_conn->qp, ib_conn->rx_wr, &wr_failed); > - if (ib_ret) { > + ib_ret = ib_post_recv(ib_conn->qp, &wr, &wr_failed); > + if (ib_ret) > iser_err("ib_post_recv failed ret=%d\n", ib_ret); > - ib_conn->post_recv_buf_count -= count; > - } else > - iser_conn->rx_desc_head = my_rx_head; > > return ib_ret; > } > -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Hi Sergey, sorry for the late response, missed this patch. On 03/13/2018 05:25 PM, Sergey Gorenko wrote: > Some users complain about RNR errors on the target, when heavy > high-priority tasks run on the initiator. After the > investigation, we found out that the receive WRs were exhausted, > because the initiator could not post them on time. > > Receive work reqeusts are posted in chunks of min_posted_rx to > reduce the number of hits to the HCA. The WRs are posted in the > receive completion handler when the number of free receive buffers > reaches min_posted_rx. But on a high-loaded host, receive CQEs > processing can be delayed and all receive WRs will be exhausted. > In this case, the target will get an RNR error. > > To avoid this, we post receive WR, as soon as at least one receive > buffer is freed. This increases the number of hits to the HCA, but > test results show that performance degradation is not significant. Overall this looks fine, small comments below. > @@ -322,30 +319,28 @@ void iser_free_rx_descriptors(struct iser_conn *iser_conn) > static int iser_post_rx_bufs(struct iscsi_conn *conn, struct iscsi_hdr *req) > { > struct iser_conn *iser_conn = conn->dd_data; > - struct ib_conn *ib_conn = &iser_conn->ib_conn; > struct iscsi_session *session = conn->session; > + /* There is one posted recv buffer (for the last login response). */ > + int already_posted = 1; No need for a local variable for a constant really. > + int i; > > iser_dbg("req op %x flags %x\n", req->opcode, req->flags); > /* check if this is the last login - going to full feature phase */ > if ((req->flags & ISCSI_FULL_FEATURE_PHASE) != ISCSI_FULL_FEATURE_PHASE) > return 0; > > - /* > - * Check that there is one posted recv buffer > - * (for the last login response). > - */ > - WARN_ON(ib_conn->post_recv_buf_count != 1); > - > if (session->discovery_sess) { > iser_info("Discovery session, re-using login RX buffer\n"); > return 0; > } else > iser_info("Normal session, posting batch of RX %d buffers\n", > - iser_conn->min_posted_rx); > + iser_conn->qp_max_recv_dtos - already_posted); > > /* Initial post receive buffers */ > - if (iser_post_recvm(iser_conn, iser_conn->min_posted_rx)) > - return -ENOMEM; > + for (i = already_posted; i < iser_conn->qp_max_recv_dtos; i++) { > + if (iser_post_recvm(iser_conn, &iser_conn->rx_descs[i])) > + return -ENOMEM; > + } Please propagate the return code here. -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Hi Sagi, Thanks for your feedback. I will update the patch according to your remarks and send a new version soon. Regards, Sergey > -----Original Message----- > From: Sagi Grimberg [mailto:sagi@grimberg.me] > Sent: Monday, April 16, 2018 11:57 AM > To: Sergey Gorenko <sergeygo@mellanox.com>; linux- > rdma@vger.kernel.org; Jason Gunthorpe <jgg@mellanox.com>; > dledford@redhat.com > Cc: Vladimir Neyelov <vladimirn@mellanox.com> > Subject: Re: [PATCH] IB/iser: Fix RNR errors > > Hi Sergey, sorry for the late response, missed this patch. > > On 03/13/2018 05:25 PM, Sergey Gorenko wrote: > > Some users complain about RNR errors on the target, when heavy > > high-priority tasks run on the initiator. After the investigation, we > > found out that the receive WRs were exhausted, because the initiator > > could not post them on time. > > > > Receive work reqeusts are posted in chunks of min_posted_rx to reduce > > the number of hits to the HCA. The WRs are posted in the receive > > completion handler when the number of free receive buffers reaches > > min_posted_rx. But on a high-loaded host, receive CQEs processing can > > be delayed and all receive WRs will be exhausted. > > In this case, the target will get an RNR error. > > > > To avoid this, we post receive WR, as soon as at least one receive > > buffer is freed. This increases the number of hits to the HCA, but > > test results show that performance degradation is not significant. > > Overall this looks fine, small comments below. > > > @@ -322,30 +319,28 @@ void iser_free_rx_descriptors(struct iser_conn > *iser_conn) > > static int iser_post_rx_bufs(struct iscsi_conn *conn, struct iscsi_hdr *req) > > { > > struct iser_conn *iser_conn = conn->dd_data; > > - struct ib_conn *ib_conn = &iser_conn->ib_conn; > > struct iscsi_session *session = conn->session; > > + /* There is one posted recv buffer (for the last login response). */ > > + int already_posted = 1; > > No need for a local variable for a constant really. > > > + int i; > > > > iser_dbg("req op %x flags %x\n", req->opcode, req->flags); > > /* check if this is the last login - going to full feature phase */ > > if ((req->flags & ISCSI_FULL_FEATURE_PHASE) != > ISCSI_FULL_FEATURE_PHASE) > > return 0; > > > > - /* > > - * Check that there is one posted recv buffer > > - * (for the last login response). > > - */ > > - WARN_ON(ib_conn->post_recv_buf_count != 1); > > - > > if (session->discovery_sess) { > > iser_info("Discovery session, re-using login RX buffer\n"); > > return 0; > > } else > > iser_info("Normal session, posting batch of RX %d > buffers\n", > > - iser_conn->min_posted_rx); > > + iser_conn->qp_max_recv_dtos - already_posted); > > > > /* Initial post receive buffers */ > > - if (iser_post_recvm(iser_conn, iser_conn->min_posted_rx)) > > - return -ENOMEM; > > + for (i = already_posted; i < iser_conn->qp_max_recv_dtos; i++) { > > + if (iser_post_recvm(iser_conn, &iser_conn->rx_descs[i])) > > + return -ENOMEM; > > + } > > Please propagate the return code here.
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index c1ae4aeae2f9..925996ddcf9f 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -123,8 +123,6 @@ #define ISER_QP_MAX_RECV_DTOS (ISER_DEF_XMIT_CMDS_MAX) -#define ISER_MIN_POSTED_RX (ISER_DEF_XMIT_CMDS_MAX >> 2) - /* the max TX (send) WR supported by the iSER QP is defined by * * max_send_wr = T * (1 + D) + C ; D is how many inflight dataouts we expect * * to have at max for SCSI command. The tx posting & completion handling code * @@ -452,9 +450,7 @@ struct iser_fr_pool { * * @cma_id: rdma_cm connection maneger handle * @qp: Connection Queue-pair - * @post_recv_buf_count: post receive counter * @sig_count: send work request signal count - * @rx_wr: receive work request for batch posts * @device: reference to iser device * @comp: iser completion context * @fr_pool: connection fast registration poool @@ -463,9 +459,7 @@ struct iser_fr_pool { struct ib_conn { struct rdma_cm_id *cma_id; struct ib_qp *qp; - int post_recv_buf_count; u8 sig_count; - struct ib_recv_wr rx_wr[ISER_MIN_POSTED_RX]; struct iser_device *device; struct iser_comp *comp; struct iser_fr_pool fr_pool; @@ -482,8 +476,6 @@ struct ib_conn { * @state: connection logical state * @qp_max_recv_dtos: maximum number of data outs, corresponds * to max number of post recvs - * @qp_max_recv_dtos_mask: (qp_max_recv_dtos - 1) - * @min_posted_rx: (qp_max_recv_dtos >> 2) * @max_cmds: maximum cmds allowed for this connection * @name: connection peer portal * @release_work: deffered work for release job @@ -494,7 +486,6 @@ struct ib_conn { * (state is ISER_CONN_UP) * @conn_list: entry in ig conn list * @login_desc: login descriptor - * @rx_desc_head: head of rx_descs cyclic buffer * @rx_descs: rx buffers array (cyclic buffer) * @num_rx_descs: number of rx descriptors * @scsi_sg_tablesize: scsi host sg_tablesize @@ -505,8 +496,6 @@ struct iser_conn { struct iscsi_endpoint *ep; enum iser_conn_state state; unsigned qp_max_recv_dtos; - unsigned qp_max_recv_dtos_mask; - unsigned min_posted_rx; u16 max_cmds; char name[ISER_OBJECT_NAME_SIZE]; struct work_struct release_work; @@ -516,7 +505,6 @@ struct iser_conn { struct completion up_completion; struct list_head conn_list; struct iser_login_desc login_desc; - unsigned int rx_desc_head; struct iser_rx_desc *rx_descs; u32 num_rx_descs; unsigned short scsi_sg_tablesize; @@ -638,7 +626,7 @@ void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task, enum iser_data_dir cmd_dir); int iser_post_recvl(struct iser_conn *iser_conn); -int iser_post_recvm(struct iser_conn *iser_conn, int count); +int iser_post_recvm(struct iser_conn *iser_conn, struct iser_rx_desc *rx_desc); int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc, bool signal); diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index df49c4eb67f7..7d4cb480db75 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -247,8 +247,6 @@ int iser_alloc_rx_descriptors(struct iser_conn *iser_conn, struct iser_device *device = ib_conn->device; iser_conn->qp_max_recv_dtos = session->cmds_max; - iser_conn->qp_max_recv_dtos_mask = session->cmds_max - 1; /* cmds_max is 2^N */ - iser_conn->min_posted_rx = iser_conn->qp_max_recv_dtos >> 2; if (device->reg_ops->alloc_reg_res(ib_conn, session->scsi_cmds_max, iser_conn->scsi_sg_tablesize)) @@ -279,7 +277,6 @@ int iser_alloc_rx_descriptors(struct iser_conn *iser_conn, rx_sg->lkey = device->pd->local_dma_lkey; } - iser_conn->rx_desc_head = 0; return 0; rx_desc_dma_map_failed: @@ -322,30 +319,28 @@ void iser_free_rx_descriptors(struct iser_conn *iser_conn) static int iser_post_rx_bufs(struct iscsi_conn *conn, struct iscsi_hdr *req) { struct iser_conn *iser_conn = conn->dd_data; - struct ib_conn *ib_conn = &iser_conn->ib_conn; struct iscsi_session *session = conn->session; + /* There is one posted recv buffer (for the last login response). */ + int already_posted = 1; + int i; iser_dbg("req op %x flags %x\n", req->opcode, req->flags); /* check if this is the last login - going to full feature phase */ if ((req->flags & ISCSI_FULL_FEATURE_PHASE) != ISCSI_FULL_FEATURE_PHASE) return 0; - /* - * Check that there is one posted recv buffer - * (for the last login response). - */ - WARN_ON(ib_conn->post_recv_buf_count != 1); - if (session->discovery_sess) { iser_info("Discovery session, re-using login RX buffer\n"); return 0; } else iser_info("Normal session, posting batch of RX %d buffers\n", - iser_conn->min_posted_rx); + iser_conn->qp_max_recv_dtos - already_posted); /* Initial post receive buffers */ - if (iser_post_recvm(iser_conn, iser_conn->min_posted_rx)) - return -ENOMEM; + for (i = already_posted; i < iser_conn->qp_max_recv_dtos; i++) { + if (iser_post_recvm(iser_conn, &iser_conn->rx_descs[i])) + return -ENOMEM; + } return 0; } @@ -585,7 +580,11 @@ void iser_login_rsp(struct ib_cq *cq, struct ib_wc *wc) desc->rsp_dma, ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE); - ib_conn->post_recv_buf_count--; + if (iser_conn->iscsi_conn->session->discovery_sess) + return; + + /* Post the first RX buffer that is skipped in iser_post_rx_bufs() */ + iser_post_recvm(iser_conn, iser_conn->rx_descs); } static inline void @@ -645,8 +644,7 @@ void iser_task_rsp(struct ib_cq *cq, struct ib_wc *wc) struct iser_conn *iser_conn = to_iser_conn(ib_conn); struct iser_rx_desc *desc = iser_rx(wc->wr_cqe); struct iscsi_hdr *hdr; - int length; - int outstanding, count, err; + int length, err; if (unlikely(wc->status != IB_WC_SUCCESS)) { iser_err_comp(wc, "task_rsp"); @@ -675,20 +673,9 @@ void iser_task_rsp(struct ib_cq *cq, struct ib_wc *wc) desc->dma_addr, ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE); - /* decrementing conn->post_recv_buf_count only --after-- freeing the * - * task eliminates the need to worry on tasks which are completed in * - * parallel to the execution of iser_conn_term. So the code that waits * - * for the posted rx bufs refcount to become zero handles everything */ - ib_conn->post_recv_buf_count--; - - outstanding = ib_conn->post_recv_buf_count; - if (outstanding + iser_conn->min_posted_rx <= iser_conn->qp_max_recv_dtos) { - count = min(iser_conn->qp_max_recv_dtos - outstanding, - iser_conn->min_posted_rx); - err = iser_post_recvm(iser_conn, count); - if (err) - iser_err("posting %d rx bufs err %d\n", count, err); - } + err = iser_post_recvm(iser_conn, desc); + if (err) + iser_err("posting rx buffer err %d\n", err); } void iser_cmd_comp(struct ib_cq *cq, struct ib_wc *wc) diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 56b7240a3fc3..2019500a2425 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -936,7 +936,6 @@ void iser_conn_init(struct iser_conn *iser_conn) INIT_LIST_HEAD(&iser_conn->conn_list); mutex_init(&iser_conn->state_mutex); - ib_conn->post_recv_buf_count = 0; ib_conn->reg_cqe.done = iser_reg_comp; } @@ -1020,44 +1019,28 @@ int iser_post_recvl(struct iser_conn *iser_conn) wr.num_sge = 1; wr.next = NULL; - ib_conn->post_recv_buf_count++; ib_ret = ib_post_recv(ib_conn->qp, &wr, &wr_failed); - if (ib_ret) { + if (ib_ret) iser_err("ib_post_recv failed ret=%d\n", ib_ret); - ib_conn->post_recv_buf_count--; - } return ib_ret; } -int iser_post_recvm(struct iser_conn *iser_conn, int count) +int iser_post_recvm(struct iser_conn *iser_conn, struct iser_rx_desc *rx_desc) { struct ib_conn *ib_conn = &iser_conn->ib_conn; - unsigned int my_rx_head = iser_conn->rx_desc_head; - struct iser_rx_desc *rx_desc; - struct ib_recv_wr *wr, *wr_failed; - int i, ib_ret; - - for (wr = ib_conn->rx_wr, i = 0; i < count; i++, wr++) { - rx_desc = &iser_conn->rx_descs[my_rx_head]; - rx_desc->cqe.done = iser_task_rsp; - wr->wr_cqe = &rx_desc->cqe; - wr->sg_list = &rx_desc->rx_sg; - wr->num_sge = 1; - wr->next = wr + 1; - my_rx_head = (my_rx_head + 1) & iser_conn->qp_max_recv_dtos_mask; - } + struct ib_recv_wr wr, *wr_failed; + int ib_ret; - wr--; - wr->next = NULL; /* mark end of work requests list */ + rx_desc->cqe.done = iser_task_rsp; + wr.wr_cqe = &rx_desc->cqe; + wr.sg_list = &rx_desc->rx_sg; + wr.num_sge = 1; + wr.next = NULL; - ib_conn->post_recv_buf_count += count; - ib_ret = ib_post_recv(ib_conn->qp, ib_conn->rx_wr, &wr_failed); - if (ib_ret) { + ib_ret = ib_post_recv(ib_conn->qp, &wr, &wr_failed); + if (ib_ret) iser_err("ib_post_recv failed ret=%d\n", ib_ret); - ib_conn->post_recv_buf_count -= count; - } else - iser_conn->rx_desc_head = my_rx_head; return ib_ret; }