Message ID | 2da1db58d642789e8df154e34d622a37295d1ba3.1454709317.git.swise@chelsio.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
> On Feb 5, 2016, at 4:13 PM, Steve Wise <swise@opengridcomputing.com> wrote: > > From: Steve Wise <swise@opengridcomputing.com.com> > > Add provider-specific drain_qp function for providers needing special > drain logic. > > Add static function __ib_drain_qp() which posts noop WRs to the RQ and > SQ and blocks until their completions are processed. This ensures the > applications completions have all been processed. > > Add API function ib_drain_qp() which calls the provider-specific drain > if it exists or __ib_drain_qp(). > > Signed-off-by: Steve Wise <swise@opengridcomputing.com> > --- > drivers/infiniband/core/verbs.c | 72 +++++++++++++++++++++++++++++++++++++++++ > include/rdma/ib_verbs.h | 2 ++ > 2 files changed, 74 insertions(+) > > diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c > index 5af6d02..31b82cd 100644 > --- a/drivers/infiniband/core/verbs.c > +++ b/drivers/infiniband/core/verbs.c > @@ -1657,3 +1657,75 @@ next_page: > return i; > } > EXPORT_SYMBOL(ib_sg_to_pages); > + > +struct ib_drain_cqe { > + struct ib_cqe cqe; > + struct completion done; > +}; > + > +static void ib_drain_qp_done(struct ib_cq *cq, struct ib_wc *wc) > +{ > + struct ib_drain_cqe *cqe = container_of(wc->wr_cqe, struct ib_drain_cqe, > + cqe); > + > + complete(&cqe->done); > +} > + > +/* > + * Post a WR and block until its completion is reaped for both the RQ and SQ. > + */ > +static void __ib_drain_qp(struct ib_qp *qp) > +{ > + struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR }; > + struct ib_drain_cqe rdrain, sdrain; > + struct ib_recv_wr rwr = {}, *bad_rwr; > + struct ib_send_wr swr = {}, *bad_swr; > + int ret; > + > + rwr.wr_cqe = &rdrain.cqe; > + rdrain.cqe.done = ib_drain_qp_done; > + init_completion(&rdrain.done); > + > + swr.wr_cqe = &sdrain.cqe; > + sdrain.cqe.done = ib_drain_qp_done; OK. ib_cqe is what hooks the completion events for these blank WRs, so those completions are never exposed to the RDMA consumer. 
But does a consumer have to bump its SQE and RQE count when allocating its CQs, or is that done automatically by ib_alloc_cq() ? > + init_completion(&sdrain.done); > + > + ret = ib_modify_qp(qp, &attr, IB_QP_STATE); > + if (ret) { > + WARN_ONCE(ret, "failed to drain QP: %d\n", ret); > + return; > + } > + > + ret = ib_post_recv(qp, &rwr, &bad_rwr); > + if (ret) { > + WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret); > + return; > + } > + > + ret = ib_post_send(qp, &swr, &bad_swr); > + if (ret) { > + WARN_ONCE(ret, "failed to drain send queue: %d\n", ret); > + return; > + } > + > + wait_for_completion(&rdrain.done); > + wait_for_completion(&sdrain.done); > +} > + > +/** > + * ib_drain_qp() - Block until all CQEs have been consumed by the > + * application. > + * @qp: queue pair to drain > + * > + * If the device has a provider-specific drain function, then > + * call that. Otherwise call the generic drain function > + * __ib_drain_qp(). > + */ > +void ib_drain_qp(struct ib_qp *qp) > +{ > + if (qp->device->drain_qp) > + qp->device->drain_qp(qp); > + else > + __ib_drain_qp(qp); > +} > +EXPORT_SYMBOL(ib_drain_qp); > diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h > index 284b00c..d8533ab 100644 > --- a/include/rdma/ib_verbs.h > +++ b/include/rdma/ib_verbs.h > @@ -1846,6 +1846,7 @@ struct ib_device { > int (*check_mr_status)(struct ib_mr *mr, u32 check_mask, > struct ib_mr_status *mr_status); > void (*disassociate_ucontext)(struct ib_ucontext *ibcontext); > + void (*drain_qp)(struct ib_qp *qp); > > struct ib_dma_mapping_ops *dma_ops; > > @@ -3094,4 +3095,5 @@ int ib_sg_to_pages(struct ib_mr *mr, > int sg_nents, > int (*set_page)(struct ib_mr *, u64)); > > +void ib_drain_qp(struct ib_qp *qp); > #endif /* IB_VERBS_H */ > -- > 2.7.0 > > -- > To unsubscribe from this list: send the line "unsubscribe linux-rdma" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- Chuck Lever -- To 
unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
> > From: Steve Wise <swise@opengridcomputing.com.com> > > > > Add provider-specific drain_qp function for providers needing special > > drain logic. > > > > Add static function __ib_drain_qp() which posts noop WRs to the RQ and > > SQ and blocks until their completions are processed. This ensures the > > applications completions have all been processed. > > > > Add API function ib_drain_qp() which calls the provider-specific drain > > if it exists or __ib_drain_qp(). > > > > Signed-off-by: Steve Wise <swise@opengridcomputing.com> > > --- > > drivers/infiniband/core/verbs.c | 72 +++++++++++++++++++++++++++++++++++++++++ > > include/rdma/ib_verbs.h | 2 ++ > > 2 files changed, 74 insertions(+) > > > > diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c > > index 5af6d02..31b82cd 100644 > > --- a/drivers/infiniband/core/verbs.c > > +++ b/drivers/infiniband/core/verbs.c > > @@ -1657,3 +1657,75 @@ next_page: > > return i; > > } > > EXPORT_SYMBOL(ib_sg_to_pages); > > + > > +struct ib_drain_cqe { > > + struct ib_cqe cqe; > > + struct completion done; > > +}; > > + > > +static void ib_drain_qp_done(struct ib_cq *cq, struct ib_wc *wc) > > +{ > > + struct ib_drain_cqe *cqe = container_of(wc->wr_cqe, struct ib_drain_cqe, > > + cqe); > > + > > + complete(&cqe->done); > > +} > > + > > +/* > > + * Post a WR and block until its completion is reaped for both the RQ and SQ. > > + */ > > +static void __ib_drain_qp(struct ib_qp *qp) > > +{ > > + struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR }; > > + struct ib_drain_cqe rdrain, sdrain; > > + struct ib_recv_wr rwr = {}, *bad_rwr; > > + struct ib_send_wr swr = {}, *bad_swr; > > + int ret; > > + > > + rwr.wr_cqe = &rdrain.cqe; > > + rdrain.cqe.done = ib_drain_qp_done; > > + init_completion(&rdrain.done); > > + > > + swr.wr_cqe = &sdrain.cqe; > > + sdrain.cqe.done = ib_drain_qp_done; > > OK. 
ib_cqe is what hooks the completion events for these > blank WRs, so those completions are never exposed to the > RDMA consumer. > Right, which means only consumers that use the new style CQ processing can make use of this. > But does a consumer have to bump its SQE and RQE count > when allocating its CQs, or is that done automatically > by ib_alloc_cq() ? > The consumer has to make sure there is room in the SQ, RQ and CQ. Going forward, we could enhance QP and CQ allocation to allow the consumer to specify it wants drain capability so the consumer doesn't have to do this. It could be done under the covers. In fact, if we did that, then ib_destroy_qp() could do the drain if need be. -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
> On Feb 5, 2016, at 4:53 PM, Steve Wise <swise@opengridcomputing.com> wrote: > >>> From: Steve Wise <swise@opengridcomputing.com.com> >>> >>> Add provider-specific drain_qp function for providers needing special >>> drain logic. >>> >>> Add static function __ib_drain_qp() which posts noop WRs to the RQ and >>> SQ and blocks until their completions are processed. This ensures the >>> applications completions have all been processed. >>> >>> Add API function ib_drain_qp() which calls the provider-specific drain >>> if it exists or __ib_drain_qp(). >>> >>> Signed-off-by: Steve Wise <swise@opengridcomputing.com> >>> --- >>> drivers/infiniband/core/verbs.c | 72 +++++++++++++++++++++++++++++++++++++++++ >>> include/rdma/ib_verbs.h | 2 ++ >>> 2 files changed, 74 insertions(+) >>> >>> diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c >>> index 5af6d02..31b82cd 100644 >>> --- a/drivers/infiniband/core/verbs.c >>> +++ b/drivers/infiniband/core/verbs.c >>> @@ -1657,3 +1657,75 @@ next_page: >>> return i; >>> } >>> EXPORT_SYMBOL(ib_sg_to_pages); >>> + >>> +struct ib_drain_cqe { >>> + struct ib_cqe cqe; >>> + struct completion done; >>> +}; >>> + >>> +static void ib_drain_qp_done(struct ib_cq *cq, struct ib_wc *wc) >>> +{ >>> + struct ib_drain_cqe *cqe = container_of(wc->wr_cqe, struct ib_drain_cqe, >>> + cqe); >>> + >>> + complete(&cqe->done); >>> +} >>> + >>> +/* >>> + * Post a WR and block until its completion is reaped for both the RQ and SQ. >>> + */ >>> +static void __ib_drain_qp(struct ib_qp *qp) >>> +{ >>> + struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR }; >>> + struct ib_drain_cqe rdrain, sdrain; >>> + struct ib_recv_wr rwr = {}, *bad_rwr; >>> + struct ib_send_wr swr = {}, *bad_swr; >>> + int ret; >>> + >>> + rwr.wr_cqe = &rdrain.cqe; >>> + rdrain.cqe.done = ib_drain_qp_done; >>> + init_completion(&rdrain.done); >>> + >>> + swr.wr_cqe = &sdrain.cqe; >>> + sdrain.cqe.done = ib_drain_qp_done; >> >> OK. 
ib_cqe is what hooks the completion events for these >> blank WRs, so those completions are never exposed to the >> RDMA consumer. >> > > Right, which means only consumers that use the new style CQ processing can make use of this. > >> But does a consumer have to bump its SQE and RQE count >> when allocating its CQs, or is that done automatically >> by ib_alloc_cq() ? >> > > The consumer has to make sure there is room in the SQ, RQ and CQ. The documenting comment in front of ib_drain_qp() should mention this, and it should also state the requirement to use ib_alloc_cq(). > Going forward, we could enhance QP and CQ allocation to allow the > consumer to specify it wants drain capability so the consumer doesn't have to do this. It could be done under the covers. In fact, > if we did that, then ib_destroy_qp() could do the drain if need be. -- Chuck Lever -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
> On Feb 5, 2016, at 5:00 PM, Chuck Lever <chuck.lever@oracle.com> wrote: > >> >> On Feb 5, 2016, at 4:53 PM, Steve Wise <swise@opengridcomputing.com> wrote: >> >>>> From: Steve Wise <swise@opengridcomputing.com.com> >>>> >>>> Add provider-specific drain_qp function for providers needing special >>>> drain logic. >>>> >>>> Add static function __ib_drain_qp() which posts noop WRs to the RQ and >>>> SQ and blocks until their completions are processed. This ensures the >>>> applications completions have all been processed. >>>> >>>> Add API function ib_drain_qp() which calls the provider-specific drain >>>> if it exists or __ib_drain_qp(). >>>> >>>> Signed-off-by: Steve Wise <swise@opengridcomputing.com> >>>> --- >>>> drivers/infiniband/core/verbs.c | 72 +++++++++++++++++++++++++++++++++++++++++ >>>> include/rdma/ib_verbs.h | 2 ++ >>>> 2 files changed, 74 insertions(+) >>>> >>>> diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c >>>> index 5af6d02..31b82cd 100644 >>>> --- a/drivers/infiniband/core/verbs.c >>>> +++ b/drivers/infiniband/core/verbs.c >>>> @@ -1657,3 +1657,75 @@ next_page: >>>> return i; >>>> } >>>> EXPORT_SYMBOL(ib_sg_to_pages); >>>> + >>>> +struct ib_drain_cqe { >>>> + struct ib_cqe cqe; >>>> + struct completion done; >>>> +}; >>>> + >>>> +static void ib_drain_qp_done(struct ib_cq *cq, struct ib_wc *wc) >>>> +{ >>>> + struct ib_drain_cqe *cqe = container_of(wc->wr_cqe, struct ib_drain_cqe, >>>> + cqe); >>>> + >>>> + complete(&cqe->done); >>>> +} >>>> + >>>> +/* >>>> + * Post a WR and block until its completion is reaped for both the RQ and SQ. 
>>>> + */ >>>> +static void __ib_drain_qp(struct ib_qp *qp) >>>> +{ >>>> + struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR }; >>>> + struct ib_drain_cqe rdrain, sdrain; >>>> + struct ib_recv_wr rwr = {}, *bad_rwr; >>>> + struct ib_send_wr swr = {}, *bad_swr; >>>> + int ret; >>>> + >>>> + rwr.wr_cqe = &rdrain.cqe; >>>> + rdrain.cqe.done = ib_drain_qp_done; >>>> + init_completion(&rdrain.done); >>>> + >>>> + swr.wr_cqe = &sdrain.cqe; >>>> + sdrain.cqe.done = ib_drain_qp_done; >>> >>> OK. ib_cqe is what hooks the completion events for these >>> blank WRs, so those completions are never exposed to the >>> RDMA consumer. >>> >> >> Right, which means only consumers that use the new style CQ processing can make use of this. >> >>> But does a consumer have to bump its SQE and RQE count >>> when allocating its CQs, or is that done automatically >>> by ib_alloc_cq() ? >>> >> >> The consumer has to make sure there is room in the SQ, RQ and CQ. > > The documenting comment in front of ib_drain_qp() should > mention this, and it should also state the requirement to > use ib_alloc_cq(). And otherwise, I'm happy to see this work! Reviewed-by: Chuck Lever <chuck.lever@oracle.com> >> Going forward, we could enhance QP and CQ allocation to allow the >> consumer to specify it wants drain capability so the consumer doesn't have to do this. It could be done under the covers. In fact, >> if we did that, then ib_destroy_qp() could do the drain if need be. > > > -- > Chuck Lever > > > > > -- > To unsubscribe from this list: send the line "unsubscribe linux-rdma" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- Chuck Lever -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Hi Steve, On Sat, Feb 6, 2016 at 2:43 AM, Steve Wise <swise@opengridcomputing.com> wrote: > From: Steve Wise <swise@opengridcomputing.com.com> > > Add provider-specific drain_qp function for providers needing special > drain logic. > > Add static function __ib_drain_qp() which posts noop WRs to the RQ and > SQ and blocks until their completions are processed. This ensures the > applications completions have all been processed. > > Add API function ib_drain_qp() which calls the provider-specific drain > if it exists or __ib_drain_qp(). > > Signed-off-by: Steve Wise <swise@opengridcomputing.com> > --- > drivers/infiniband/core/verbs.c | 72 +++++++++++++++++++++++++++++++++++++++++ > include/rdma/ib_verbs.h | 2 ++ > 2 files changed, 74 insertions(+) > > diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c > index 5af6d02..31b82cd 100644 > --- a/drivers/infiniband/core/verbs.c > +++ b/drivers/infiniband/core/verbs.c > @@ -1657,3 +1657,75 @@ next_page: > return i; > } > EXPORT_SYMBOL(ib_sg_to_pages); > + > +struct ib_drain_cqe { > + struct ib_cqe cqe; > + struct completion done; > +}; > + > +static void ib_drain_qp_done(struct ib_cq *cq, struct ib_wc *wc) > +{ > + struct ib_drain_cqe *cqe = container_of(wc->wr_cqe, struct ib_drain_cqe, > + cqe); > + > + complete(&cqe->done); > +} > + > +/* > + * Post a WR and block until its completion is reaped for both the RQ and SQ. 
> + */ > +static void __ib_drain_qp(struct ib_qp *qp) > +{ > + struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR }; > + struct ib_drain_cqe rdrain, sdrain; > + struct ib_recv_wr rwr = {}, *bad_rwr; > + struct ib_send_wr swr = {}, *bad_swr; > + int ret; > + > + rwr.wr_cqe = &rdrain.cqe; > + rdrain.cqe.done = ib_drain_qp_done; > + init_completion(&rdrain.done); > + > + swr.wr_cqe = &sdrain.cqe; > + sdrain.cqe.done = ib_drain_qp_done; > + init_completion(&sdrain.done); > + > + ret = ib_modify_qp(qp, &attr, IB_QP_STATE); > + if (ret) { > + WARN_ONCE(ret, "failed to drain QP: %d\n", ret); > + return; > + } I was thinking if we really need this modify_qp here. generally drain_qp is called on an error'ed out QP. In a graceful tear down rdma_disconnect takes care to modify-qp to error. while in error state qp is already in error state. > + > + ret = ib_post_recv(qp, &rwr, &bad_rwr); > + if (ret) { > + WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret); > + return; > + } > + > + ret = ib_post_send(qp, &swr, &bad_swr); > + if (ret) { > + WARN_ONCE(ret, "failed to drain send queue: %d\n", ret); > + return; > + } > + > + wait_for_completion(&rdrain.done); > + wait_for_completion(&sdrain.done); > +} > + > +/** > + * ib_drain_qp() - Block until all CQEs have been consumed by the > + * application. > + * @qp: queue pair to drain > + * > + * If the device has a provider-specific drain function, then > + * call that. Otherwise call the generic drain function > + * __ib_drain_qp(). 
> + */ > +void ib_drain_qp(struct ib_qp *qp) > +{ > + if (qp->device->drain_qp) > + qp->device->drain_qp(qp); > + else > + __ib_drain_qp(qp); > +} > +EXPORT_SYMBOL(ib_drain_qp); > diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h > index 284b00c..d8533ab 100644 > --- a/include/rdma/ib_verbs.h > +++ b/include/rdma/ib_verbs.h > @@ -1846,6 +1846,7 @@ struct ib_device { > int (*check_mr_status)(struct ib_mr *mr, u32 check_mask, > struct ib_mr_status *mr_status); > void (*disassociate_ucontext)(struct ib_ucontext *ibcontext); > + void (*drain_qp)(struct ib_qp *qp); > > struct ib_dma_mapping_ops *dma_ops; > > @@ -3094,4 +3095,5 @@ int ib_sg_to_pages(struct ib_mr *mr, > int sg_nents, > int (*set_page)(struct ib_mr *, u64)); > > +void ib_drain_qp(struct ib_qp *qp); > #endif /* IB_VERBS_H */ > -- > 2.7.0 > > -- > To unsubscribe from this list: send the line "unsubscribe linux-rdma" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Fri, Feb 05, 2016 at 01:13:16PM -0800, Steve Wise wrote: > From: Steve Wise <swise@opengridcomputing.com.com> > > Add provider-specific drain_qp function for providers needing special > drain logic. > > Add static function __ib_drain_qp() which posts noop WRs to the RQ and > SQ and blocks until their completions are processed. This ensures the > applications completions have all been processed. > > Add API function ib_drain_qp() which calls the provider-specific drain > if it exists or __ib_drain_qp(). > > Signed-off-by: Steve Wise <swise@opengridcomputing.com> > --- > drivers/infiniband/core/verbs.c | 72 +++++++++++++++++++++++++++++++++++++++++ > include/rdma/ib_verbs.h | 2 ++ > 2 files changed, 74 insertions(+) > ... > + ret = ib_modify_qp(qp, &attr, IB_QP_STATE); > + if (ret) { > + WARN_ONCE(ret, "failed to drain QP: %d\n", ret); > + return; > + } > + > + ret = ib_post_recv(qp, &rwr, &bad_rwr); > + if (ret) { > + WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret); > + return; > + } > + > + ret = ib_post_send(qp, &swr, &bad_swr); > + if (ret) { > + WARN_ONCE(ret, "failed to drain send queue: %d\n", ret); > + return; You are returning here despite the fact that recv queue drained successfully and you can wait for completion of rdrain safely. Is it done on purpose? > + } > + > + wait_for_completion(&rdrain.done); > + wait_for_completion(&sdrain.done); > +} > + -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
>> + ret = ib_modify_qp(qp, &attr, IB_QP_STATE); >> + if (ret) { >> + WARN_ONCE(ret, "failed to drain QP: %d\n", ret); >> + return; >> + } >> + >> + ret = ib_post_recv(qp, &rwr, &bad_rwr); >> + if (ret) { >> + WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret); >> + return; >> + } >> + >> + ret = ib_post_send(qp, &swr, &bad_swr); >> + if (ret) { >> + WARN_ONCE(ret, "failed to drain send queue: %d\n", ret); >> + return; > > You are returning here despite the fact that recv queue drained > successfully and you can wait for completion of rdrain safely. > Is it done on purpose? Good catch, the completion structures are on-stack. Steve, you need to wait for the completion of the successful post in this case... -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
>> +/* >> + * Post a WR and block until its completion is reaped for both the RQ and SQ. >> + */ >> +static void __ib_drain_qp(struct ib_qp *qp) >> +{ >> + struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR }; >> + struct ib_drain_cqe rdrain, sdrain; >> + struct ib_recv_wr rwr = {}, *bad_rwr; >> + struct ib_send_wr swr = {}, *bad_swr; >> + int ret; >> + >> + rwr.wr_cqe = &rdrain.cqe; >> + rdrain.cqe.done = ib_drain_qp_done; >> + init_completion(&rdrain.done); >> + >> + swr.wr_cqe = &sdrain.cqe; >> + sdrain.cqe.done = ib_drain_qp_done; >> + init_completion(&sdrain.done); >> + >> + ret = ib_modify_qp(qp, &attr, IB_QP_STATE); >> + if (ret) { >> + WARN_ONCE(ret, "failed to drain QP: %d\n", ret); >> + return; >> + } > > I was thinking if we really need this modify_qp here. generally > drain_qp is called on > an error'ed out QP. In a graceful tear down rdma_disconnect takes care > to modify-qp > to error. while in error state qp is already in error state. We could make it conditional, but I don't see any harm done in having it, as making it conditional would mean a passed-in flag. It would be better to have the modify_qp implementers do nothing for an ERROR -> ERROR modify... -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Sun, Feb 7, 2016 at 5:23 PM, Sagi Grimberg <sagig@dev.mellanox.co.il> wrote: > >>> +/* >>> + * Post a WR and block until its completion is reaped for both the RQ >>> and SQ. >>> + */ >>> +static void __ib_drain_qp(struct ib_qp *qp) >>> +{ >>> + struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR }; >>> + struct ib_drain_cqe rdrain, sdrain; >>> + struct ib_recv_wr rwr = {}, *bad_rwr; >>> + struct ib_send_wr swr = {}, *bad_swr; >>> + int ret; >>> + >>> + rwr.wr_cqe = &rdrain.cqe; >>> + rdrain.cqe.done = ib_drain_qp_done; >>> + init_completion(&rdrain.done); >>> + >>> + swr.wr_cqe = &sdrain.cqe; >>> + sdrain.cqe.done = ib_drain_qp_done; >>> + init_completion(&sdrain.done); >>> + >>> + ret = ib_modify_qp(qp, &attr, IB_QP_STATE); >>> + if (ret) { >>> + WARN_ONCE(ret, "failed to drain QP: %d\n", ret); >>> + return; >>> + } >> >> >> I was thinking if we really need this modify_qp here. generally >> drain_qp is called on >> an error'ed out QP. In a graceful tear down rdma_disconnect takes care >> to modify-qp >> to error. while in error state qp is already in error state. > > > We could get it conditional, but I don't see any harm done > in having it as it would mean a passed in flag. > Since Spec allows this, I would say okay let it be there. > It would be better to have the modify_qp implementers do > nothing for a ERROR -> ERROR modify... Driver implementer has a choice here which is okay. -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
> >> OK. ib_cqe is what hooks the completion events for these > >> blank WRs, so those completions are never exposed to the > >> RDMA consumer. > >> > > > > Right, which means only consumers that use the new style CQ processing can make use of this. > > > >> But does a consumer have to bump its SQE and RQE count > >> when allocating its CQs, or is that done automatically > >> by ib_alloc_cq() ? > >> > > > > The consumer has to make sure there is room in the SQ, RQ and CQ. > > The documenting comment in front of ib_drain_qp() should > mention this, and it should also state the requirement to > use ib_alloc_cq(). > Will do. -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
> -----Original Message----- > From: Sagi Grimberg [mailto:sagig@dev.mellanox.co.il] > Sent: Sunday, February 07, 2016 5:52 AM > To: Steve Wise; linux-rdma@vger.kernel.org > Subject: Re: [PATCH 1/3] IB: new common API for draining a queue pair > > > >> + ret = ib_modify_qp(qp, &attr, IB_QP_STATE); > >> + if (ret) { > >> + WARN_ONCE(ret, "failed to drain QP: %d\n", ret); > >> + return; > >> + } > >> + > >> + ret = ib_post_recv(qp, &rwr, &bad_rwr); > >> + if (ret) { > >> + WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret); > >> + return; > >> + } > >> + > >> + ret = ib_post_send(qp, &swr, &bad_swr); > >> + if (ret) { > >> + WARN_ONCE(ret, "failed to drain send queue: %d\n", ret); > >> + return; > > > > You are returning here despite the fact that recv queue drained > > successfully and you can wait for completion of rdrain safely. > > Is it done on purpose? > > Good catch, the completion structures are on-stack. > > Steve, you need to wait for the completion of the > successful post in this case... Yup. Will do. -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
> >> +/* > >> + * Post a WR and block until its completion is reaped for both the RQ and SQ. > >> + */ > >> +static void __ib_drain_qp(struct ib_qp *qp) > >> +{ > >> + struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR }; > >> + struct ib_drain_cqe rdrain, sdrain; > >> + struct ib_recv_wr rwr = {}, *bad_rwr; > >> + struct ib_send_wr swr = {}, *bad_swr; > >> + int ret; > >> + > >> + rwr.wr_cqe = &rdrain.cqe; > >> + rdrain.cqe.done = ib_drain_qp_done; > >> + init_completion(&rdrain.done); > >> + > >> + swr.wr_cqe = &sdrain.cqe; > >> + sdrain.cqe.done = ib_drain_qp_done; > >> + init_completion(&sdrain.done); > >> + > >> + ret = ib_modify_qp(qp, &attr, IB_QP_STATE); > >> + if (ret) { > >> + WARN_ONCE(ret, "failed to drain QP: %d\n", ret); > >> + return; > >> + } > > > > I was thinking if we really need this modify_qp here. generally > > drain_qp is called on > > an error'ed out QP. In a graceful tear down rdma_disconnect takes care > > to modify-qp > > to error. while in error state qp is already in error state. > > We could get it conditional, but I don't see any harm done > in having it as it would mean a passed in flag. > > It would be better to have the modify_qp implementers do > nothing for a ERROR -> ERROR modify... The IBTA spec sez ERROR->ERROR is allowed, so nobody should be failing that transition. iWARP, however, has to be different. :) It sez any attempt to transition out of ERROR to anything other than IDLE results in an immediate error. But since the iWARP provider will need their own handler, I guess I think we should just leave the modify->ERROR in as-is. Or perhaps ignore the return code. Steve. -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
> On Sun, Feb 7, 2016 at 5:23 PM, Sagi Grimberg <sagig@dev.mellanox.co.il> wrote: > > > >>> +/* > >>> + * Post a WR and block until its completion is reaped for both the RQ > >>> and SQ. > >>> + */ > >>> +static void __ib_drain_qp(struct ib_qp *qp) > >>> +{ > >>> + struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR }; > >>> + struct ib_drain_cqe rdrain, sdrain; > >>> + struct ib_recv_wr rwr = {}, *bad_rwr; > >>> + struct ib_send_wr swr = {}, *bad_swr; > >>> + int ret; > >>> + > >>> + rwr.wr_cqe = &rdrain.cqe; > >>> + rdrain.cqe.done = ib_drain_qp_done; > >>> + init_completion(&rdrain.done); > >>> + > >>> + swr.wr_cqe = &sdrain.cqe; > >>> + sdrain.cqe.done = ib_drain_qp_done; > >>> + init_completion(&sdrain.done); > >>> + > >>> + ret = ib_modify_qp(qp, &attr, IB_QP_STATE); > >>> + if (ret) { > >>> + WARN_ONCE(ret, "failed to drain QP: %d\n", ret); > >>> + return; > >>> + } > >> > >> > >> I was thinking if we really need this modify_qp here. generally > >> drain_qp is called on > >> an error'ed out QP. In a graceful tear down rdma_disconnect takes care > >> to modify-qp > >> to error. while in error state qp is already in error state. > > > > > > We could get it conditional, but I don't see any harm done > > in having it as it would mean a passed in flag. > > > > Since Spec allows this, I would say okay let it be there. > > > It would be better to have the modify_qp implementers do > > nothing for a ERROR -> ERROR modify... > > Driver implementer has a choice here which is okay. Ok, I'll leave it as-is then. Thanks for the reviews! -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 5af6d02..31b82cd 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1657,3 +1657,75 @@ next_page: return i; } EXPORT_SYMBOL(ib_sg_to_pages); + +struct ib_drain_cqe { + struct ib_cqe cqe; + struct completion done; +}; + +static void ib_drain_qp_done(struct ib_cq *cq, struct ib_wc *wc) +{ + struct ib_drain_cqe *cqe = container_of(wc->wr_cqe, struct ib_drain_cqe, + cqe); + + complete(&cqe->done); +} + +/* + * Post a WR and block until its completion is reaped for both the RQ and SQ. + */ +static void __ib_drain_qp(struct ib_qp *qp) +{ + struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR }; + struct ib_drain_cqe rdrain, sdrain; + struct ib_recv_wr rwr = {}, *bad_rwr; + struct ib_send_wr swr = {}, *bad_swr; + int ret; + + rwr.wr_cqe = &rdrain.cqe; + rdrain.cqe.done = ib_drain_qp_done; + init_completion(&rdrain.done); + + swr.wr_cqe = &sdrain.cqe; + sdrain.cqe.done = ib_drain_qp_done; + init_completion(&sdrain.done); + + ret = ib_modify_qp(qp, &attr, IB_QP_STATE); + if (ret) { + WARN_ONCE(ret, "failed to drain QP: %d\n", ret); + return; + } + + ret = ib_post_recv(qp, &rwr, &bad_rwr); + if (ret) { + WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret); + return; + } + + ret = ib_post_send(qp, &swr, &bad_swr); + if (ret) { + WARN_ONCE(ret, "failed to drain send queue: %d\n", ret); + return; + } + + wait_for_completion(&rdrain.done); + wait_for_completion(&sdrain.done); +} + +/** + * ib_drain_qp() - Block until all CQEs have been consumed by the + * application. + * @qp: queue pair to drain + * + * If the device has a provider-specific drain function, then + * call that. Otherwise call the generic drain function + * __ib_drain_qp(). 
+ */ +void ib_drain_qp(struct ib_qp *qp) +{ + if (qp->device->drain_qp) + qp->device->drain_qp(qp); + else + __ib_drain_qp(qp); +} +EXPORT_SYMBOL(ib_drain_qp); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 284b00c..d8533ab 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1846,6 +1846,7 @@ struct ib_device { int (*check_mr_status)(struct ib_mr *mr, u32 check_mask, struct ib_mr_status *mr_status); void (*disassociate_ucontext)(struct ib_ucontext *ibcontext); + void (*drain_qp)(struct ib_qp *qp); struct ib_dma_mapping_ops *dma_ops; @@ -3094,4 +3095,5 @@ int ib_sg_to_pages(struct ib_mr *mr, int sg_nents, int (*set_page)(struct ib_mr *, u64)); +void ib_drain_qp(struct ib_qp *qp); #endif /* IB_VERBS_H */