diff mbox

[rdma-core,2/2] libhns: Support cq record doorbell

Message ID 1516242961-154453-3-git-send-email-liuyixian@huawei.com (mailing list archive)
State Rejected
Delegated to: Leon Romanovsky
Headers show

Commit Message

Yixian Liu Jan. 18, 2018, 2:36 a.m. UTC
This patch updates to support cq record doorbell in
user space driver.

Signed-off-by: Yixian Liu <liuyixian@huawei.com>
Signed-off-by: Lijun Ou <oulijun@huawei.com>
Signed-off-by: Wei Hu (Xavier) <xavier.huwei@huawei.com>
Signed-off-by: Shaobo Xu <xushaobo2@huawei.com>
---
 providers/hns/hns_roce_u_hw_v2.c | 27 +++++----------------------
 providers/hns/hns_roce_u_verbs.c | 21 ++++++++++++++++++---
 2 files changed, 23 insertions(+), 25 deletions(-)

Comments

Jason Gunthorpe Jan. 18, 2018, 4:37 a.m. UTC | #1
On Thu, Jan 18, 2018 at 10:36:01AM +0800, Yixian Liu wrote:

> diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
> index cde8568..7037a1c 100644
> +++ b/providers/hns/hns_roce_u_verbs.c
> @@ -276,6 +276,16 @@ struct ibv_cq *hns_roce_u_create_cq(struct ibv_context *context, int cqe,
>  
>  	cmd.buf_addr = (uintptr_t) cq->buf.buf;
>  
> +	if (to_hr_dev(context->device)->hw_version != HNS_ROCE_HW_VER1) {
> +		cq->set_ci_db = hns_roce_alloc_db(to_hr_ctx(context),
> +						  HNS_ROCE_CQ_TYPE_DB);
> +		if (!cq->set_ci_db) {
> +			fprintf(stderr, "alloc cq db buffer failed!\n");
> +			goto err_buf;
> +		}
> +		cmd.db_addr  = (uintptr_t) cq->set_ci_db;

Uhh.. why does the userspace already have the 'db_addr' member of
hns_roce_create_cq when the kernel doesn't?

What is going on here? How does forward and backwards compatibility of
the kABI work?

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Yixian Liu Jan. 18, 2018, 7:25 a.m. UTC | #2
On 2018/1/18 12:37, Jason Gunthorpe wrote:
> On Thu, Jan 18, 2018 at 10:36:01AM +0800, Yixian Liu wrote:
> 
>> diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
>> index cde8568..7037a1c 100644
>> +++ b/providers/hns/hns_roce_u_verbs.c
>> @@ -276,6 +276,16 @@ struct ibv_cq *hns_roce_u_create_cq(struct ibv_context *context, int cqe,
>>  
>>  	cmd.buf_addr = (uintptr_t) cq->buf.buf;
>>  
>> +	if (to_hr_dev(context->device)->hw_version != HNS_ROCE_HW_VER1) {
>> +		cq->set_ci_db = hns_roce_alloc_db(to_hr_ctx(context),
>> +						  HNS_ROCE_CQ_TYPE_DB);
>> +		if (!cq->set_ci_db) {
>> +			fprintf(stderr, "alloc cq db buffer failed!\n");
>> +			goto err_buf;
>> +		}
>> +		cmd.db_addr  = (uintptr_t) cq->set_ci_db;
> 
> Uhh.. why does the userspace already have the 'db_addr' member of
> hns_roce_create_cq when the kernel doesn't?
> 
> What is going on here? How does forward and backwards compatibility of
> the kABI work?
> 
> Jason
>
I have checked the history log, it seems that we have missed to add 'db_addr'
for the kernel when adding it for the userspace.
Up to now, we haven't referred this field in current driver both in kernel
and userspace, that's why we haven't found this bug.

Thanks for your doubt!

Eason








--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jason Gunthorpe Jan. 18, 2018, 4:19 p.m. UTC | #3
On Thu, Jan 18, 2018 at 03:25:04PM +0800, Liuyixian (Eason) wrote:
> 
> 
> On 2018/1/18 12:37, Jason Gunthorpe wrote:
> > On Thu, Jan 18, 2018 at 10:36:01AM +0800, Yixian Liu wrote:
> > 
> >> diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
> >> index cde8568..7037a1c 100644
> >> +++ b/providers/hns/hns_roce_u_verbs.c
> >> @@ -276,6 +276,16 @@ struct ibv_cq *hns_roce_u_create_cq(struct ibv_context *context, int cqe,
> >>  
> >>  	cmd.buf_addr = (uintptr_t) cq->buf.buf;
> >>  
> >> +	if (to_hr_dev(context->device)->hw_version != HNS_ROCE_HW_VER1) {
> >> +		cq->set_ci_db = hns_roce_alloc_db(to_hr_ctx(context),
> >> +						  HNS_ROCE_CQ_TYPE_DB);
> >> +		if (!cq->set_ci_db) {
> >> +			fprintf(stderr, "alloc cq db buffer failed!\n");
> >> +			goto err_buf;
> >> +		}
> >> +		cmd.db_addr  = (uintptr_t) cq->set_ci_db;
> > 
> > Uhh.. why does the userspace already have the 'db_addr' member of
> > hns_roce_create_cq when the kernel doesn't?
> > 
> > What is going on here? How does forward and backwards compatibility of
> > the kABI work?
> > 
> > Jason
> >
> I have checked the history log, it seems that we have missed to add 'db_addr'
> for the kernel when adding it for the userspace.
> Up to now, we haven't referred this field in current driver both in kernel
> and userspace, that's why we haven't found this bug.
> 
> Thanks for your doubt!

Please explain how forward and backwards compatibility of
the kABI will work with this new db_addr capability.

Normally we'd rely on the kernel seeing that the udata is longer to
signal that userspace supports an optional feature, but that is broken
in this case.

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Yixian Liu Jan. 19, 2018, 11:04 a.m. UTC | #4
On 2018/1/19 0:19, Jason Gunthorpe wrote:
> On Thu, Jan 18, 2018 at 03:25:04PM +0800, Liuyixian (Eason) wrote:
>>
>>
>> On 2018/1/18 12:37, Jason Gunthorpe wrote:
>>> On Thu, Jan 18, 2018 at 10:36:01AM +0800, Yixian Liu wrote:
>>>
>>>> diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
>>>> index cde8568..7037a1c 100644
>>>> +++ b/providers/hns/hns_roce_u_verbs.c
>>>> @@ -276,6 +276,16 @@ struct ibv_cq *hns_roce_u_create_cq(struct ibv_context *context, int cqe,
>>>>  
>>>>  	cmd.buf_addr = (uintptr_t) cq->buf.buf;
>>>>  
>>>> +	if (to_hr_dev(context->device)->hw_version != HNS_ROCE_HW_VER1) {
>>>> +		cq->set_ci_db = hns_roce_alloc_db(to_hr_ctx(context),
>>>> +						  HNS_ROCE_CQ_TYPE_DB);
>>>> +		if (!cq->set_ci_db) {
>>>> +			fprintf(stderr, "alloc cq db buffer failed!\n");
>>>> +			goto err_buf;
>>>> +		}
>>>> +		cmd.db_addr  = (uintptr_t) cq->set_ci_db;
>>>
>>> Uhh.. why does the userspace already have the 'db_addr' member of
>>> hns_roce_create_cq when the kernel doesn't?
>>>
>>> What is going on here? How does forward and backwards compatibility of
>>> the kABI work?
>>>
>>> Jason
>>>
>> I have checked the history log, it seems that we have missed to add 'db_addr'
>> for the kernel when adding it for the userspace.
>> Up to now, we haven't referred this field in current driver both in kernel
>> and userspace, that's why we haven't found this bug.
>>
>> Thanks for your doubt!
> 
> Please explain how forward and backwards compatibility of
> the kABI will work with this new db_addr capability.
> 
> Normally we'd rely on the kernel seeing that the udata is longer to
> signal that userspace supports an optional feature, but that is broken
> in this case.
> 
> Jason
> 
> 
Hi Jason,

In my understand, there are two possible aspects when you mentioned compatibility
for the new kernel driver with db_addr field:
1. the new kernel driver needed to support old user application using old userspace driver.
2. whether the new kernel driver can work well with the new userspace driver or not.

I am not quite sure about which case you care about.
For case 1, the new kernel driver can't support old user application or old userspace driver.
Because we use the record db solution (what this patchset does and why add new db_addr in kernel)
to substitute with operating register, which used by the old userspace driver.
Therefore, users need to update the userspace driver to the version which support record db.

For case 2, after we add db_addr field in the struct of hns_roce_ib_create_cq for the kernel,
it will have **the same length** with the userspace, that will be ok if we run an application
based on the new kernel and userspace driver, which both support record db.

Eason


--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jason Gunthorpe Jan. 19, 2018, 8 p.m. UTC | #5
On Fri, Jan 19, 2018 at 07:04:42PM +0800, Liuyixian (Eason) wrote:

> For case 1, the new kernel driver can't support old user application or old userspace driver.
> Because we use the record db solution (what this patchset does and why add new db_addr in kernel)
> to substitute with operating register, which used by the old userspace driver.
> Therefore, users need to update the userspace driver to the version which support record db.

This is not acceptable for the kernel. You must maintain compatability
with old userspace.

Also, new userspace must work with older kernels.

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Yixian Liu Jan. 20, 2018, 11:41 a.m. UTC | #6
On 2018/1/20 4:00, Jason Gunthorpe wrote:
> On Fri, Jan 19, 2018 at 07:04:42PM +0800, Liuyixian (Eason) wrote:
> 
>> For case 1, the new kernel driver can't support old user application or old userspace driver.
>> Because we use the record db solution (what this patchset does and why add new db_addr in kernel)
>> to substitute with operating register, which used by the old userspace driver.
>> Therefore, users need to update the userspace driver to the version which support record db.
> 
> This is not acceptable for the kernel. You must maintain compatability
> with old userspace.
> 
> Also, new userspace must work with older kernels.
> 
> Jason

Hi Jason,

Maybe my previous email caused some confusions to people.
Let me illustrate the compatibility more clear.

For hip06, which not support record db, even while it creates cq in userspace
with hns_roce_create_cq (db_addr already there), it will do nothing with db_addr.
The kernel also will not access db_addr, both for old kernel and new kernel(adding db_addr).
Because hip06 pushes db by operating register, this manner keeps unchanged.
Thus, the old/new kernel support both old/new userspace for hip06,

For hip08, which is still under development to support record db, considering
forward compatibility that new kernel with old userspace, as old userspace operates
register and new kernel still provide those registers, there is no forward compatibility
problem. The reverse (backward compatibility) is not true. It is not possible to take
new userspace will still run correctly if the kernel is old. As new userspace use
db_addr substitute operating register, it can run correctly only on new kernel
(the patchset goes with this patchset submitted to for-next) and later kernel versions.
I think it is reasonable for this case.

With carefully release-management, there would be no compatibility problem
both for hip06 and hip08.

Eason

> 
> 


--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jason Gunthorpe Jan. 20, 2018, 4:46 p.m. UTC | #7
On Sat, Jan 20, 2018 at 07:41:03PM +0800, Liuyixian (Eason) wrote:

> new userspace will still run correctly if the kernel is old. As new userspace use
> db_addr substitute operating register, it can run correctly only on new kernel
> (the patchset goes with this patchset submitted to for-next) and later kernel versions.
> I think it is reasonable for this case.

Still not OK, we expect the user space to work with old kernels too.

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Leon Romanovsky Jan. 21, 2018, 6:57 a.m. UTC | #8
On Sat, Jan 20, 2018 at 07:41:03PM +0800, Liuyixian (Eason) wrote:
>

<...>
>
> With carefully release-management, there would be no compatibility problem
> both for hip06 and hip08.

Sorry, for interrupting your discussion, but what does it mean "carefully
release-management"?

Thanks
Yixian Liu Jan. 22, 2018, 2:01 p.m. UTC | #9
On 2018/1/21 14:57, Leon Romanovsky wrote:
> On Sat, Jan 20, 2018 at 07:41:03PM +0800, Liuyixian (Eason) wrote:
>>
> 
> <...>
>>
>> With carefully release-management, there would be no compatibility problem
>> both for hip06 and hip08.
> 
> Sorry, for interrupting your discussion, but what does it mean "carefully
> release-management"?
> 
> Thanks
> 

Hi Leon,

Here release-management mainly means that our driver (kernel and userspace)
for hisilicon SoC is released progressively with linux kernel versions. Take
hip08 for example, our first version Roce driver for hip08 appear in linux
kernel 4.14, as more and more features are added in, some features may be
supported only in 4.16. I call this process as a release-management for
hisilicon SoC.

Eason

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Leon Romanovsky Jan. 22, 2018, 3:13 p.m. UTC | #10
On Mon, Jan 22, 2018 at 10:01:32PM +0800, Liuyixian (Eason) wrote:
>
>
> On 2018/1/21 14:57, Leon Romanovsky wrote:
> > On Sat, Jan 20, 2018 at 07:41:03PM +0800, Liuyixian (Eason) wrote:
> >>
> >
> > <...>
> >>
> >> With carefully release-management, there would be no compatibility problem
> >> both for hip06 and hip08.
> >
> > Sorry, for interrupting your discussion, but what does it mean "carefully
> > release-management"?
> >
> > Thanks
> >
>
> Hi Leon,
>
> Here release-management mainly means that our driver (kernel and userspace)
> for hisilicon SoC is released progressively with linux kernel versions. Take
> hip08 for example, our first version Roce driver for hip08 appear in linux
> kernel 4.14, as more and more features are added in, some features may be
> supported only in 4.16. I call this process as a release-management for
> hisilicon SoC.

Are you aware that rdma-core and kernel are independent and can be
installed in any combinations of old/new kernel/library mix?

Thanks

>
> Eason
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
Yixian Liu Jan. 23, 2018, 12:38 p.m. UTC | #11
On 2018/1/22 23:13, Leon Romanovsky wrote:
> On Mon, Jan 22, 2018 at 10:01:32PM +0800, Liuyixian (Eason) wrote:
>>
>>
>> On 2018/1/21 14:57, Leon Romanovsky wrote:
>>> On Sat, Jan 20, 2018 at 07:41:03PM +0800, Liuyixian (Eason) wrote:
>>>>
>>>
>>> <...>
>>>>
>>>> With carefully release-management, there would be no compatibility problem
>>>> both for hip06 and hip08.
>>>
>>> Sorry, for interrupting your discussion, but what does it mean "carefully
>>> release-management"?
>>>
>>> Thanks
>>>
>>
>> Hi Leon,
>>
>> Here release-management mainly means that our driver (kernel and userspace)
>> for hisilicon SoC is released progressively with linux kernel versions. Take
>> hip08 for example, our first version Roce driver for hip08 appear in linux
>> kernel 4.14, as more and more features are added in, some features may be
>> supported only in 4.16. I call this process as a release-management for
>> hisilicon SoC.
> 
> Are you aware that rdma-core and kernel are independent and can be
> installed in any combinations of old/new kernel/library mix?
> 
> Thanks
> 
Hi Leon,

Yes, I have already consider the mix case in my previous email responded to Jason.

The following table illustrates our compatibility cases.

+-----------------------+-------------------------------+
|			|	rdma-core release	|
|			+-------------------------------+
|			|   release 12	|   release 16	|
|			|     (old)	|     (new)	|
+-------+---------------+---------------+---------------+
| 	|     v4.9	|   compatible	| NOT compatible|
|kernel	|     (old)	|    (hip06)	|    (hip06)	|
|version|---------------+---------------+---------------+
|	|     v4.14	|   compatible	|  compatible	|
|	|     (new)	|    (hip06)	| (hip06/hip08)	|
+-------+---------------+---------------+---------------+

The above table illustrates that old/new kernel supports old rdma-core,
while the new rdma-core needs to update the kernel (driver) to new version.

I think this release-management strategy is reasonable.	What's your opinion?

Eason



--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Yixian Liu Jan. 23, 2018, 12:51 p.m. UTC | #12
On 2018/1/21 0:46, Jason Gunthorpe wrote:
> On Sat, Jan 20, 2018 at 07:41:03PM +0800, Liuyixian (Eason) wrote:
> 
>> new userspace will still run correctly if the kernel is old. As new userspace use
>> db_addr substitute operating register, it can run correctly only on new kernel
>> (the patchset goes with this patchset submitted to for-next) and later kernel versions.
>> I think it is reasonable for this case.
> 
> Still not OK, we expect the user space to work with old kernels too.
> 

Hi Jason,

I just send an email to Leon for further illustrating my consideration
on the compatibility problem.

Could you please give me further comments on that email?  If you don't
agree with what I said in that email, can you give some suggestions
to resolve compatibility issue?

Thanks
Eason

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jason Gunthorpe Jan. 23, 2018, 3:04 p.m. UTC | #13
On Tue, Jan 23, 2018 at 08:51:09PM +0800, Liuyixian (Eason) wrote:
> Could you please give me further comments on that email?  If you don't
> agree with what I said in that email, can you give some suggestions
> to resolve compatibility issue?

You can't have a not compatible in your table.

You need to add some kind of mechanism for userspace to enable the new
features conditionally.

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Yixian Liu Jan. 24, 2018, 10:35 a.m. UTC | #14
On 2018/1/23 23:04, Jason Gunthorpe wrote:
> On Tue, Jan 23, 2018 at 08:51:09PM +0800, Liuyixian (Eason) wrote:
>> Could you please give me further comments on that email?  If you don't
>> agree with what I said in that email, can you give some suggestions
>> to resolve compatibility issue?
> 
> You can't have a not compatible in your table.
> 
> You need to add some kind of mechanism for userspace to enable the new
> features conditionally.
> 
> Jason
> 
Thanks, I will consider your suggestion in next version patch set.

Eason

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Leon Romanovsky Jan. 24, 2018, 11:51 a.m. UTC | #15
On Wed, Jan 24, 2018 at 06:35:20PM +0800, Liuyixian (Eason) wrote:
>
>
> On 2018/1/23 23:04, Jason Gunthorpe wrote:
> > On Tue, Jan 23, 2018 at 08:51:09PM +0800, Liuyixian (Eason) wrote:
> >> Could you please give me further comments on that email?  If you don't
> >> agree with what I said in that email, can you give some suggestions
> >> to resolve compatibility issue?
> >
> > You can't have a not compatible in your table.
> >
> > You need to add some kind of mechanism for userspace to enable the new
> > features conditionally.
> >
> > Jason
> >
> Thanks, I will consider your suggestion in next version patch set.

ABI_VERSION cam help you to achieve that.

Thanks

>
> Eason
>
Jason Gunthorpe Jan. 24, 2018, 4:30 p.m. UTC | #16
On Wed, Jan 24, 2018 at 01:51:41PM +0200, Leon Romanovsky wrote:
> On Wed, Jan 24, 2018 at 06:35:20PM +0800, Liuyixian (Eason) wrote:
> >
> >
> > On 2018/1/23 23:04, Jason Gunthorpe wrote:
> > > On Tue, Jan 23, 2018 at 08:51:09PM +0800, Liuyixian (Eason) wrote:
> > >> Could you please give me further comments on that email?  If you don't
> > >> agree with what I said in that email, can you give some suggestions
> > >> to resolve compatibility issue?
> > >
> > > You can't have a not compatible in your table.
> > >
> > > You need to add some kind of mechanism for userspace to enable the new
> > > features conditionally.
> > >
> > > Jason
> > >
> > Thanks, I will consider your suggestion in next version patch set.
> 
> ABI_VERSION cam help you to achieve that.

ABI_VERSION is mostly totally broken in verbs and breaks
compatability with. It is a last emergency resort.

Probably in this case you just need to lengthen the response udata
struct and the kernel can detect if the longer struct is provided to
enable the feature and userspace can detect !0 to determine if the
kernel supports it.

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Yixian Liu Jan. 25, 2018, 1:16 p.m. UTC | #17
On 2018/1/25 0:30, Jason Gunthorpe wrote:
> On Wed, Jan 24, 2018 at 01:51:41PM +0200, Leon Romanovsky wrote:
>> On Wed, Jan 24, 2018 at 06:35:20PM +0800, Liuyixian (Eason) wrote:
>>>
>>>
>>> On 2018/1/23 23:04, Jason Gunthorpe wrote:
>>>> On Tue, Jan 23, 2018 at 08:51:09PM +0800, Liuyixian (Eason) wrote:
>>>>> Could you please give me further comments on that email?  If you don't
>>>>> agree with what I said in that email, can you give some suggestions
>>>>> to resolve compatibility issue?
>>>>
>>>> You can't have a not compatible in your table.
>>>>
>>>> You need to add some kind of mechanism for userspace to enable the new
>>>> features conditionally.
>>>>
>>>> Jason
>>>>
>>> Thanks, I will consider your suggestion in next version patch set.
>>
>> ABI_VERSION cam help you to achieve that.
> 
> ABI_VERSION is mostly totally broken in verbs and breaks
> compatability with. It is a last emergency resort.
> 
> Probably in this case you just need to lengthen the response udata
> struct and the kernel can detect if the longer struct is provided to
> enable the feature and userspace can detect !0 to determine if the
> kernel supports it.
> 
> Jason

Okay, thanks both of you! I will fix the compatibility problem ASAP.

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index c0e0f26..f009839 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -182,25 +182,10 @@  static void hns_roce_update_sq_db(struct hns_roce_context *ctx,
 	hns_roce_write64((uint32_t *)&sq_db, ctx, ROCEE_VF_DB_CFG0_OFFSET);
 }
 
-static void hns_roce_v2_update_cq_cons_index(struct hns_roce_context *ctx,
-					     struct hns_roce_cq *cq)
+static void hns_roce_v2_update_cq_cons_index(struct hns_roce_cq *cq)
 {
-	struct hns_roce_v2_cq_db cq_db;
-
-	cq_db.byte_4 = 0;
-	cq_db.parameter = 0;
-
-	roce_set_field(cq_db.byte_4, DB_BYTE_4_TAG_M, DB_BYTE_4_TAG_S, cq->cqn);
-	roce_set_field(cq_db.byte_4, DB_BYTE_4_CMD_M, DB_BYTE_4_CMD_S, 0x3);
-
-	roce_set_field(cq_db.parameter, CQ_DB_PARAMETER_CQ_CONSUMER_IDX_M,
-		       CQ_DB_PARAMETER_CQ_CONSUMER_IDX_S,
-		       cq->cons_index & ((cq->cq_depth << 1) - 1));
-	roce_set_field(cq_db.parameter, CQ_DB_PARAMETER_CMD_SN_M,
-		       CQ_DB_PARAMETER_CMD_SN_S, 1);
-	roce_set_bit(cq_db.parameter, CQ_DB_PARAMETER_NOTIFY_S, 0);
-
-	hns_roce_write64((uint32_t *)&cq_db, ctx, ROCEE_VF_DB_CFG0_OFFSET);
+	*cq->set_ci_db = (unsigned short)(cq->cons_index &
+					  ((cq->cq_depth << 1) - 1));
 }
 
 static struct hns_roce_qp *hns_roce_v2_find_qp(struct hns_roce_context *ctx,
@@ -450,7 +435,6 @@  static int hns_roce_u_v2_poll_cq(struct ibv_cq *ibvcq, int ne,
 	int err = V2_CQ_OK;
 	struct hns_roce_qp *qp = NULL;
 	struct hns_roce_cq *cq = to_hr_cq(ibvcq);
-	struct hns_roce_context *ctx = to_hr_ctx(ibvcq->context);
 
 	pthread_spin_lock(&cq->lock);
 
@@ -463,7 +447,7 @@  static int hns_roce_u_v2_poll_cq(struct ibv_cq *ibvcq, int ne,
 	if (npolled) {
 		mmio_ordered_writes_hack();
 
-		hns_roce_v2_update_cq_cons_index(ctx, cq);
+		hns_roce_v2_update_cq_cons_index(cq);
 	}
 
 	pthread_spin_unlock(&cq->lock);
@@ -830,7 +814,6 @@  static void __hns_roce_v2_cq_clean(struct hns_roce_cq *cq, uint32_t qpn,
 	uint32_t prod_index;
 	uint8_t owner_bit = 0;
 	struct hns_roce_v2_cqe *cqe, *dest;
-	struct hns_roce_context *ctx = to_hr_ctx(cq->ibv_cq.context);
 
 	for (prod_index = cq->cons_index; get_sw_cqe_v2(cq, prod_index);
 	     ++prod_index)
@@ -856,7 +839,7 @@  static void __hns_roce_v2_cq_clean(struct hns_roce_cq *cq, uint32_t qpn,
 	if (nfreed) {
 		cq->cons_index += nfreed;
 		udma_to_device_barrier();
-		hns_roce_v2_update_cq_cons_index(ctx, cq);
+		hns_roce_v2_update_cq_cons_index(cq);
 	}
 }
 
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
index cde8568..7037a1c 100644
--- a/providers/hns/hns_roce_u_verbs.c
+++ b/providers/hns/hns_roce_u_verbs.c
@@ -276,6 +276,16 @@  struct ibv_cq *hns_roce_u_create_cq(struct ibv_context *context, int cqe,
 
 	cmd.buf_addr = (uintptr_t) cq->buf.buf;
 
+	if (to_hr_dev(context->device)->hw_version != HNS_ROCE_HW_VER1) {
+		cq->set_ci_db = hns_roce_alloc_db(to_hr_ctx(context),
+						  HNS_ROCE_CQ_TYPE_DB);
+		if (!cq->set_ci_db) {
+			fprintf(stderr, "alloc cq db buffer failed!\n");
+			goto err_buf;
+		}
+		cmd.db_addr  = (uintptr_t) cq->set_ci_db;
+	}
+
 	ret = ibv_cmd_create_cq(context, cqe, channel, comp_vector,
 				&cq->ibv_cq, &cmd.ibv_cmd, sizeof(cmd),
 				&resp.ibv_resp, sizeof(resp));
@@ -287,9 +297,6 @@  struct ibv_cq *hns_roce_u_create_cq(struct ibv_context *context, int cqe,
 
 	if (to_hr_dev(context->device)->hw_version == HNS_ROCE_HW_VER1)
 		cq->set_ci_db = to_hr_ctx(context)->cq_tptr_base + cq->cqn * 2;
-	else
-		cq->set_ci_db = to_hr_ctx(context)->uar +
-				ROCEE_VF_DB_CFG0_OFFSET;
 
 	cq->arm_db    = cq->set_ci_db;
 	cq->arm_sn    = 1;
@@ -299,6 +306,11 @@  struct ibv_cq *hns_roce_u_create_cq(struct ibv_context *context, int cqe,
 	return &cq->ibv_cq;
 
 err_db:
+	if (to_hr_dev(context->device)->hw_version != HNS_ROCE_HW_VER1)
+		hns_roce_free_db(to_hr_ctx(context), cq->set_ci_db,
+				 HNS_ROCE_CQ_TYPE_DB);
+
+err_buf:
 	hns_roce_free_buf(&cq->buf);
 
 err:
@@ -320,6 +332,9 @@  int hns_roce_u_destroy_cq(struct ibv_cq *cq)
 	if (ret)
 		return ret;
 
+	if (to_hr_dev(cq->context->device)->hw_version != HNS_ROCE_HW_VER1)
+		hns_roce_free_db(to_hr_ctx(cq->context),
+				 to_hr_cq(cq)->set_ci_db, HNS_ROCE_CQ_TYPE_DB);
 	hns_roce_free_buf(&to_hr_cq(cq)->buf);
 	free(to_hr_cq(cq));