diff mbox

[PATCHv2,1/1] IB/rxe: optimize the recv process

Message ID 1521429652-25352-1-git-send-email-yanjun.zhu@oracle.com (mailing list archive)
State Changes Requested
Headers show

Commit Message

Zhu Yanjun March 19, 2018, 3:20 a.m. UTC
In mcast recv process, the function skb_clone is used. In fact,
the refcount can be increased to replace cloning a new skb since
the original skb will not be modified before it is freed.

This can make the performance better and save the memory.

CC: Srinivas Eeda <srinivas.eeda@oracle.com>
CC: Junxiao Bi <junxiao.bi@oracle.com>
Signed-off-by: Zhu Yanjun <yanjun.zhu@oracle.com>
---
V1->V2: Following Yuval's advice, skb test is removed.
---
 drivers/infiniband/sw/rxe/rxe_recv.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

Comments

Yuval Shaia March 19, 2018, 7:25 a.m. UTC | #1
On Sun, Mar 18, 2018 at 11:20:52PM -0400, Zhu Yanjun wrote:
> In mcast recv process, the function skb_clone is used. In fact,
> the refcount can be increased to replace cloning a new skb since
> the original skb will not be modified before it is freed.

Even in the recv flow?

> 
> This can make the performance better and save the memory.
> 
> CC: Srinivas Eeda <srinivas.eeda@oracle.com>
> CC: Junxiao Bi <junxiao.bi@oracle.com>
> Signed-off-by: Zhu Yanjun <yanjun.zhu@oracle.com>
> ---
> V1->V2: Following Yuval's advice, skb test is removed.
> ---
>  drivers/infiniband/sw/rxe/rxe_recv.c | 16 +++++++++-------
>  1 file changed, 9 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c
> index 4c3f899..e0e4202 100644
> --- a/drivers/infiniband/sw/rxe/rxe_recv.c
> +++ b/drivers/infiniband/sw/rxe/rxe_recv.c
> @@ -276,7 +276,7 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb)
>  {
>  	struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);
>  	struct rxe_mc_grp *mcg;
> -	struct sk_buff *skb_copy;
> +	struct sk_buff *skb_reference;
>  	struct rxe_mc_elem *mce;
>  	struct rxe_qp *qp;
>  	union ib_gid dgid;
> @@ -309,16 +309,19 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb)
>  			continue;
>  
>  		/* if *not* the last qp in the list
> -		 * make a copy of the skb to post to the next qp
> +		 * make a reference of the skb to post to the next qp
>  		 */
> -		skb_copy = (mce->qp_list.next != &mcg->qp_list) ?
> -				skb_clone(skb, GFP_ATOMIC) : NULL;
> +		skb_reference = NULL;
> +		if (mce->qp_list.next != &mcg->qp_list) {
> +			skb_reference = skb;
> +			refcount_inc(&skb->users);
> +		}

My question from v1 remains, i don't see why we need this temp variable
(skb_clone or skb_reference).
As i see it (unless i'm wrong), code can be something like:

	if (mce->qp_list.next != &mcg->qp_list)
		refcount_inc(&skb->users);

Then do the recv and skb remains as it was since we didn't cloned, just inc
ref.

>  
>  		pkt->qp = qp;
>  		rxe_add_ref(qp);
>  		rxe_rcv_pkt(rxe, pkt, skb);
>  
> -		skb = skb_copy;
> +		skb = skb_reference;
>  		if (!skb)
>  			break;

Also, this check i believe covers only the case where skb_clone fails to
allocate memory since the exit criteria (loop on QP list) is enough and
because we no longer have skb_clone then skb **can never** be NULL.

>  	}
> @@ -328,8 +331,7 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb)
>  	rxe_drop_ref(mcg);	/* drop ref from rxe_pool_get_key. */
>  
>  err1:
> -	if (skb)
> -		kfree_skb(skb);
> +	kfree_skb(skb);
>  }
>  
>  static int rxe_match_dgid(struct rxe_dev *rxe, struct sk_buff *skb)
> -- 
> 2.7.4
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
yonatan cohen March 19, 2018, 12:29 p.m. UTC | #2
On 3/19/2018 9:25 AM, Yuval Shaia wrote:
> On Sun, Mar 18, 2018 at 11:20:52PM -0400, Zhu Yanjun wrote:
>> In mcast recv process, the function skb_clone is used. In fact,
>> the refcount can be increased to replace cloning a new skb since
>> the original skb will not be modified before it is freed.
> 
> Even in the recv flow?
> 
>>
>> This can make the performance better and save the memory.
>>
>> CC: Srinivas Eeda <srinivas.eeda@oracle.com>
>> CC: Junxiao Bi <junxiao.bi@oracle.com>
>> Signed-off-by: Zhu Yanjun <yanjun.zhu@oracle.com>
>> ---
>> V1->V2: Following Yuval's advice, skb test is removed.
>> ---
>>   drivers/infiniband/sw/rxe/rxe_recv.c | 16 +++++++++-------
>>   1 file changed, 9 insertions(+), 7 deletions(-)
>>
>> diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c
>> index 4c3f899..e0e4202 100644
>> --- a/drivers/infiniband/sw/rxe/rxe_recv.c
>> +++ b/drivers/infiniband/sw/rxe/rxe_recv.c
>> @@ -276,7 +276,7 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb)
>>   {
>>   	struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);
>>   	struct rxe_mc_grp *mcg;
>> -	struct sk_buff *skb_copy;
>> +	struct sk_buff *skb_reference;
>>   	struct rxe_mc_elem *mce;
>>   	struct rxe_qp *qp;
>>   	union ib_gid dgid;
>> @@ -309,16 +309,19 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb)
>>   			continue;
>>   
>>   		/* if *not* the last qp in the list
>> -		 * make a copy of the skb to post to the next qp
>> +		 * make a reference of the skb to post to the next qp
>>   		 */
>> -		skb_copy = (mce->qp_list.next != &mcg->qp_list) ?
>> -				skb_clone(skb, GFP_ATOMIC) : NULL;
>> +		skb_reference = NULL;
>> +		if (mce->qp_list.next != &mcg->qp_list) {
>> +			skb_reference = skb;
>> +			refcount_inc(&skb->users);
>> +		}
> 
> My question from v1 remains, i don't see why we need this temp variable
> (skb_clone or skb_reference).
> As i see it (unless i'm wrong), code can be something like:
> 
> 	if (mce->qp_list.next != &mcg->qp_list)
> 		refcount_inc(&skb->users);
> 
> Then do the recv and skb remains as it was since we didn't cloned, just inc
> ref.
agreed. then we break from the loop when the qp list ends.
> 
>>   
>>   		pkt->qp = qp;
>>   		rxe_add_ref(qp);
>>   		rxe_rcv_pkt(rxe, pkt, skb);
>>   
>> -		skb = skb_copy;
>> +		skb = skb_reference;
>>   		if (!skb)
>>   			break;
> 
> Also, this check i believe covers only the case where skb_clone fails to
> allocate memory since the exit criteria (loop on QP list) is enough and
> because we no longer have skb_clone then skb **can never** be NULL.
agree here also. it can not be NULL although i dont see any harm in 
making sure that it isnt NULL.
> 
>>   	}
>> @@ -328,8 +331,7 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb)
>>   	rxe_drop_ref(mcg);	/* drop ref from rxe_pool_get_key. */
>>   
>>   err1:
>> -	if (skb)
>> -		kfree_skb(skb);
>> +	kfree_skb(skb);
>>   }
>>   
>>   static int rxe_match_dgid(struct rxe_dev *rxe, struct sk_buff *skb)
>> -- 
>> 2.7.4
>>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Zhu Yanjun March 20, 2018, 7:04 a.m. UTC | #3
On 2018/3/19 20:29, Yonatan Cohen wrote:
> On 3/19/2018 9:25 AM, Yuval Shaia wrote:
>> On Sun, Mar 18, 2018 at 11:20:52PM -0400, Zhu Yanjun wrote:
>>> In mcast recv process, the function skb_clone is used. In fact,
>>> the refcount can be increased to replace cloning a new skb since
>>> the original skb will not be modified before it is freed.
>>
>> Even in the recv flow?
>>
>>>
>>> This can make the performance better and save the memory.
>>>
>>> CC: Srinivas Eeda <srinivas.eeda@oracle.com>
>>> CC: Junxiao Bi <junxiao.bi@oracle.com>
>>> Signed-off-by: Zhu Yanjun <yanjun.zhu@oracle.com>
>>> ---
>>> V1->V2: Following Yuval's advice, skb test is removed.
>>> ---
>>>   drivers/infiniband/sw/rxe/rxe_recv.c | 16 +++++++++-------
>>>   1 file changed, 9 insertions(+), 7 deletions(-)
>>>
>>> diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c 
>>> b/drivers/infiniband/sw/rxe/rxe_recv.c
>>> index 4c3f899..e0e4202 100644
>>> --- a/drivers/infiniband/sw/rxe/rxe_recv.c
>>> +++ b/drivers/infiniband/sw/rxe/rxe_recv.c
>>> @@ -276,7 +276,7 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev 
>>> *rxe, struct sk_buff *skb)
>>>   {
>>>       struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);
>>>       struct rxe_mc_grp *mcg;
>>> -    struct sk_buff *skb_copy;
>>> +    struct sk_buff *skb_reference;
>>>       struct rxe_mc_elem *mce;
>>>       struct rxe_qp *qp;
>>>       union ib_gid dgid;
>>> @@ -309,16 +309,19 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev 
>>> *rxe, struct sk_buff *skb)
>>>               continue;
>>>             /* if *not* the last qp in the list
>>> -         * make a copy of the skb to post to the next qp
>>> +         * make a reference of the skb to post to the next qp
>>>            */
>>> -        skb_copy = (mce->qp_list.next != &mcg->qp_list) ?
>>> -                skb_clone(skb, GFP_ATOMIC) : NULL;
>>> +        skb_reference = NULL;
>>> +        if (mce->qp_list.next != &mcg->qp_list) {
>>> +            skb_reference = skb;
>>> +            refcount_inc(&skb->users);
>>> +        }
>>
>> My question from v1 remains, i don't see why we need this temp variable
>> (skb_clone or skb_reference).
>> As i see it (unless i'm wrong), code can be something like:
>>
>>     if (mce->qp_list.next != &mcg->qp_list)
>>         refcount_inc(&skb->users);
>>
>> Then do the recv and skb remains as it was since we didn't cloned, 
>> just inc
>> ref.
> agreed. then we break from the loop when the qp list ends.
Thanks a lot. I will make a new patch very soon.

Zhu Yanjun
>>
>>>             pkt->qp = qp;
>>>           rxe_add_ref(qp);
>>>           rxe_rcv_pkt(rxe, pkt, skb);
>>>   -        skb = skb_copy;
>>> +        skb = skb_reference;
>>>           if (!skb)
>>>               break;
>>
>> Also, this check i believe covers only the case where skb_clone fails to
>> allocate memory since the exit criteria (loop on QP list) is enough and
>> because we no longer have skb_clone then skb **can never** be NULL.
> agree here also. it can not be NULL although i dont see any harm in 
> making sure that it isnt NULL.
>>
>>>       }
>>> @@ -328,8 +331,7 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev 
>>> *rxe, struct sk_buff *skb)
>>>       rxe_drop_ref(mcg);    /* drop ref from rxe_pool_get_key. */
>>>     err1:
>>> -    if (skb)
>>> -        kfree_skb(skb);
>>> +    kfree_skb(skb);
>>>   }
>>>     static int rxe_match_dgid(struct rxe_dev *rxe, struct sk_buff *skb)
>>> -- 
>>> 2.7.4
>>>
>> -- 
>> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at http://vger.kernel.org/majordomo-info.html
>>
>
>

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c
index 4c3f899..e0e4202 100644
--- a/drivers/infiniband/sw/rxe/rxe_recv.c
+++ b/drivers/infiniband/sw/rxe/rxe_recv.c
@@ -276,7 +276,7 @@  static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb)
 {
 	struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);
 	struct rxe_mc_grp *mcg;
-	struct sk_buff *skb_copy;
+	struct sk_buff *skb_reference;
 	struct rxe_mc_elem *mce;
 	struct rxe_qp *qp;
 	union ib_gid dgid;
@@ -309,16 +309,19 @@  static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb)
 			continue;
 
 		/* if *not* the last qp in the list
-		 * make a copy of the skb to post to the next qp
+		 * make a reference of the skb to post to the next qp
 		 */
-		skb_copy = (mce->qp_list.next != &mcg->qp_list) ?
-				skb_clone(skb, GFP_ATOMIC) : NULL;
+		skb_reference = NULL;
+		if (mce->qp_list.next != &mcg->qp_list) {
+			skb_reference = skb;
+			refcount_inc(&skb->users);
+		}
 
 		pkt->qp = qp;
 		rxe_add_ref(qp);
 		rxe_rcv_pkt(rxe, pkt, skb);
 
-		skb = skb_copy;
+		skb = skb_reference;
 		if (!skb)
 			break;
 	}
@@ -328,8 +331,7 @@  static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb)
 	rxe_drop_ref(mcg);	/* drop ref from rxe_pool_get_key. */
 
 err1:
-	if (skb)
-		kfree_skb(skb);
+	kfree_skb(skb);
 }
 
 static int rxe_match_dgid(struct rxe_dev *rxe, struct sk_buff *skb)