diff mbox series

ksmbd: fix missing RDMA-capable flag for IPoIB device in ksmbd_rdma_capable_netdev()

Message ID 20231015144536.9100-1-linkinjeon@kernel.org (mailing list archive)
State New, archived
Headers show
Series ksmbd: fix missing RDMA-capable flag for IPoIB device in ksmbd_rdma_capable_netdev() | expand

Commit Message

Namjae Jeon Oct. 15, 2023, 2:45 p.m. UTC
From: Kangjing Huang <huangkangjing@gmail.com>

Physical ib_device does not have an underlying net_device, thus its
association with IPoIB net_device cannot be retrieved via
ops.get_netdev() or ib_device_get_by_netdev(). ksmbd reads physical
ib_device port GUID from the lower 16 bytes of the hardware addresses on
IPoIB net_device and match its underlying ib_device using ib_find_gid()

Signed-off-by: Kangjing Huang <huangkangjing@gmail.com>
Acked-by: Namjae Jeon <linkinjeon@kernel.org>
---
 fs/smb/server/transport_rdma.c | 39 +++++++++++++++++++++++++---------
 1 file changed, 29 insertions(+), 10 deletions(-)

Comments

Tom Talpey Oct. 17, 2023, 2:06 p.m. UTC | #1
On 10/15/2023 10:45 AM, Namjae Jeon wrote:
> From: Kangjing Huang <huangkangjing@gmail.com>
> 
> Physical ib_device does not have an underlying net_device, thus its
> association with IPoIB net_device cannot be retrieved via
> ops.get_netdev() or ib_device_get_by_netdev(). ksmbd reads physical
> ib_device port GUID from the lower 16 bytes of the hardware addresses on
> IPoIB net_device and match its underlying ib_device using ib_find_gid()
> 
> Signed-off-by: Kangjing Huang <huangkangjing@gmail.com>
> Acked-by: Namjae Jeon <linkinjeon@kernel.org>
> ---
>   fs/smb/server/transport_rdma.c | 39 +++++++++++++++++++++++++---------
>   1 file changed, 29 insertions(+), 10 deletions(-)
> 
> diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c
> index 3b269e1f523a..a82131f7dd83 100644
> --- a/fs/smb/server/transport_rdma.c
> +++ b/fs/smb/server/transport_rdma.c
> @@ -2140,8 +2140,7 @@ static int smb_direct_ib_client_add(struct ib_device *ib_dev)
>   	if (ib_dev->node_type != RDMA_NODE_IB_CA)
>   		smb_direct_port = SMB_DIRECT_PORT_IWARP;
>   
> -	if (!ib_dev->ops.get_netdev ||
> -	    !rdma_frwr_is_supported(&ib_dev->attrs))
> +	if (!rdma_frwr_is_supported(&ib_dev->attrs))
>   		return 0;
>   
>   	smb_dev = kzalloc(sizeof(*smb_dev), GFP_KERNEL);
> @@ -2241,17 +2240,37 @@ bool ksmbd_rdma_capable_netdev(struct net_device *netdev)
>   		for (i = 0; i < smb_dev->ib_dev->phys_port_cnt; i++) {
>   			struct net_device *ndev;
>   
> -			ndev = smb_dev->ib_dev->ops.get_netdev(smb_dev->ib_dev,
> -							       i + 1);
> -			if (!ndev)
> -				continue;
> +			/* RoCE and iWRAP ib_dev is backed by a netdev */
> +			if (smb_dev->ib_dev->ops.get_netdev) {

The "IWRAP" is a typo, but IMO the comment is misleading. This is simply
looking up the target netdev, it's not limited to these two rdma types.
I suggest deleting the comment.

> +				ndev = smb_dev->ib_dev->ops.get_netdev(
> +					smb_dev->ib_dev, i + 1);
> +				if (!ndev)
> +					continue;
>   
> -			if (ndev == netdev) {
> +				if (ndev == netdev) {
> +					dev_put(ndev);
> +					rdma_capable = true;
> +					goto out;
> +				}
>   				dev_put(ndev);

Why not move this dev_put up above the if (ndev == netdev) test? It's
needed in both cases, so it's confusing to have two copies.

> -				rdma_capable = true;
> -				goto out;
> +			/* match physical ib_dev with IPoIB netdev by GUID */

Add more information to this comment, perhaps:

   /* if no exact netdev match, check for matching Infiniband GUID */

> +			} else if (netdev->type == ARPHRD_INFINIBAND) {
> +				struct netdev_hw_addr *ha;
> +				union ib_gid gid;
> +				u32 port_num;
> +				int ret;
> +
> +				netdev_hw_addr_list_for_each(
> +					ha, &netdev->dev_addrs) {
> +					memcpy(&gid, ha->addr + 4, sizeof(gid));
> +					ret = ib_find_gid(smb_dev->ib_dev, &gid,
> +							  &port_num, NULL);
> +					if (!ret) {
> +						rdma_capable = true;
> +						goto out;

Won't this leak the ndev? It needs a dev_put(ndev) before breaking
the loop, too, right?

> +					}
> +				}
>   			}
> -			dev_put(ndev);
>   		}
>   	}
>   out:
Kangjing Huang Oct. 19, 2023, 6:01 a.m. UTC | #2
Thank you for the review. Also thanks a lot to Namjae for submitting
this patch on my behalf.


On Tue, Oct 17, 2023 at 10:07 AM Tom Talpey <tom@talpey.com> wrote:
>
> On 10/15/2023 10:45 AM, Namjae Jeon wrote:
> > From: Kangjing Huang <huangkangjing@gmail.com>
> >
> > Physical ib_device does not have an underlying net_device, thus its
> > association with IPoIB net_device cannot be retrieved via
> > ops.get_netdev() or ib_device_get_by_netdev(). ksmbd reads physical
> > ib_device port GUID from the lower 16 bytes of the hardware addresses on
> > IPoIB net_device and match its underlying ib_device using ib_find_gid()
> >
> > Signed-off-by: Kangjing Huang <huangkangjing@gmail.com>
> > Acked-by: Namjae Jeon <linkinjeon@kernel.org>
> > ---
> >   fs/smb/server/transport_rdma.c | 39 +++++++++++++++++++++++++---------
> >   1 file changed, 29 insertions(+), 10 deletions(-)
> >
> > diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c
> > index 3b269e1f523a..a82131f7dd83 100644
> > --- a/fs/smb/server/transport_rdma.c
> > +++ b/fs/smb/server/transport_rdma.c
> > @@ -2140,8 +2140,7 @@ static int smb_direct_ib_client_add(struct ib_device *ib_dev)
> >       if (ib_dev->node_type != RDMA_NODE_IB_CA)
> >               smb_direct_port = SMB_DIRECT_PORT_IWARP;
> >
> > -     if (!ib_dev->ops.get_netdev ||
> > -         !rdma_frwr_is_supported(&ib_dev->attrs))
> > +     if (!rdma_frwr_is_supported(&ib_dev->attrs))
> >               return 0;
> >
> >       smb_dev = kzalloc(sizeof(*smb_dev), GFP_KERNEL);
> > @@ -2241,17 +2240,37 @@ bool ksmbd_rdma_capable_netdev(struct net_device *netdev)
> >               for (i = 0; i < smb_dev->ib_dev->phys_port_cnt; i++) {
> >                       struct net_device *ndev;
> >
> > -                     ndev = smb_dev->ib_dev->ops.get_netdev(smb_dev->ib_dev,
> > -                                                            i + 1);
> > -                     if (!ndev)
> > -                             continue;
> > +                     /* RoCE and iWRAP ib_dev is backed by a netdev */
> > +                     if (smb_dev->ib_dev->ops.get_netdev) {
>
> The "IWRAP" is a typo, but IMO the comment is misleading. This is simply
> looking up the target netdev, it's not limited to these two rdma types.
> I suggest deleting the comment.
>

I agree with this point and will update this comment in version 2 of the patch.

> > +                             ndev = smb_dev->ib_dev->ops.get_netdev(
> > +                                     smb_dev->ib_dev, i + 1);
> > +                             if (!ndev)
> > +                                     continue;
> >
> > -                     if (ndev == netdev) {
> > +                             if (ndev == netdev) {
> > +                                     dev_put(ndev);
> > +                                     rdma_capable = true;
> > +                                     goto out;
> > +                             }
> >                               dev_put(ndev);
>
> Why not move this dev_put up above the if (ndev == netdev) test? It's
> needed in both cases, so it's confusing to have two copies.

I do agree that these two puts are very confusing. However, this code
structure comes from the original ksmbd_rdma_capable_netdev() and
this patch only indents it one more level and puts it in the if test for
get_netdev.

Also, while two puts look very weird, IMO putting it before the if statement
is equally unintuitive as well since that would be testing the pointer after its
reference is released. Although since no de-reference is happening here, it
might be fine. Please confirm this is good coding-style-wise and I will include
this change in v2 of the patch.

>
> > -                             rdma_capable = true;
> > -                             goto out;
> > +                     /* match physical ib_dev with IPoIB netdev by GUID */
>
> Add more information to this comment, perhaps:
>
>    /* if no exact netdev match, check for matching Infiniband GUID */
>

Fair point, will update this comment in v2.

> > +                     } else if (netdev->type == ARPHRD_INFINIBAND) {
> > +                             struct netdev_hw_addr *ha;
> > +                             union ib_gid gid;
> > +                             u32 port_num;
> > +                             int ret;
> > +
> > +                             netdev_hw_addr_list_for_each(
> > +                                     ha, &netdev->dev_addrs) {
> > +                                     memcpy(&gid, ha->addr + 4, sizeof(gid));
> > +                                     ret = ib_find_gid(smb_dev->ib_dev, &gid,
> > +                                                       &port_num, NULL);
> > +                                     if (!ret) {
> > +                                             rdma_capable = true;
> > +                                             goto out;
>
> Won't this leak the ndev? It needs a dev_put(ndev) before breaking
> the loop, too, right?
>

This will not leak the ndev. Please look closer, this else branch matches with
the if test on the existence of ops.get_netdev of the current ib_dev. And ndev
is acquired only through that op. In the else branch, ndev is just not acquired.
The original code was indented one more level here so the patch might look
a bit confusing, but one of the if before this block is a deleted(-) line.

> > +                                     }
> > +                             }
> >                       }
> > -                     dev_put(ndev);
> >               }
> >       }
> >   out:



--
Kangjing "Chaser" Huang
Namjae Jeon Oct. 19, 2023, 10:37 p.m. UTC | #3
2023-10-19 15:01 GMT+09:00, Kangjing (Chaser) Huang <huangkangjing@gmail.com>:
> Thank you for the review. Also thanks a lot to Namjae for submitting
> this patch on my behalf.
>
>
> On Tue, Oct 17, 2023 at 10:07 AM Tom Talpey <tom@talpey.com> wrote:
>>
>> On 10/15/2023 10:45 AM, Namjae Jeon wrote:
>> > From: Kangjing Huang <huangkangjing@gmail.com>
>> >
>> > Physical ib_device does not have an underlying net_device, thus its
>> > association with IPoIB net_device cannot be retrieved via
>> > ops.get_netdev() or ib_device_get_by_netdev(). ksmbd reads physical
>> > ib_device port GUID from the lower 16 bytes of the hardware addresses
>> > on
>> > IPoIB net_device and match its underlying ib_device using ib_find_gid()
>> >
>> > Signed-off-by: Kangjing Huang <huangkangjing@gmail.com>
>> > Acked-by: Namjae Jeon <linkinjeon@kernel.org>
>> > ---
>> >   fs/smb/server/transport_rdma.c | 39
>> > +++++++++++++++++++++++++---------
>> >   1 file changed, 29 insertions(+), 10 deletions(-)
>> >
>> > diff --git a/fs/smb/server/transport_rdma.c
>> > b/fs/smb/server/transport_rdma.c
>> > index 3b269e1f523a..a82131f7dd83 100644
>> > --- a/fs/smb/server/transport_rdma.c
>> > +++ b/fs/smb/server/transport_rdma.c
>> > @@ -2140,8 +2140,7 @@ static int smb_direct_ib_client_add(struct
>> > ib_device *ib_dev)
>> >       if (ib_dev->node_type != RDMA_NODE_IB_CA)
>> >               smb_direct_port = SMB_DIRECT_PORT_IWARP;
>> >
>> > -     if (!ib_dev->ops.get_netdev ||
>> > -         !rdma_frwr_is_supported(&ib_dev->attrs))
>> > +     if (!rdma_frwr_is_supported(&ib_dev->attrs))
>> >               return 0;
>> >
>> >       smb_dev = kzalloc(sizeof(*smb_dev), GFP_KERNEL);
>> > @@ -2241,17 +2240,37 @@ bool ksmbd_rdma_capable_netdev(struct net_device
>> > *netdev)
>> >               for (i = 0; i < smb_dev->ib_dev->phys_port_cnt; i++) {
>> >                       struct net_device *ndev;
>> >
>> > -                     ndev =
>> > smb_dev->ib_dev->ops.get_netdev(smb_dev->ib_dev,
>> > -                                                            i + 1);
>> > -                     if (!ndev)
>> > -                             continue;
>> > +                     /* RoCE and iWRAP ib_dev is backed by a netdev */
>> > +                     if (smb_dev->ib_dev->ops.get_netdev) {
>>
>> The "IWRAP" is a typo, but IMO the comment is misleading. This is simply
>> looking up the target netdev, it's not limited to these two rdma types.
>> I suggest deleting the comment.
>>
>
> I agree with this point and will update this comment in version 2 of the
> patch.
>
>> > +                             ndev = smb_dev->ib_dev->ops.get_netdev(
>> > +                                     smb_dev->ib_dev, i + 1);
>> > +                             if (!ndev)
>> > +                                     continue;
>> >
>> > -                     if (ndev == netdev) {
>> > +                             if (ndev == netdev) {
>> > +                                     dev_put(ndev);
>> > +                                     rdma_capable = true;
>> > +                                     goto out;
>> > +                             }
>> >                               dev_put(ndev);
>>
>> Why not move this dev_put up above the if (ndev == netdev) test? It's
>> needed in both cases, so it's confusing to have two copies.
>
> I do agree that these two puts are very confusing. However, this code
> structure comes from the original ksmbd_rdma_capable_netdev() and
> this patch only indents it one more level and puts it in the if test for
> get_netdev.
>
> Also, while two puts look very weird, IMO putting it before the if
> statement
> is equally unintuitive as well since that would be testing the pointer after
> its
> reference is released. Although since no de-reference is happening here, it
> might be fine. Please confirm this is good coding-style-wise and I will
> include
> this change in v2 of the patch.
There is no need to update code that does not have problems.
>
>>
>> > -                             rdma_capable = true;
>> > -                             goto out;
>> > +                     /* match physical ib_dev with IPoIB netdev by GUID
>> > */
>>
>> Add more information to this comment, perhaps:
>>
>>    /* if no exact netdev match, check for matching Infiniband GUID */
>>
>
> Fair point, will update this comment in v2.
>
>> > +                     } else if (netdev->type == ARPHRD_INFINIBAND) {
>> > +                             struct netdev_hw_addr *ha;
>> > +                             union ib_gid gid;
>> > +                             u32 port_num;
>> > +                             int ret;
>> > +
>> > +                             netdev_hw_addr_list_for_each(
>> > +                                     ha, &netdev->dev_addrs) {
>> > +                                     memcpy(&gid, ha->addr + 4,
>> > sizeof(gid));
>> > +                                     ret = ib_find_gid(smb_dev->ib_dev,
>> > &gid,
>> > +                                                       &port_num,
>> > NULL);
>> > +                                     if (!ret) {
>> > +                                             rdma_capable = true;
>> > +                                             goto out;
>>
>> Won't this leak the ndev? It needs a dev_put(ndev) before breaking
>> the loop, too, right?
>>
>
> This will not leak the ndev. Please look closer, this else branch matches
> with
> the if test on the existence of ops.get_netdev of the current ib_dev. And
> ndev
> is acquired only through that op. In the else branch, ndev is just not
> acquired.
> The original code was indented one more level here so the patch might look
> a bit confusing, but one of the if before this block is a deleted(-) line.
You're right.

Please send v2 patch after adding comments that Tom pointed out.

Thanks!
>
>> > +                                     }
>> > +                             }
>> >                       }
>> > -                     dev_put(ndev);
>> >               }
>> >       }
>> >   out:
>
>
>
> --
> Kangjing "Chaser" Huang
>
Tom Talpey Oct. 20, 2023, 1:14 a.m. UTC | #4
On 10/19/2023 6:37 PM, Namjae Jeon wrote:
> 2023-10-19 15:01 GMT+09:00, Kangjing (Chaser) Huang <huangkangjing@gmail.com>:
>> Thank you for the review. Also thanks a lot to Namjae for submitting
>> this patch on my behalf.
>>
>>
>> On Tue, Oct 17, 2023 at 10:07 AM Tom Talpey <tom@talpey.com> wrote:
>>>
>>> On 10/15/2023 10:45 AM, Namjae Jeon wrote:
>>>> From: Kangjing Huang <huangkangjing@gmail.com>
>>>>
>>>> Physical ib_device does not have an underlying net_device, thus its
>>>> association with IPoIB net_device cannot be retrieved via
>>>> ops.get_netdev() or ib_device_get_by_netdev(). ksmbd reads physical
>>>> ib_device port GUID from the lower 16 bytes of the hardware addresses
>>>> on
>>>> IPoIB net_device and match its underlying ib_device using ib_find_gid()
>>>>
>>>> Signed-off-by: Kangjing Huang <huangkangjing@gmail.com>
>>>> Acked-by: Namjae Jeon <linkinjeon@kernel.org>
>>>> ---
>>>>    fs/smb/server/transport_rdma.c | 39
>>>> +++++++++++++++++++++++++---------
>>>>    1 file changed, 29 insertions(+), 10 deletions(-)
>>>>
>>>> diff --git a/fs/smb/server/transport_rdma.c
>>>> b/fs/smb/server/transport_rdma.c
>>>> index 3b269e1f523a..a82131f7dd83 100644
>>>> --- a/fs/smb/server/transport_rdma.c
>>>> +++ b/fs/smb/server/transport_rdma.c
>>>> @@ -2140,8 +2140,7 @@ static int smb_direct_ib_client_add(struct
>>>> ib_device *ib_dev)
>>>>        if (ib_dev->node_type != RDMA_NODE_IB_CA)
>>>>                smb_direct_port = SMB_DIRECT_PORT_IWARP;
>>>>
>>>> -     if (!ib_dev->ops.get_netdev ||
>>>> -         !rdma_frwr_is_supported(&ib_dev->attrs))
>>>> +     if (!rdma_frwr_is_supported(&ib_dev->attrs))
>>>>                return 0;
>>>>
>>>>        smb_dev = kzalloc(sizeof(*smb_dev), GFP_KERNEL);
>>>> @@ -2241,17 +2240,37 @@ bool ksmbd_rdma_capable_netdev(struct net_device
>>>> *netdev)
>>>>                for (i = 0; i < smb_dev->ib_dev->phys_port_cnt; i++) {
>>>>                        struct net_device *ndev;
>>>>
>>>> -                     ndev =
>>>> smb_dev->ib_dev->ops.get_netdev(smb_dev->ib_dev,
>>>> -                                                            i + 1);
>>>> -                     if (!ndev)
>>>> -                             continue;
>>>> +                     /* RoCE and iWRAP ib_dev is backed by a netdev */
>>>> +                     if (smb_dev->ib_dev->ops.get_netdev) {
>>>
>>> The "IWRAP" is a typo, but IMO the comment is misleading. This is simply
>>> looking up the target netdev, it's not limited to these two rdma types.
>>> I suggest deleting the comment.
>>>
>>
>> I agree with this point and will update this comment in version 2 of the
>> patch.
>>
>>>> +                             ndev = smb_dev->ib_dev->ops.get_netdev(
>>>> +                                     smb_dev->ib_dev, i + 1);
>>>> +                             if (!ndev)
>>>> +                                     continue;
>>>>
>>>> -                     if (ndev == netdev) {
>>>> +                             if (ndev == netdev) {
>>>> +                                     dev_put(ndev);
>>>> +                                     rdma_capable = true;
>>>> +                                     goto out;
>>>> +                             }
>>>>                                dev_put(ndev);
>>>
>>> Why not move this dev_put up above the if (ndev == netdev) test? It's
>>> needed in both cases, so it's confusing to have two copies.
>>
>> I do agree that these two puts are very confusing. However, this code
>> structure comes from the original ksmbd_rdma_capable_netdev() and
>> this patch only indents it one more level and puts it in the if test for
>> get_netdev.
>>
>> Also, while two puts look very weird, IMO putting it before the if
>> statement
>> is equally unintuitive as well since that would be testing the pointer after
>> its
>> reference is released. Although since no de-reference is happening here, it
>> might be fine. Please confirm this is good coding-style-wise and I will
>> include
>> this change in v2 of the patch.
> There is no need to update code that does not have problems.

Well, there are now at least half a dozen stanzas of
	dev_put
	rdma_capable = <T|f>
	goto out

and I don't see why these can't be simplified to

	goto out_capable|out_notcapable

It woud be a lot clearer, at least to this reviewer. And more reliable
to maintain.

>>
>>>
>>>> -                             rdma_capable = true;
>>>> -                             goto out;
>>>> +                     /* match physical ib_dev with IPoIB netdev by GUID
>>>> */
>>>
>>> Add more information to this comment, perhaps:
>>>
>>>     /* if no exact netdev match, check for matching Infiniband GUID */
>>>
>>
>> Fair point, will update this comment in v2.
>>
>>>> +                     } else if (netdev->type == ARPHRD_INFINIBAND) {
>>>> +                             struct netdev_hw_addr *ha;
>>>> +                             union ib_gid gid;
>>>> +                             u32 port_num;
>>>> +                             int ret;
>>>> +
>>>> +                             netdev_hw_addr_list_for_each(
>>>> +                                     ha, &netdev->dev_addrs) {
>>>> +                                     memcpy(&gid, ha->addr + 4,
>>>> sizeof(gid));
>>>> +                                     ret = ib_find_gid(smb_dev->ib_dev,
>>>> &gid,
>>>> +                                                       &port_num,
>>>> NULL);
>>>> +                                     if (!ret) {
>>>> +                                             rdma_capable = true;
>>>> +                                             goto out;
>>>
>>> Won't this leak the ndev? It needs a dev_put(ndev) before breaking
>>> the loop, too, right?
>>>
>>
>> This will not leak the ndev. Please look closer, this else branch matches
>> with
>> the if test on the existence of ops.get_netdev of the current ib_dev. And
>> ndev
>> is acquired only through that op. In the else branch, ndev is just not
>> acquired.
>> The original code was indented one more level here so the patch might look
>> a bit confusing, but one of the if before this block is a deleted(-) line.
> You're right.

Ok, so I repeat my comment above!

Tom.

> 
> Please send v2 patch after adding comments that Tom pointed out.
> 
> Thanks!
>>
>>>> +                                     }
>>>> +                             }
>>>>                        }
>>>> -                     dev_put(ndev);
>>>>                }
>>>>        }
>>>>    out:
>>
>>
>>
>> --
>> Kangjing "Chaser" Huang
>>
>
Kangjing Huang Oct. 20, 2023, 11:38 a.m. UTC | #5
On Thu, Oct 19, 2023 at 9:14 PM Tom Talpey <tom@talpey.com> wrote:
>
> On 10/19/2023 6:37 PM, Namjae Jeon wrote:
> > 2023-10-19 15:01 GMT+09:00, Kangjing (Chaser) Huang <huangkangjing@gmail.com>:
> >> Thank you for the review. Also thanks a lot to Namjae for submitting
> >> this patch on my behalf.
> >>
> >>
> >> On Tue, Oct 17, 2023 at 10:07 AM Tom Talpey <tom@talpey.com> wrote:
> >>>
> >>> On 10/15/2023 10:45 AM, Namjae Jeon wrote:
> >>>> From: Kangjing Huang <huangkangjing@gmail.com>
> >>>>
> >>>> Physical ib_device does not have an underlying net_device, thus its
> >>>> association with IPoIB net_device cannot be retrieved via
> >>>> ops.get_netdev() or ib_device_get_by_netdev(). ksmbd reads physical
> >>>> ib_device port GUID from the lower 16 bytes of the hardware addresses
> >>>> on
> >>>> IPoIB net_device and match its underlying ib_device using ib_find_gid()
> >>>>
> >>>> Signed-off-by: Kangjing Huang <huangkangjing@gmail.com>
> >>>> Acked-by: Namjae Jeon <linkinjeon@kernel.org>
> >>>> ---
> >>>>    fs/smb/server/transport_rdma.c | 39
> >>>> +++++++++++++++++++++++++---------
> >>>>    1 file changed, 29 insertions(+), 10 deletions(-)
> >>>>
> >>>> diff --git a/fs/smb/server/transport_rdma.c
> >>>> b/fs/smb/server/transport_rdma.c
> >>>> index 3b269e1f523a..a82131f7dd83 100644
> >>>> --- a/fs/smb/server/transport_rdma.c
> >>>> +++ b/fs/smb/server/transport_rdma.c
> >>>> @@ -2140,8 +2140,7 @@ static int smb_direct_ib_client_add(struct
> >>>> ib_device *ib_dev)
> >>>>        if (ib_dev->node_type != RDMA_NODE_IB_CA)
> >>>>                smb_direct_port = SMB_DIRECT_PORT_IWARP;
> >>>>
> >>>> -     if (!ib_dev->ops.get_netdev ||
> >>>> -         !rdma_frwr_is_supported(&ib_dev->attrs))
> >>>> +     if (!rdma_frwr_is_supported(&ib_dev->attrs))
> >>>>                return 0;
> >>>>
> >>>>        smb_dev = kzalloc(sizeof(*smb_dev), GFP_KERNEL);
> >>>> @@ -2241,17 +2240,37 @@ bool ksmbd_rdma_capable_netdev(struct net_device
> >>>> *netdev)
> >>>>                for (i = 0; i < smb_dev->ib_dev->phys_port_cnt; i++) {
> >>>>                        struct net_device *ndev;
> >>>>
> >>>> -                     ndev =
> >>>> smb_dev->ib_dev->ops.get_netdev(smb_dev->ib_dev,
> >>>> -                                                            i + 1);
> >>>> -                     if (!ndev)
> >>>> -                             continue;
> >>>> +                     /* RoCE and iWRAP ib_dev is backed by a netdev */
> >>>> +                     if (smb_dev->ib_dev->ops.get_netdev) {
> >>>
> >>> The "IWRAP" is a typo, but IMO the comment is misleading. This is simply
> >>> looking up the target netdev, it's not limited to these two rdma types.
> >>> I suggest deleting the comment.
> >>>
> >>
> >> I agree with this point and will update this comment in version 2 of the
> >> patch.
> >>
> >>>> +                             ndev = smb_dev->ib_dev->ops.get_netdev(
> >>>> +                                     smb_dev->ib_dev, i + 1);
> >>>> +                             if (!ndev)
> >>>> +                                     continue;
> >>>>
> >>>> -                     if (ndev == netdev) {
> >>>> +                             if (ndev == netdev) {
> >>>> +                                     dev_put(ndev);
> >>>> +                                     rdma_capable = true;
> >>>> +                                     goto out;
> >>>> +                             }
> >>>>                                dev_put(ndev);
> >>>
> >>> Why not move this dev_put up above the if (ndev == netdev) test? It's
> >>> needed in both cases, so it's confusing to have two copies.
> >>
> >> I do agree that these two puts are very confusing. However, this code
> >> structure comes from the original ksmbd_rdma_capable_netdev() and
> >> this patch only indents it one more level and puts it in the if test for
> >> get_netdev.
> >>
> >> Also, while two puts look very weird, IMO putting it before the if
> >> statement
> >> is equally unintuitive as well since that would be testing the pointer after
> >> its
> >> reference is released. Although since no de-reference is happening here, it
> >> might be fine. Please confirm this is good coding-style-wise and I will
> >> include
> >> this change in v2 of the patch.
> > There is no need to update code that does not have problems.
>

> Well, there are now at least half a dozen stanzas of
>         dev_put
>         rdma_capable = <T|f>
>         goto out
>
> and I don't see why these can't be simplified to
>
>         goto out_capable|out_notcapable
>
> It woud be a lot clearer, at least to this reviewer. And more reliable
> to maintain.
>

I agree, but this consolidation would be impossible without a
major overhaul of the code structure in
ksmbd_rdma_capable_netdev(). There are several clean-up
calls that need to be made (read_unlock(&smb_direct_device_lock),
dev_put, ib_device_put) and the biggest problem is brought in
by the block of code that is doing a reverse lookup on ib_dev
structs right after the patched loop.

I don't know why it is necessary for such a reverse lookup, but given
that these code depends on the behaviors of ib device drivers, which
can be very erratic. I feel that removing that block could break
something with some other devices.

In conclusion, I feel addressing these issues in this function is
beyond the scope of this patch and they should be addressed
separately. I will move forward with the comment change in v2.



> >>
> >>>
> >>>> -                             rdma_capable = true;
> >>>> -                             goto out;
> >>>> +                     /* match physical ib_dev with IPoIB netdev by GUID
> >>>> */
> >>>
> >>> Add more information to this comment, perhaps:
> >>>
> >>>     /* if no exact netdev match, check for matching Infiniband GUID */
> >>>
> >>
> >> Fair point, will update this comment in v2.
> >>
> >>>> +                     } else if (netdev->type == ARPHRD_INFINIBAND) {
> >>>> +                             struct netdev_hw_addr *ha;
> >>>> +                             union ib_gid gid;
> >>>> +                             u32 port_num;
> >>>> +                             int ret;
> >>>> +
> >>>> +                             netdev_hw_addr_list_for_each(
> >>>> +                                     ha, &netdev->dev_addrs) {
> >>>> +                                     memcpy(&gid, ha->addr + 4,
> >>>> sizeof(gid));
> >>>> +                                     ret = ib_find_gid(smb_dev->ib_dev,
> >>>> &gid,
> >>>> +                                                       &port_num,
> >>>> NULL);
> >>>> +                                     if (!ret) {
> >>>> +                                             rdma_capable = true;
> >>>> +                                             goto out;
> >>>
> >>> Won't this leak the ndev? It needs a dev_put(ndev) before breaking
> >>> the loop, too, right?
> >>>
> >>
> >> This will not leak the ndev. Please look closer, this else branch matches
> >> with
> >> the if test on the existence of ops.get_netdev of the current ib_dev. And
> >> ndev
> >> is acquired only through that op. In the else branch, ndev is just not
> >> acquired.
> >> The original code was indented one more level here so the patch might look
> >> a bit confusing, but one of the if before this block is a deleted(-) line.
> > You're right.
>
> Ok, so I repeat my comment above!
>
> Tom.
>
> >
> > Please send v2 patch after adding comments that Tom pointed out.
> >
> > Thanks!
> >>
> >>>> +                                     }
> >>>> +                             }
> >>>>                        }
> >>>> -                     dev_put(ndev);
> >>>>                }
> >>>>        }
> >>>>    out:
> >>
> >>
> >>
> >> --
> >> Kangjing "Chaser" Huang
> >>
> >



--
Kangjing "Chaser" Huang
diff mbox series

Patch

diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c
index 3b269e1f523a..a82131f7dd83 100644
--- a/fs/smb/server/transport_rdma.c
+++ b/fs/smb/server/transport_rdma.c
@@ -2140,8 +2140,7 @@  static int smb_direct_ib_client_add(struct ib_device *ib_dev)
 	if (ib_dev->node_type != RDMA_NODE_IB_CA)
 		smb_direct_port = SMB_DIRECT_PORT_IWARP;
 
-	if (!ib_dev->ops.get_netdev ||
-	    !rdma_frwr_is_supported(&ib_dev->attrs))
+	if (!rdma_frwr_is_supported(&ib_dev->attrs))
 		return 0;
 
 	smb_dev = kzalloc(sizeof(*smb_dev), GFP_KERNEL);
@@ -2241,17 +2240,37 @@  bool ksmbd_rdma_capable_netdev(struct net_device *netdev)
 		for (i = 0; i < smb_dev->ib_dev->phys_port_cnt; i++) {
 			struct net_device *ndev;
 
-			ndev = smb_dev->ib_dev->ops.get_netdev(smb_dev->ib_dev,
-							       i + 1);
-			if (!ndev)
-				continue;
+			/* RoCE and iWRAP ib_dev is backed by a netdev */
+			if (smb_dev->ib_dev->ops.get_netdev) {
+				ndev = smb_dev->ib_dev->ops.get_netdev(
+					smb_dev->ib_dev, i + 1);
+				if (!ndev)
+					continue;
 
-			if (ndev == netdev) {
+				if (ndev == netdev) {
+					dev_put(ndev);
+					rdma_capable = true;
+					goto out;
+				}
 				dev_put(ndev);
-				rdma_capable = true;
-				goto out;
+			/* match physical ib_dev with IPoIB netdev by GUID */
+			} else if (netdev->type == ARPHRD_INFINIBAND) {
+				struct netdev_hw_addr *ha;
+				union ib_gid gid;
+				u32 port_num;
+				int ret;
+
+				netdev_hw_addr_list_for_each(
+					ha, &netdev->dev_addrs) {
+					memcpy(&gid, ha->addr + 4, sizeof(gid));
+					ret = ib_find_gid(smb_dev->ib_dev, &gid,
+							  &port_num, NULL);
+					if (!ret) {
+						rdma_capable = true;
+						goto out;
+					}
+				}
 			}
-			dev_put(ndev);
 		}
 	}
 out: