Message ID | 20241210130351.406603-1-bmt@zurich.ibm.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | RDMA/siw: Remove direct link to net_device | expand |
On 10.12.24 14:03, Bernard Metzler wrote: > Maintain needed network interface information locally, but > remove a direct link to net_device which can become stale. > Accessing a stale net_device link was causing a 'KASAN: > slab-use-after-free' exception during siw_query_port() > call. > > Fixes: bdcf26bf9b3a ("rdma/siw: network and RDMA core interface") > Reported-by: syzbot+4b87489410b4efd181bf@syzkaller.appspotmail.com > Link: https://syzkaller.appspot.com/bug?extid=4b87489410b4efd181bf Thanks, Bernard. The similar problem also exists in rxe. The link is https://syzkaller.appspot.com/bug?extid=4b87489410b4efd181bf And I delved into this problem, it seems that the wq event was queued in ib_wq for a long time. Then before the event was executed, the netdev was freed. Then this problem occured. I hope that this commit can fix this problem.^_^ Best Regards, Zhu Yanjun > Signed-off-by: Bernard Metzler <bmt@zurich.ibm.com> > --- > drivers/infiniband/sw/siw/siw.h | 11 +++++++---- > drivers/infiniband/sw/siw/siw_cm.c | 4 ++-- > drivers/infiniband/sw/siw/siw_main.c | 18 ++++++++++++------ > drivers/infiniband/sw/siw/siw_verbs.c | 11 ++++++----- > 4 files changed, 27 insertions(+), 17 deletions(-) > > diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h > index 86d4d6a2170e..c8f75527b513 100644 > --- a/drivers/infiniband/sw/siw/siw.h > +++ b/drivers/infiniband/sw/siw/siw.h > @@ -69,16 +69,19 @@ struct siw_pd { > > struct siw_device { > struct ib_device base_dev; > - struct net_device *netdev; > struct siw_dev_cap attrs; > > u32 vendor_part_id; > + struct { > + int ifindex; > + enum ib_port_state state; > + enum ib_mtu mtu; > + enum ib_mtu max_mtu; > + } ifinfo; > + > int numa_node; > char raw_gid[ETH_ALEN]; > > - /* physical port state (only one port per device) */ > - enum ib_port_state state; > - > spinlock_t lock; > > struct xarray qp_xa; > diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/sw/siw/siw_cm.c > index 86323918a570..451b50d92f7f 100644 > --- a/drivers/infiniband/sw/siw/siw_cm.c > +++ b/drivers/infiniband/sw/siw/siw_cm.c > @@ -1780,7 +1780,7 @@ int siw_create_listen(struct iw_cm_id *id, int backlog) > > /* For wildcard addr, limit binding to current device only */ > if (ipv4_is_zeronet(laddr->sin_addr.s_addr)) > - s->sk->sk_bound_dev_if = sdev->netdev->ifindex; > + s->sk->sk_bound_dev_if = sdev->ifinfo.ifindex; > > rv = s->ops->bind(s, (struct sockaddr *)laddr, > sizeof(struct sockaddr_in)); > @@ -1798,7 +1798,7 @@ int siw_create_listen(struct iw_cm_id *id, int backlog) > > /* For wildcard addr, limit binding to current device only */ > if (ipv6_addr_any(&laddr->sin6_addr)) > - s->sk->sk_bound_dev_if = sdev->netdev->ifindex; > + s->sk->sk_bound_dev_if = sdev->ifinfo.ifindex; > > rv = s->ops->bind(s, (struct sockaddr *)laddr, > sizeof(struct sockaddr_in6)); > diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c > index 17abef48abcd..4db10bdfb515 100644 > --- a/drivers/infiniband/sw/siw/siw_main.c > +++ b/drivers/infiniband/sw/siw/siw_main.c > @@ -287,7 +287,6 @@ static struct siw_device *siw_device_create(struct net_device *netdev) > return NULL; > > base_dev = &sdev->base_dev; > - sdev->netdev = netdev; > > if (netdev->addr_len) { > memcpy(sdev->raw_gid, netdev->dev_addr, > @@ -354,6 +353,10 @@ static struct siw_device *siw_device_create(struct net_device *netdev) > atomic_set(&sdev->num_mr, 0); > atomic_set(&sdev->num_pd, 0); > > + sdev->ifinfo.max_mtu = ib_mtu_int_to_enum(netdev->max_mtu); > + sdev->ifinfo.mtu = ib_mtu_int_to_enum(READ_ONCE(netdev->mtu)); > + sdev->ifinfo.ifindex = netdev->ifindex; > + > sdev->numa_node = dev_to_node(&netdev->dev); > spin_lock_init(&sdev->lock); > > @@ -381,12 +384,12 @@ static int siw_netdev_event(struct notifier_block *nb, unsigned long event, > > switch (event) { > case NETDEV_UP: > - sdev->state = IB_PORT_ACTIVE; > + sdev->ifinfo.state = IB_PORT_ACTIVE; > siw_port_event(sdev, 1, IB_EVENT_PORT_ACTIVE); > break; > > case NETDEV_DOWN: > - sdev->state = IB_PORT_DOWN; > + sdev->ifinfo.state = IB_PORT_DOWN; > siw_port_event(sdev, 1, IB_EVENT_PORT_ERR); > break; > > @@ -406,10 +409,13 @@ static int siw_netdev_event(struct notifier_block *nb, unsigned long event, > case NETDEV_CHANGEADDR: > siw_port_event(sdev, 1, IB_EVENT_LID_CHANGE); > break; > + > + case NETDEV_CHANGEMTU: > + sdev->ifinfo.mtu = ib_mtu_int_to_enum(READ_ONCE(netdev->mtu)); > + break; > /* > * Todo: Below netdev events are currently not handled. > */ > - case NETDEV_CHANGEMTU: > case NETDEV_CHANGE: > break; > > @@ -444,9 +450,9 @@ static int siw_newlink(const char *basedev_name, struct net_device *netdev) > dev_dbg(&netdev->dev, "siw: new device\n"); > > if (netif_running(netdev) && netif_carrier_ok(netdev)) > - sdev->state = IB_PORT_ACTIVE; > + sdev->ifinfo.state = IB_PORT_ACTIVE; > else > - sdev->state = IB_PORT_DOWN; > + sdev->ifinfo.state = IB_PORT_DOWN; > > ib_mark_name_assigned_by_user(&sdev->base_dev); > rv = siw_device_register(sdev, basedev_name); > diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c > index 986666c19378..3ab9c5170637 100644 > --- a/drivers/infiniband/sw/siw/siw_verbs.c > +++ b/drivers/infiniband/sw/siw/siw_verbs.c > @@ -178,14 +178,15 @@ int siw_query_port(struct ib_device *base_dev, u32 port, > > rv = ib_get_eth_speed(base_dev, port, &attr->active_speed, > &attr->active_width); > + > attr->gid_tbl_len = 1; > attr->max_msg_sz = -1; > - attr->max_mtu = ib_mtu_int_to_enum(sdev->netdev->mtu); > - attr->active_mtu = ib_mtu_int_to_enum(sdev->netdev->mtu); > - attr->phys_state = sdev->state == IB_PORT_ACTIVE ? > + attr->max_mtu = sdev->ifinfo.max_mtu; > + attr->active_mtu = sdev->ifinfo.mtu; > + attr->phys_state = sdev->ifinfo.state == IB_PORT_ACTIVE ? > IB_PORT_PHYS_STATE_LINK_UP : IB_PORT_PHYS_STATE_DISABLED; > attr->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_DEVICE_MGMT_SUP; > - attr->state = sdev->state; > + attr->state = sdev->ifinfo.state; > /* > * All zero > * > @@ -519,7 +520,7 @@ int siw_query_qp(struct ib_qp *base_qp, struct ib_qp_attr *qp_attr, > qp_attr->cap.max_send_sge = qp->attrs.sq_max_sges; > qp_attr->cap.max_recv_wr = qp->attrs.rq_size; > qp_attr->cap.max_recv_sge = qp->attrs.rq_max_sges; > - qp_attr->path_mtu = ib_mtu_int_to_enum(sdev->netdev->mtu); > + qp_attr->path_mtu = sdev->ifinfo.mtu; > qp_attr->max_rd_atomic = qp->attrs.irq_size; > qp_attr->max_dest_rd_atomic = qp->attrs.orq_size; >
On Tue, Dec 10, 2024 at 02:03:51PM +0100, Bernard Metzler wrote: > diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h > index 86d4d6a2170e..c8f75527b513 100644 > --- a/drivers/infiniband/sw/siw/siw.h > +++ b/drivers/infiniband/sw/siw/siw.h > @@ -69,16 +69,19 @@ struct siw_pd { > > struct siw_device { > struct ib_device base_dev; > - struct net_device *netdev; > struct siw_dev_cap attrs; > > u32 vendor_part_id; > + struct { > + int ifindex; ifindex is only stable so long as you are holding a reference on the netdev.. > --- a/drivers/infiniband/sw/siw/siw_main.c > +++ b/drivers/infiniband/sw/siw/siw_main.c > @@ -287,7 +287,6 @@ static struct siw_device *siw_device_create(struct net_device *netdev) > return NULL; > > base_dev = &sdev->base_dev; > - sdev->netdev = netdev; Like here needed to grab a reference before storing the pointer in the sdev struct. Jason
On 10.12.24 14:03, Bernard Metzler wrote: > Maintain needed network interface information locally, but > remove a direct link to net_device which can become stale. > Accessing a stale net_device link was causing a 'KASAN: > slab-use-after-free' exception during siw_query_port() > call. > > Fixes: bdcf26bf9b3a ("rdma/siw: network and RDMA core interface") > Reported-by: syzbot+4b87489410b4efd181bf@syzkaller.appspotmail.com > Link: https://syzkaller.appspot.com/bug?extid=4b87489410b4efd181bf > Signed-off-by: Bernard Metzler <bmt@zurich.ibm.com> > --- > drivers/infiniband/sw/siw/siw.h | 11 +++++++---- > drivers/infiniband/sw/siw/siw_cm.c | 4 ++-- > drivers/infiniband/sw/siw/siw_main.c | 18 ++++++++++++------ > drivers/infiniband/sw/siw/siw_verbs.c | 11 ++++++----- > 4 files changed, 27 insertions(+), 17 deletions(-) > > diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h > index 86d4d6a2170e..c8f75527b513 100644 > --- a/drivers/infiniband/sw/siw/siw.h > +++ b/drivers/infiniband/sw/siw/siw.h > @@ -69,16 +69,19 @@ struct siw_pd { > > struct siw_device { > struct ib_device base_dev; > - struct net_device *netdev; > struct siw_dev_cap attrs; > > u32 vendor_part_id; > + struct { > + int ifindex; > + enum ib_port_state state; > + enum ib_mtu mtu; > + enum ib_mtu max_mtu; > + } ifinfo; > + > int numa_node; > char raw_gid[ETH_ALEN]; > > - /* physical port state (only one port per device) */ > - enum ib_port_state state; > - > spinlock_t lock; > > struct xarray qp_xa; > diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/sw/siw/siw_cm.c > index 86323918a570..451b50d92f7f 100644 > --- a/drivers/infiniband/sw/siw/siw_cm.c > +++ b/drivers/infiniband/sw/siw/siw_cm.c > @@ -1780,7 +1780,7 @@ int siw_create_listen(struct iw_cm_id *id, int backlog) > > /* For wildcard addr, limit binding to current device only */ > if (ipv4_is_zeronet(laddr->sin_addr.s_addr)) > - s->sk->sk_bound_dev_if = sdev->netdev->ifindex; > + s->sk->sk_bound_dev_if = sdev->ifinfo.ifindex; > > rv = s->ops->bind(s, (struct sockaddr *)laddr, > sizeof(struct sockaddr_in)); > @@ -1798,7 +1798,7 @@ int siw_create_listen(struct iw_cm_id *id, int backlog) > > /* For wildcard addr, limit binding to current device only */ > if (ipv6_addr_any(&laddr->sin6_addr)) > - s->sk->sk_bound_dev_if = sdev->netdev->ifindex; > + s->sk->sk_bound_dev_if = sdev->ifinfo.ifindex; > > rv = s->ops->bind(s, (struct sockaddr *)laddr, > sizeof(struct sockaddr_in6)); > diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c > index 17abef48abcd..4db10bdfb515 100644 > --- a/drivers/infiniband/sw/siw/siw_main.c > +++ b/drivers/infiniband/sw/siw/siw_main.c > @@ -287,7 +287,6 @@ static struct siw_device *siw_device_create(struct net_device *netdev) > return NULL; > > base_dev = &sdev->base_dev; > - sdev->netdev = netdev; > > if (netdev->addr_len) { > memcpy(sdev->raw_gid, netdev->dev_addr, > @@ -354,6 +353,10 @@ static struct siw_device *siw_device_create(struct net_device *netdev) > atomic_set(&sdev->num_mr, 0); > atomic_set(&sdev->num_pd, 0); > > + sdev->ifinfo.max_mtu = ib_mtu_int_to_enum(netdev->max_mtu); > + sdev->ifinfo.mtu = ib_mtu_int_to_enum(READ_ONCE(netdev->mtu)); > + sdev->ifinfo.ifindex = netdev->ifindex; > + > sdev->numa_node = dev_to_node(&netdev->dev); > spin_lock_init(&sdev->lock); > > @@ -381,12 +384,12 @@ static int siw_netdev_event(struct notifier_block *nb, unsigned long event, > > switch (event) { > case NETDEV_UP: > - sdev->state = IB_PORT_ACTIVE; > + sdev->ifinfo.state = IB_PORT_ACTIVE; > siw_port_event(sdev, 1, IB_EVENT_PORT_ACTIVE); > break; > > case NETDEV_DOWN: > - sdev->state = IB_PORT_DOWN; > + sdev->ifinfo.state = IB_PORT_DOWN; > siw_port_event(sdev, 1, IB_EVENT_PORT_ERR); > break; > > @@ -406,10 +409,13 @@ static int siw_netdev_event(struct notifier_block *nb, unsigned long event, > case NETDEV_CHANGEADDR: > siw_port_event(sdev, 1, IB_EVENT_LID_CHANGE); > break; > + > + case NETDEV_CHANGEMTU: > + sdev->ifinfo.mtu = ib_mtu_int_to_enum(READ_ONCE(netdev->mtu)); > + break; > /* > * Todo: Below netdev events are currently not handled. > */ > - case NETDEV_CHANGEMTU: > case NETDEV_CHANGE: > break; > > @@ -444,9 +450,9 @@ static int siw_newlink(const char *basedev_name, struct net_device *netdev) > dev_dbg(&netdev->dev, "siw: new device\n"); > > if (netif_running(netdev) && netif_carrier_ok(netdev)) > - sdev->state = IB_PORT_ACTIVE; > + sdev->ifinfo.state = IB_PORT_ACTIVE; > else > - sdev->state = IB_PORT_DOWN; > + sdev->ifinfo.state = IB_PORT_DOWN; > > ib_mark_name_assigned_by_user(&sdev->base_dev); > rv = siw_device_register(sdev, basedev_name); > diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c > index 986666c19378..3ab9c5170637 100644 > --- a/drivers/infiniband/sw/siw/siw_verbs.c > +++ b/drivers/infiniband/sw/siw/siw_verbs.c > @@ -178,14 +178,15 @@ int siw_query_port(struct ib_device *base_dev, u32 port, > > rv = ib_get_eth_speed(base_dev, port, &attr->active_speed, > &attr->active_width); > + > attr->gid_tbl_len = 1; > attr->max_msg_sz = -1; > - attr->max_mtu = ib_mtu_int_to_enum(sdev->netdev->mtu); > - attr->active_mtu = ib_mtu_int_to_enum(sdev->netdev->mtu); > - attr->phys_state = sdev->state == IB_PORT_ACTIVE ? > + attr->max_mtu = sdev->ifinfo.max_mtu; > + attr->active_mtu = sdev->ifinfo.mtu; > + attr->phys_state = sdev->ifinfo.state == IB_PORT_ACTIVE ? > IB_PORT_PHYS_STATE_LINK_UP : IB_PORT_PHYS_STATE_DISABLED; > attr->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_DEVICE_MGMT_SUP; > - attr->state = sdev->state; > + attr->state = sdev->ifinfo.state; > /* > * All zero > * > @@ -519,7 +520,7 @@ int siw_query_qp(struct ib_qp *base_qp, struct ib_qp_attr *qp_attr, > qp_attr->cap.max_send_sge = qp->attrs.sq_max_sges; > qp_attr->cap.max_recv_wr = qp->attrs.rq_size; > qp_attr->cap.max_recv_sge = qp->attrs.rq_max_sges; > - qp_attr->path_mtu = ib_mtu_int_to_enum(sdev->netdev->mtu); > + qp_attr->path_mtu = sdev->ifinfo.mtu; The followings are my fix to this kind of problem https://syzkaller.appspot.com/bug?extid=4b87489410b4efd181bf It seems that it can also fix this problem. After I delved into your commit, I wonder what happen if the netdev will be handled in the future after xxx_query_port? Thanks, diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 5c18f7e342f2..7c73eb9115f1 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -57,7 +57,7 @@ static int rxe_query_port(struct ib_device *ibdev, if (attr->state == IB_PORT_ACTIVE) attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP; - else if (dev_get_flags(rxe->ndev) & IFF_UP) + else if (rxe && rxe->ndev && (dev_get_flags(rxe->ndev) & IFF_UP)) attr->phys_state = IB_PORT_PHYS_STATE_POLLING; else attr->phys_state = IB_PORT_PHYS_STATE_DISABLED; Zhu Yanjun > qp_attr->max_rd_atomic = qp->attrs.irq_size; > qp_attr->max_dest_rd_atomic = qp->attrs.orq_size; >
> -----Original Message----- > From: Jason Gunthorpe <jgg@ziepe.ca> > Sent: Tuesday, December 10, 2024 3:56 PM > To: Bernard Metzler <BMT@zurich.ibm.com> > Cc: linux-rdma@vger.kernel.org; leon@kernel.org; linux- > kernel@vger.kernel.org; netdev@vger.kernel.org; syzkaller- > bugs@googlegroups.com; zyjzyj2000@gmail.com; > syzbot+4b87489410b4efd181bf@syzkaller.appspotmail.com > Subject: [EXTERNAL] Re: [PATCH] RDMA/siw: Remove direct link to net_device > > On Tue, Dec 10, 2024 at 02:03:51PM +0100, Bernard Metzler wrote: > > diff --git a/drivers/infiniband/sw/siw/siw.h > b/drivers/infiniband/sw/siw/siw.h > > index 86d4d6a2170e..c8f75527b513 100644 > > --- a/drivers/infiniband/sw/siw/siw.h > > +++ b/drivers/infiniband/sw/siw/siw.h > > @@ -69,16 +69,19 @@ struct siw_pd { > > > > struct siw_device { > > struct ib_device base_dev; > > - struct net_device *netdev; > > struct siw_dev_cap attrs; > > > > u32 vendor_part_id; > > + struct { > > + int ifindex; > > ifindex is only stable so long as you are holding a reference on the > netdev.. > > --- a/drivers/infiniband/sw/siw/siw_main.c > > +++ b/drivers/infiniband/sw/siw/siw_main.c > > @@ -287,7 +287,6 @@ static struct siw_device *siw_device_create(struct > net_device *netdev) > > return NULL; > > > > base_dev = &sdev->base_dev; > > - sdev->netdev = netdev; > > Like here needed to grab a reference before storing the pointer in the > sdev struct. > This patch was supposed to remove siw's link to netdev. So no reference to netdev would be needed. I did it under the assumption siw can locally keep all needed information up to date via netdev_event(). But it seems the netdev itself can change during the lifetime of a siw device? With that ifindex would become wrong. If the netdev can change without the siw driver being informed, holding a netdev pointer or reference seems useless. So it would be best to always use ib_device_get_netdev() to get a valid netdev pointer, if current netdev info is needed by the driver? I just wanted to avoid the costly ib_device_get_netdev() call during query_qp(), query_port() and listen(). Thank you! Bernard.
On Tue, Dec 10, 2024 at 05:08:51PM +0000, Bernard Metzler wrote: > > > > -----Original Message----- > > From: Jason Gunthorpe <jgg@ziepe.ca> > > Sent: Tuesday, December 10, 2024 3:56 PM > > To: Bernard Metzler <BMT@zurich.ibm.com> > > Cc: linux-rdma@vger.kernel.org; leon@kernel.org; linux- > > kernel@vger.kernel.org; netdev@vger.kernel.org; syzkaller- > > bugs@googlegroups.com; zyjzyj2000@gmail.com; > > syzbot+4b87489410b4efd181bf@syzkaller.appspotmail.com > > Subject: [EXTERNAL] Re: [PATCH] RDMA/siw: Remove direct link to net_device > > > > On Tue, Dec 10, 2024 at 02:03:51PM +0100, Bernard Metzler wrote: > > > diff --git a/drivers/infiniband/sw/siw/siw.h > > b/drivers/infiniband/sw/siw/siw.h > > > index 86d4d6a2170e..c8f75527b513 100644 > > > --- a/drivers/infiniband/sw/siw/siw.h > > > +++ b/drivers/infiniband/sw/siw/siw.h > > > @@ -69,16 +69,19 @@ struct siw_pd { > > > > > > struct siw_device { > > > struct ib_device base_dev; > > > - struct net_device *netdev; > > > struct siw_dev_cap attrs; > > > > > > u32 vendor_part_id; > > > + struct { > > > + int ifindex; > > > > ifindex is only stable so long as you are holding a reference on the > > netdev.. > > > --- a/drivers/infiniband/sw/siw/siw_main.c > > > +++ b/drivers/infiniband/sw/siw/siw_main.c > > > @@ -287,7 +287,6 @@ static struct siw_device *siw_device_create(struct > > net_device *netdev) > > > return NULL; > > > > > > base_dev = &sdev->base_dev; > > > - sdev->netdev = netdev; > > > > Like here needed to grab a reference before storing the pointer in the > > sdev struct. > > > > This patch was supposed to remove siw's link to netdev. So no > reference to netdev would be needed. I did it under the > assumption siw can locally keep all needed information up to > date via netdev_event(). > But it seems the netdev itself can change during the lifetime of > a siw device? With that ifindex would become wrong. > > If the netdev can change without the siw driver being informed, > holding a netdev pointer or reference seems useless. > > So it would be best to always use ib_device_get_netdev() to get > a valid netdev pointer, if current netdev info is needed by the > driver? Or call to dev_hold(netdev) in siw_device_create(). It will make sure that netdev is stable. Thanks BTW, Need to check, maybe IB/core layer already called to dev_hold. > > I just wanted to avoid the costly ib_device_get_netdev() call > during query_qp(), query_port() and listen(). > > Thank you! > Bernard. >
On Tue, 10 Dec 2024 10:56:27 -0400 Jason Gunthorpe wrote: > > struct siw_device { > > struct ib_device base_dev; > > - struct net_device *netdev; > > struct siw_dev_cap attrs; > > > > u32 vendor_part_id; > > + struct { > > + int ifindex; > > ifindex is only stable so long as you are holding a reference on the > netdev.. Does not compute. Can you elaborate what you mean, Jason?
On Tue, Dec 10, 2024 at 05:52:37PM -0800, Jakub Kicinski wrote: > On Tue, 10 Dec 2024 10:56:27 -0400 Jason Gunthorpe wrote: > > > struct siw_device { > > > struct ib_device base_dev; > > > - struct net_device *netdev; > > > struct siw_dev_cap attrs; > > > > > > u32 vendor_part_id; > > > + struct { > > > + int ifindex; > > > > ifindex is only stable so long as you are holding a reference on the > > netdev.. > > Does not compute. Can you elaborate what you mean, Jason? I mean you can't replace a netdev pointer with an ifindex, you can't reliably get back to the same netdev from ifindex alone. Jason
On Wed, 11 Dec 2024 12:00:55 -0400 Jason Gunthorpe wrote: > > > ifindex is only stable so long as you are holding a reference on the > > > netdev.. > > > > Does not compute. Can you elaborate what you mean, Jason? > > I mean you can't replace a netdev pointer with an ifindex, you can't > reliably get back to the same netdev from ifindex alone. With the right use of locking and the netdev notifier the ifindex is as good as a pointer. I just wanted to point out that taking a reference makes no difference here.
> -----Original Message----- > From: Leon Romanovsky <leon@kernel.org> > Sent: Tuesday, December 10, 2024 8:13 PM > To: Bernard Metzler <BMT@zurich.ibm.com> > Cc: Jason Gunthorpe <jgg@ziepe.ca>; linux-rdma@vger.kernel.org > Subject: [EXTERNAL] Re: [PATCH] RDMA/siw: Remove direct link to net_device > > On Tue, Dec 10, 2024 at 05:08:51PM +0000, Bernard Metzler wrote: > > > > > > > -----Original Message----- > > > From: Jason Gunthorpe <jgg@ziepe.ca> > > > Sent: Tuesday, December 10, 2024 3:56 PM > > > To: Bernard Metzler <BMT@zurich.ibm.com> > > > Cc: linux-rdma@vger.kernel.org; leon@kernel.org; linux- > > > kernel@vger.kernel.org; netdev@vger.kernel.org; syzkaller- > > > bugs@googlegroups.com; zyjzyj2000@gmail.com; > > > syzbot+4b87489410b4efd181bf@syzkaller.appspotmail.com > > > Subject: [EXTERNAL] Re: [PATCH] RDMA/siw: Remove direct link to > net_device > > > > > > On Tue, Dec 10, 2024 at 02:03:51PM +0100, Bernard Metzler wrote: > > > > diff --git a/drivers/infiniband/sw/siw/siw.h > > > b/drivers/infiniband/sw/siw/siw.h > > > > index 86d4d6a2170e..c8f75527b513 100644 > > > > --- a/drivers/infiniband/sw/siw/siw.h > > > > +++ b/drivers/infiniband/sw/siw/siw.h > > > > @@ -69,16 +69,19 @@ struct siw_pd { > > > > > > > > struct siw_device { > > > > struct ib_device base_dev; > > > > - struct net_device *netdev; > > > > struct siw_dev_cap attrs; > > > > > > > > u32 vendor_part_id; > > > > + struct { > > > > + int ifindex; > > > > > > ifindex is only stable so long as you are holding a reference on the > > > netdev.. > > > > --- a/drivers/infiniband/sw/siw/siw_main.c > > > > +++ b/drivers/infiniband/sw/siw/siw_main.c > > > > @@ -287,7 +287,6 @@ static struct siw_device > *siw_device_create(struct > > > net_device *netdev) > > > > return NULL; > > > > > > > > base_dev = &sdev->base_dev; > > > > - sdev->netdev = netdev; > > > > > > Like here needed to grab a reference before storing the pointer in the > > > sdev struct. > > > > > > > This patch was supposed to remove siw's link to netdev. So no > > reference to netdev would be needed. I did it under the > > assumption siw can locally keep all needed information up to > > date via netdev_event(). > > But it seems the netdev itself can change during the lifetime of > > a siw device? With that ifindex would become wrong. > > > > If the netdev can change without the siw driver being informed, > > holding a netdev pointer or reference seems useless. > > > > So it would be best to always use ib_device_get_netdev() to get > > a valid netdev pointer, if current netdev info is needed by the > > driver? > > Or call to dev_hold(netdev) in siw_device_create(). It will make sure > that netdev is stable. > > Thanks > > BTW, Need to check, maybe IB/core layer already called to dev_hold. > Yes, drivers are calling ib_device_set_netdev(ibdev, netdev, port) during newlink(), which assigns the netdev to the ib_device's port info. The ibdev takes a hold on the ports netdev and handles the pointer assignment, clearing etc. appropriately. Unlinking is done via ib_device_set_netdev(, NULL, ), or ib_unregister_device() which unlinks and puts netdevs as well. Given we have an instance taking care of the netdev, it is probably best to remove all direct netdev references from the driver - just to avoid replicating all its complex handling. So siw will use ib_device_get_netdev() whenever netdev info is required. It comes with some extra overhead, but its's more consistent. BTW: The rdma_link interface is asymmetric, lacking an unlink() call. For using software only drivers it might be beneficial to allow explicit unlinking a driver from a device. Any thoughts on that? Best, Bernard. > > > > I just wanted to avoid the costly ib_device_get_netdev() call > > during query_qp(), query_port() and listen(). > > > > Thank you! > > Bernard. > >
在 2024/12/12 11:20, Bernard Metzler 写道: > > >> -----Original Message----- >> From: Leon Romanovsky <leon@kernel.org> >> Sent: Tuesday, December 10, 2024 8:13 PM >> To: Bernard Metzler <BMT@zurich.ibm.com> >> Cc: Jason Gunthorpe <jgg@ziepe.ca>; linux-rdma@vger.kernel.org >> Subject: [EXTERNAL] Re: [PATCH] RDMA/siw: Remove direct link to net_device >> >> On Tue, Dec 10, 2024 at 05:08:51PM +0000, Bernard Metzler wrote: >>> >>> >>>> -----Original Message----- >>>> From: Jason Gunthorpe <jgg@ziepe.ca> >>>> Sent: Tuesday, December 10, 2024 3:56 PM >>>> To: Bernard Metzler <BMT@zurich.ibm.com> >>>> Cc: linux-rdma@vger.kernel.org; leon@kernel.org; linux- >>>> kernel@vger.kernel.org; netdev@vger.kernel.org; syzkaller- >>>> bugs@googlegroups.com; zyjzyj2000@gmail.com; >>>> syzbot+4b87489410b4efd181bf@syzkaller.appspotmail.com >>>> Subject: [EXTERNAL] Re: [PATCH] RDMA/siw: Remove direct link to >> net_device >>>> >>>> On Tue, Dec 10, 2024 at 02:03:51PM +0100, Bernard Metzler wrote: >>>>> diff --git a/drivers/infiniband/sw/siw/siw.h >>>> b/drivers/infiniband/sw/siw/siw.h >>>>> index 86d4d6a2170e..c8f75527b513 100644 >>>>> --- a/drivers/infiniband/sw/siw/siw.h >>>>> +++ b/drivers/infiniband/sw/siw/siw.h >>>>> @@ -69,16 +69,19 @@ struct siw_pd { >>>>> >>>>> struct siw_device { >>>>> struct ib_device base_dev; >>>>> - struct net_device *netdev; >>>>> struct siw_dev_cap attrs; >>>>> >>>>> u32 vendor_part_id; >>>>> + struct { >>>>> + int ifindex; >>>> >>>> ifindex is only stable so long as you are holding a reference on the >>>> netdev.. >>>>> --- a/drivers/infiniband/sw/siw/siw_main.c >>>>> +++ b/drivers/infiniband/sw/siw/siw_main.c >>>>> @@ -287,7 +287,6 @@ static struct siw_device >> *siw_device_create(struct >>>> net_device *netdev) >>>>> return NULL; >>>>> >>>>> base_dev = &sdev->base_dev; >>>>> - sdev->netdev = netdev; >>>> >>>> Like here needed to grab a reference before storing the pointer in the >>>> sdev struct. >>>> >>> >>> This patch was supposed to remove siw's link to netdev. So no >>> reference to netdev would be needed. I did it under the >>> assumption siw can locally keep all needed information up to >>> date via netdev_event(). >>> But it seems the netdev itself can change during the lifetime of >>> a siw device? With that ifindex would become wrong. >>> >>> If the netdev can change without the siw driver being informed, >>> holding a netdev pointer or reference seems useless. >>> >>> So it would be best to always use ib_device_get_netdev() to get >>> a valid netdev pointer, if current netdev info is needed by the >>> driver? >> >> Or call to dev_hold(netdev) in siw_device_create(). It will make sure >> that netdev is stable. >> >> Thanks >> >> BTW, Need to check, maybe IB/core layer already called to dev_hold. >> > > Yes, drivers are calling ib_device_set_netdev(ibdev, netdev, port) > during newlink(), which assigns the netdev to the ib_device's port > info. The ibdev takes a hold on the ports netdev and handles the > pointer assignment, clearing etc. appropriately. Unlinking is done > via ib_device_set_netdev(, NULL, ), or ib_unregister_device() > which unlinks and puts netdevs as well. > > Given we have an instance taking care of the netdev, it is > probably best to remove all direct netdev references from the > driver - just to avoid replicating all its complex handling. > So siw will use ib_device_get_netdev() whenever netdev info > is required. It comes with some extra overhead, but its's more > consistent. > > BTW: The rdma_link interface is asymmetric, lacking an unlink() > call. For using software only drivers it might be beneficial to > allow explicit unlinking a driver from a device. Any thoughts > on that? I agree with you. In the following link, I add del_link call. If you all agree, I can separate this patch from the patchset https://patchwork.kernel.org/project/linux-rdma/cover/20230624073927.707915-1-yanjun.zhu@intel.com/. This is the link to add unlink call. https://patchwork.kernel.org/project/linux-rdma/patch/20230624073927.707915-4-yanjun.zhu@intel.com/ Zhu Yanjun > > Best, > Bernard. >>> >>> I just wanted to avoid the costly ib_device_get_netdev() call >>> during query_qp(), query_port() and listen(). >>> >>> Thank you! >>> Bernard. >>>
> -----Original Message----- > From: Zhu Yanjun <yanjun.zhu@linux.dev> > Sent: Thursday, December 12, 2024 2:26 PM > To: Bernard Metzler <BMT@zurich.ibm.com>; Leon Romanovsky <leon@kernel.org> > Cc: Jason Gunthorpe <jgg@ziepe.ca>; linux-rdma@vger.kernel.org > Subject: [EXTERNAL] Re: [PATCH] RDMA/siw: Remove direct link to net_device > > 在 2024/12/12 11:20, Bernard Metzler 写道: > > > > > >> -----Original Message----- > >> From: Leon Romanovsky <leon@kernel.org> > >> Sent: Tuesday, December 10, 2024 8:13 PM > >> To: Bernard Metzler <BMT@zurich.ibm.com> > >> Cc: Jason Gunthorpe <jgg@ziepe.ca>; linux-rdma@vger.kernel.org > >> Subject: [EXTERNAL] Re: [PATCH] RDMA/siw: Remove direct link to > net_device > >> > >> On Tue, Dec 10, 2024 at 05:08:51PM +0000, Bernard Metzler wrote: > >>> > >>> > >>>> -----Original Message----- > >>>> From: Jason Gunthorpe <jgg@ziepe.ca> > >>>> Sent: Tuesday, December 10, 2024 3:56 PM > >>>> To: Bernard Metzler <BMT@zurich.ibm.com> > >>>> Cc: linux-rdma@vger.kernel.org; leon@kernel.org; linux- > >>>> kernel@vger.kernel.org; netdev@vger.kernel.org; syzkaller- > >>>> bugs@googlegroups.com; zyjzyj2000@gmail.com; > >>>> syzbot+4b87489410b4efd181bf@syzkaller.appspotmail.com > >>>> Subject: [EXTERNAL] Re: [PATCH] RDMA/siw: Remove direct link to > >> net_device > >>>> > >>>> On Tue, Dec 10, 2024 at 02:03:51PM +0100, Bernard Metzler wrote: > >>>>> diff --git a/drivers/infiniband/sw/siw/siw.h > >>>> b/drivers/infiniband/sw/siw/siw.h > >>>>> index 86d4d6a2170e..c8f75527b513 100644 > >>>>> --- a/drivers/infiniband/sw/siw/siw.h > >>>>> +++ b/drivers/infiniband/sw/siw/siw.h > >>>>> @@ -69,16 +69,19 @@ struct siw_pd { > >>>>> > >>>>> struct siw_device { > >>>>> struct ib_device base_dev; > >>>>> - struct net_device *netdev; > >>>>> struct siw_dev_cap attrs; > >>>>> > >>>>> u32 vendor_part_id; > >>>>> + struct { > >>>>> + int ifindex; > >>>> > >>>> ifindex is only stable so long as you are holding a reference on the > >>>> netdev.. > >>>>> --- a/drivers/infiniband/sw/siw/siw_main.c > >>>>> +++ b/drivers/infiniband/sw/siw/siw_main.c > >>>>> @@ -287,7 +287,6 @@ static struct siw_device > >> *siw_device_create(struct > >>>> net_device *netdev) > >>>>> return NULL; > >>>>> > >>>>> base_dev = &sdev->base_dev; > >>>>> - sdev->netdev = netdev; > >>>> > >>>> Like here needed to grab a reference before storing the pointer in the > >>>> sdev struct. > >>>> > >>> > >>> This patch was supposed to remove siw's link to netdev. So no > >>> reference to netdev would be needed. I did it under the > >>> assumption siw can locally keep all needed information up to > >>> date via netdev_event(). > >>> But it seems the netdev itself can change during the lifetime of > >>> a siw device? With that ifindex would become wrong. > >>> > >>> If the netdev can change without the siw driver being informed, > >>> holding a netdev pointer or reference seems useless. > >>> > >>> So it would be best to always use ib_device_get_netdev() to get > >>> a valid netdev pointer, if current netdev info is needed by the > >>> driver? > >> > >> Or call to dev_hold(netdev) in siw_device_create(). It will make sure > >> that netdev is stable. > >> > >> Thanks > >> > >> BTW, Need to check, maybe IB/core layer already called to dev_hold. > >> > > > > Yes, drivers are calling ib_device_set_netdev(ibdev, netdev, port) > > during newlink(), which assigns the netdev to the ib_device's port > > info. The ibdev takes a hold on the ports netdev and handles the > > pointer assignment, clearing etc. appropriately. Unlinking is done > > via ib_device_set_netdev(, NULL, ), or ib_unregister_device() > > which unlinks and puts netdevs as well. > > > > Given we have an instance taking care of the netdev, it is > > probably best to remove all direct netdev references from the > > driver - just to avoid replicating all its complex handling. > > So siw will use ib_device_get_netdev() whenever netdev info > > is required. It comes with some extra overhead, but its's more > > consistent. > > > > BTW: The rdma_link interface is asymmetric, lacking an unlink() > > call. For using software only drivers it might be beneficial to > > allow explicit unlinking a driver from a device. Any thoughts > > on that? > > I agree with you. In the following link, I add del_link call. If you all > agree, I can separate this patch from the patchset > https% > 3A__patchwork.kernel.org_project_linux-2Drdma_cover_20230624073927.707915- > 2D1-2Dyanjun.zhu- > 40intel.com_&d=DwIDaQ&c=BSDicqBQBDjDI9RkVyTcHQ&r=4ynb4Sj_4MUcZXbhvovE4tYSbq > xyOwdSiLedP4yO55g&m=ZZJw4ol3n9SvffK8DJ1lwRkQXBPZ-qpbQp6X_oCD_cew0- > 58ycdbtP0xYixv2ZHU&s=4VqxUZZs6UW1_XUZD01FoLNvoG1V-ETdi64KqLONDeI&e= . > > Ah, nldev dellink is already there but no rdma_link_ops.dellink(). It just calls ib_unregister_device(), which is sufficient for siw, but rxe want to do some extra work. Thanks, Bernard.
diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h index 86d4d6a2170e..c8f75527b513 100644 --- a/drivers/infiniband/sw/siw/siw.h +++ b/drivers/infiniband/sw/siw/siw.h @@ -69,16 +69,19 @@ struct siw_pd { struct siw_device { struct ib_device base_dev; - struct net_device *netdev; struct siw_dev_cap attrs; u32 vendor_part_id; + struct { + int ifindex; + enum ib_port_state state; + enum ib_mtu mtu; + enum ib_mtu max_mtu; + } ifinfo; + int numa_node; char raw_gid[ETH_ALEN]; - /* physical port state (only one port per device) */ - enum ib_port_state state; - spinlock_t lock; struct xarray qp_xa; diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/sw/siw/siw_cm.c index 86323918a570..451b50d92f7f 100644 --- a/drivers/infiniband/sw/siw/siw_cm.c +++ b/drivers/infiniband/sw/siw/siw_cm.c @@ -1780,7 +1780,7 @@ int siw_create_listen(struct iw_cm_id *id, int backlog) /* For wildcard addr, limit binding to current device only */ if (ipv4_is_zeronet(laddr->sin_addr.s_addr)) - s->sk->sk_bound_dev_if = sdev->netdev->ifindex; + s->sk->sk_bound_dev_if = sdev->ifinfo.ifindex; rv = s->ops->bind(s, (struct sockaddr *)laddr, sizeof(struct sockaddr_in)); @@ -1798,7 +1798,7 @@ int siw_create_listen(struct iw_cm_id *id, int backlog) /* For wildcard addr, limit binding to current device only */ if (ipv6_addr_any(&laddr->sin6_addr)) - s->sk->sk_bound_dev_if = sdev->netdev->ifindex; + s->sk->sk_bound_dev_if = sdev->ifinfo.ifindex; rv = s->ops->bind(s, (struct sockaddr *)laddr, sizeof(struct sockaddr_in6)); diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c index 17abef48abcd..4db10bdfb515 100644 --- a/drivers/infiniband/sw/siw/siw_main.c +++ b/drivers/infiniband/sw/siw/siw_main.c @@ -287,7 +287,6 @@ static struct siw_device *siw_device_create(struct net_device *netdev) return NULL; base_dev = &sdev->base_dev; - sdev->netdev = netdev; if (netdev->addr_len) { memcpy(sdev->raw_gid, netdev->dev_addr, @@ -354,6 +353,10 @@ static struct siw_device *siw_device_create(struct net_device *netdev) atomic_set(&sdev->num_mr, 0); atomic_set(&sdev->num_pd, 0); + sdev->ifinfo.max_mtu = ib_mtu_int_to_enum(netdev->max_mtu); + sdev->ifinfo.mtu = ib_mtu_int_to_enum(READ_ONCE(netdev->mtu)); + sdev->ifinfo.ifindex = netdev->ifindex; + sdev->numa_node = dev_to_node(&netdev->dev); spin_lock_init(&sdev->lock); @@ -381,12 +384,12 @@ static int siw_netdev_event(struct notifier_block *nb, unsigned long event, switch (event) { case NETDEV_UP: - sdev->state = IB_PORT_ACTIVE; + sdev->ifinfo.state = IB_PORT_ACTIVE; siw_port_event(sdev, 1, IB_EVENT_PORT_ACTIVE); break; case NETDEV_DOWN: - sdev->state = IB_PORT_DOWN; + sdev->ifinfo.state = IB_PORT_DOWN; siw_port_event(sdev, 1, IB_EVENT_PORT_ERR); break; @@ -406,10 +409,13 @@ static int siw_netdev_event(struct notifier_block *nb, unsigned long event, case NETDEV_CHANGEADDR: siw_port_event(sdev, 1, IB_EVENT_LID_CHANGE); break; + + case NETDEV_CHANGEMTU: + sdev->ifinfo.mtu = ib_mtu_int_to_enum(READ_ONCE(netdev->mtu)); + break; /* * Todo: Below netdev events are currently not handled. */ - case NETDEV_CHANGEMTU: case NETDEV_CHANGE: break; @@ -444,9 +450,9 @@ static int siw_newlink(const char *basedev_name, struct net_device *netdev) dev_dbg(&netdev->dev, "siw: new device\n"); if (netif_running(netdev) && netif_carrier_ok(netdev)) - sdev->state = IB_PORT_ACTIVE; + sdev->ifinfo.state = IB_PORT_ACTIVE; else - sdev->state = IB_PORT_DOWN; + sdev->ifinfo.state = IB_PORT_DOWN; ib_mark_name_assigned_by_user(&sdev->base_dev); rv = siw_device_register(sdev, basedev_name); diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c index 986666c19378..3ab9c5170637 100644 --- a/drivers/infiniband/sw/siw/siw_verbs.c +++ b/drivers/infiniband/sw/siw/siw_verbs.c @@ -178,14 +178,15 @@ int siw_query_port(struct ib_device *base_dev, u32 port, rv = ib_get_eth_speed(base_dev, port, &attr->active_speed, &attr->active_width); + attr->gid_tbl_len = 1; attr->max_msg_sz = -1; - attr->max_mtu = ib_mtu_int_to_enum(sdev->netdev->mtu); - attr->active_mtu = ib_mtu_int_to_enum(sdev->netdev->mtu); - attr->phys_state = sdev->state == IB_PORT_ACTIVE ? + attr->max_mtu = sdev->ifinfo.max_mtu; + attr->active_mtu = sdev->ifinfo.mtu; + attr->phys_state = sdev->ifinfo.state == IB_PORT_ACTIVE ? IB_PORT_PHYS_STATE_LINK_UP : IB_PORT_PHYS_STATE_DISABLED; attr->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_DEVICE_MGMT_SUP; - attr->state = sdev->state; + attr->state = sdev->ifinfo.state; /* * All zero * @@ -519,7 +520,7 @@ int siw_query_qp(struct ib_qp *base_qp, struct ib_qp_attr *qp_attr, qp_attr->cap.max_send_sge = qp->attrs.sq_max_sges; qp_attr->cap.max_recv_wr = qp->attrs.rq_size; qp_attr->cap.max_recv_sge = qp->attrs.rq_max_sges; - qp_attr->path_mtu = ib_mtu_int_to_enum(sdev->netdev->mtu); + qp_attr->path_mtu = sdev->ifinfo.mtu; qp_attr->max_rd_atomic = qp->attrs.irq_size; qp_attr->max_dest_rd_atomic = qp->attrs.orq_size;
Maintain needed network interface information locally, but remove a direct link to net_device which can become stale. Accessing a stale net_device link was causing a 'KASAN: slab-use-after-free' exception during siw_query_port() call. Fixes: bdcf26bf9b3a ("rdma/siw: network and RDMA core interface") Reported-by: syzbot+4b87489410b4efd181bf@syzkaller.appspotmail.com Link: https://syzkaller.appspot.com/bug?extid=4b87489410b4efd181bf Signed-off-by: Bernard Metzler <bmt@zurich.ibm.com> --- drivers/infiniband/sw/siw/siw.h | 11 +++++++---- drivers/infiniband/sw/siw/siw_cm.c | 4 ++-- drivers/infiniband/sw/siw/siw_main.c | 18 ++++++++++++------ drivers/infiniband/sw/siw/siw_verbs.c | 11 ++++++----- 4 files changed, 27 insertions(+), 17 deletions(-)