diff mbox series

RDMA/siw: Remove direct link to net_device

Message ID 20241210130351.406603-1-bmt@zurich.ibm.com (mailing list archive)
State Superseded
Headers show
Series RDMA/siw: Remove direct link to net_device | expand

Commit Message

Bernard Metzler Dec. 10, 2024, 1:03 p.m. UTC
Maintain needed network interface information locally, but
remove a direct link to net_device which can become stale.
Accessing a stale net_device link was causing a 'KASAN:
slab-use-after-free' exception during siw_query_port()
call.

Fixes: bdcf26bf9b3a ("rdma/siw: network and RDMA core interface")
Reported-by: syzbot+4b87489410b4efd181bf@syzkaller.appspotmail.com
Link: https://syzkaller.appspot.com/bug?extid=4b87489410b4efd181bf
Signed-off-by: Bernard Metzler <bmt@zurich.ibm.com>
---
 drivers/infiniband/sw/siw/siw.h       | 11 +++++++----
 drivers/infiniband/sw/siw/siw_cm.c    |  4 ++--
 drivers/infiniband/sw/siw/siw_main.c  | 18 ++++++++++++------
 drivers/infiniband/sw/siw/siw_verbs.c | 11 ++++++-----
 4 files changed, 27 insertions(+), 17 deletions(-)

Comments

Zhu Yanjun Dec. 10, 2024, 2:34 p.m. UTC | #1
On 10.12.24 14:03, Bernard Metzler wrote:
> Maintain needed network interface information locally, but
> remove a direct link to net_device which can become stale.
> Accessing a stale net_device link was causing a 'KASAN:
> slab-use-after-free' exception during siw_query_port()
> call.
> 
> Fixes: bdcf26bf9b3a ("rdma/siw: network and RDMA core interface")
> Reported-by: syzbot+4b87489410b4efd181bf@syzkaller.appspotmail.com
> Link: https://syzkaller.appspot.com/bug?extid=4b87489410b4efd181bf

Thanks, Bernard.
The similar problem also exists in rxe.
The link is https://syzkaller.appspot.com/bug?extid=4b87489410b4efd181bf

And I delved into this problem, it seems that the wq event was queued in 
ib_wq for a long time. Then before the event was executed, the netdev 
was freed. Then this problem occured.

I hope that this commit can fix this problem.^_^

Best Regards,
Zhu Yanjun

> Signed-off-by: Bernard Metzler <bmt@zurich.ibm.com>
> ---
>   drivers/infiniband/sw/siw/siw.h       | 11 +++++++----
>   drivers/infiniband/sw/siw/siw_cm.c    |  4 ++--
>   drivers/infiniband/sw/siw/siw_main.c  | 18 ++++++++++++------
>   drivers/infiniband/sw/siw/siw_verbs.c | 11 ++++++-----
>   4 files changed, 27 insertions(+), 17 deletions(-)
> 
> diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h
> index 86d4d6a2170e..c8f75527b513 100644
> --- a/drivers/infiniband/sw/siw/siw.h
> +++ b/drivers/infiniband/sw/siw/siw.h
> @@ -69,16 +69,19 @@ struct siw_pd {
>   
>   struct siw_device {
>   	struct ib_device base_dev;
> -	struct net_device *netdev;
>   	struct siw_dev_cap attrs;
>   
>   	u32 vendor_part_id;
> +	struct {
> +		int ifindex;
> +		enum ib_port_state state;
> +		enum ib_mtu mtu;
> +		enum ib_mtu max_mtu;
> +	} ifinfo;
> +
>   	int numa_node;
>   	char raw_gid[ETH_ALEN];
>   
> -	/* physical port state (only one port per device) */
> -	enum ib_port_state state;
> -
>   	spinlock_t lock;
>   
>   	struct xarray qp_xa;
> diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/sw/siw/siw_cm.c
> index 86323918a570..451b50d92f7f 100644
> --- a/drivers/infiniband/sw/siw/siw_cm.c
> +++ b/drivers/infiniband/sw/siw/siw_cm.c
> @@ -1780,7 +1780,7 @@ int siw_create_listen(struct iw_cm_id *id, int backlog)
>   
>   		/* For wildcard addr, limit binding to current device only */
>   		if (ipv4_is_zeronet(laddr->sin_addr.s_addr))
> -			s->sk->sk_bound_dev_if = sdev->netdev->ifindex;
> +			s->sk->sk_bound_dev_if = sdev->ifinfo.ifindex;
>   
>   		rv = s->ops->bind(s, (struct sockaddr *)laddr,
>   				  sizeof(struct sockaddr_in));
> @@ -1798,7 +1798,7 @@ int siw_create_listen(struct iw_cm_id *id, int backlog)
>   
>   		/* For wildcard addr, limit binding to current device only */
>   		if (ipv6_addr_any(&laddr->sin6_addr))
> -			s->sk->sk_bound_dev_if = sdev->netdev->ifindex;
> +			s->sk->sk_bound_dev_if = sdev->ifinfo.ifindex;
>   
>   		rv = s->ops->bind(s, (struct sockaddr *)laddr,
>   				  sizeof(struct sockaddr_in6));
> diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c
> index 17abef48abcd..4db10bdfb515 100644
> --- a/drivers/infiniband/sw/siw/siw_main.c
> +++ b/drivers/infiniband/sw/siw/siw_main.c
> @@ -287,7 +287,6 @@ static struct siw_device *siw_device_create(struct net_device *netdev)
>   		return NULL;
>   
>   	base_dev = &sdev->base_dev;
> -	sdev->netdev = netdev;
>   
>   	if (netdev->addr_len) {
>   		memcpy(sdev->raw_gid, netdev->dev_addr,
> @@ -354,6 +353,10 @@ static struct siw_device *siw_device_create(struct net_device *netdev)
>   	atomic_set(&sdev->num_mr, 0);
>   	atomic_set(&sdev->num_pd, 0);
>   
> +	sdev->ifinfo.max_mtu = ib_mtu_int_to_enum(netdev->max_mtu);
> +	sdev->ifinfo.mtu = ib_mtu_int_to_enum(READ_ONCE(netdev->mtu));
> +	sdev->ifinfo.ifindex = netdev->ifindex;
> +
>   	sdev->numa_node = dev_to_node(&netdev->dev);
>   	spin_lock_init(&sdev->lock);
>   
> @@ -381,12 +384,12 @@ static int siw_netdev_event(struct notifier_block *nb, unsigned long event,
>   
>   	switch (event) {
>   	case NETDEV_UP:
> -		sdev->state = IB_PORT_ACTIVE;
> +		sdev->ifinfo.state = IB_PORT_ACTIVE;
>   		siw_port_event(sdev, 1, IB_EVENT_PORT_ACTIVE);
>   		break;
>   
>   	case NETDEV_DOWN:
> -		sdev->state = IB_PORT_DOWN;
> +		sdev->ifinfo.state = IB_PORT_DOWN;
>   		siw_port_event(sdev, 1, IB_EVENT_PORT_ERR);
>   		break;
>   
> @@ -406,10 +409,13 @@ static int siw_netdev_event(struct notifier_block *nb, unsigned long event,
>   	case NETDEV_CHANGEADDR:
>   		siw_port_event(sdev, 1, IB_EVENT_LID_CHANGE);
>   		break;
> +
> +	case NETDEV_CHANGEMTU:
> +		sdev->ifinfo.mtu = ib_mtu_int_to_enum(READ_ONCE(netdev->mtu));
> +		break;
>   	/*
>   	 * Todo: Below netdev events are currently not handled.
>   	 */
> -	case NETDEV_CHANGEMTU:
>   	case NETDEV_CHANGE:
>   		break;
>   
> @@ -444,9 +450,9 @@ static int siw_newlink(const char *basedev_name, struct net_device *netdev)
>   		dev_dbg(&netdev->dev, "siw: new device\n");
>   
>   		if (netif_running(netdev) && netif_carrier_ok(netdev))
> -			sdev->state = IB_PORT_ACTIVE;
> +			sdev->ifinfo.state = IB_PORT_ACTIVE;
>   		else
> -			sdev->state = IB_PORT_DOWN;
> +			sdev->ifinfo.state = IB_PORT_DOWN;
>   
>   		ib_mark_name_assigned_by_user(&sdev->base_dev);
>   		rv = siw_device_register(sdev, basedev_name);
> diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c
> index 986666c19378..3ab9c5170637 100644
> --- a/drivers/infiniband/sw/siw/siw_verbs.c
> +++ b/drivers/infiniband/sw/siw/siw_verbs.c
> @@ -178,14 +178,15 @@ int siw_query_port(struct ib_device *base_dev, u32 port,
>   
>   	rv = ib_get_eth_speed(base_dev, port, &attr->active_speed,
>   			 &attr->active_width);
> +
>   	attr->gid_tbl_len = 1;
>   	attr->max_msg_sz = -1;
> -	attr->max_mtu = ib_mtu_int_to_enum(sdev->netdev->mtu);
> -	attr->active_mtu = ib_mtu_int_to_enum(sdev->netdev->mtu);
> -	attr->phys_state = sdev->state == IB_PORT_ACTIVE ?
> +	attr->max_mtu = sdev->ifinfo.max_mtu;
> +	attr->active_mtu = sdev->ifinfo.mtu;
> +	attr->phys_state = sdev->ifinfo.state == IB_PORT_ACTIVE ?
>   		IB_PORT_PHYS_STATE_LINK_UP : IB_PORT_PHYS_STATE_DISABLED;
>   	attr->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_DEVICE_MGMT_SUP;
> -	attr->state = sdev->state;
> +	attr->state = sdev->ifinfo.state;
>   	/*
>   	 * All zero
>   	 *
> @@ -519,7 +520,7 @@ int siw_query_qp(struct ib_qp *base_qp, struct ib_qp_attr *qp_attr,
>   	qp_attr->cap.max_send_sge = qp->attrs.sq_max_sges;
>   	qp_attr->cap.max_recv_wr = qp->attrs.rq_size;
>   	qp_attr->cap.max_recv_sge = qp->attrs.rq_max_sges;
> -	qp_attr->path_mtu = ib_mtu_int_to_enum(sdev->netdev->mtu);
> +	qp_attr->path_mtu = sdev->ifinfo.mtu;
>   	qp_attr->max_rd_atomic = qp->attrs.irq_size;
>   	qp_attr->max_dest_rd_atomic = qp->attrs.orq_size;
>
Jason Gunthorpe Dec. 10, 2024, 2:56 p.m. UTC | #2
On Tue, Dec 10, 2024 at 02:03:51PM +0100, Bernard Metzler wrote:
> diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h
> index 86d4d6a2170e..c8f75527b513 100644
> --- a/drivers/infiniband/sw/siw/siw.h
> +++ b/drivers/infiniband/sw/siw/siw.h
> @@ -69,16 +69,19 @@ struct siw_pd {
>  
>  struct siw_device {
>  	struct ib_device base_dev;
> -	struct net_device *netdev;
>  	struct siw_dev_cap attrs;
>  
>  	u32 vendor_part_id;
> +	struct {
> +		int ifindex;

ifindex is only stable so long as you are holding a reference on the
netdev..
> --- a/drivers/infiniband/sw/siw/siw_main.c
> +++ b/drivers/infiniband/sw/siw/siw_main.c
> @@ -287,7 +287,6 @@ static struct siw_device *siw_device_create(struct net_device *netdev)
>  		return NULL;
>  
>  	base_dev = &sdev->base_dev;
> -	sdev->netdev = netdev;

Like here needed to grab a reference before storing the pointer in the
sdev struct.

Jason
Zhu Yanjun Dec. 10, 2024, 3:44 p.m. UTC | #3
On 10.12.24 14:03, Bernard Metzler wrote:
> Maintain needed network interface information locally, but
> remove a direct link to net_device which can become stale.
> Accessing a stale net_device link was causing a 'KASAN:
> slab-use-after-free' exception during siw_query_port()
> call.
> 
> Fixes: bdcf26bf9b3a ("rdma/siw: network and RDMA core interface")
> Reported-by: syzbot+4b87489410b4efd181bf@syzkaller.appspotmail.com
> Link: https://syzkaller.appspot.com/bug?extid=4b87489410b4efd181bf
> Signed-off-by: Bernard Metzler <bmt@zurich.ibm.com>
> ---
>   drivers/infiniband/sw/siw/siw.h       | 11 +++++++----
>   drivers/infiniband/sw/siw/siw_cm.c    |  4 ++--
>   drivers/infiniband/sw/siw/siw_main.c  | 18 ++++++++++++------
>   drivers/infiniband/sw/siw/siw_verbs.c | 11 ++++++-----
>   4 files changed, 27 insertions(+), 17 deletions(-)
> 
> diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h
> index 86d4d6a2170e..c8f75527b513 100644
> --- a/drivers/infiniband/sw/siw/siw.h
> +++ b/drivers/infiniband/sw/siw/siw.h
> @@ -69,16 +69,19 @@ struct siw_pd {
>   
>   struct siw_device {
>   	struct ib_device base_dev;
> -	struct net_device *netdev;
>   	struct siw_dev_cap attrs;
>   
>   	u32 vendor_part_id;
> +	struct {
> +		int ifindex;
> +		enum ib_port_state state;
> +		enum ib_mtu mtu;
> +		enum ib_mtu max_mtu;
> +	} ifinfo;
> +
>   	int numa_node;
>   	char raw_gid[ETH_ALEN];
>   
> -	/* physical port state (only one port per device) */
> -	enum ib_port_state state;
> -
>   	spinlock_t lock;
>   
>   	struct xarray qp_xa;
> diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/sw/siw/siw_cm.c
> index 86323918a570..451b50d92f7f 100644
> --- a/drivers/infiniband/sw/siw/siw_cm.c
> +++ b/drivers/infiniband/sw/siw/siw_cm.c
> @@ -1780,7 +1780,7 @@ int siw_create_listen(struct iw_cm_id *id, int backlog)
>   
>   		/* For wildcard addr, limit binding to current device only */
>   		if (ipv4_is_zeronet(laddr->sin_addr.s_addr))
> -			s->sk->sk_bound_dev_if = sdev->netdev->ifindex;
> +			s->sk->sk_bound_dev_if = sdev->ifinfo.ifindex;
>   
>   		rv = s->ops->bind(s, (struct sockaddr *)laddr,
>   				  sizeof(struct sockaddr_in));
> @@ -1798,7 +1798,7 @@ int siw_create_listen(struct iw_cm_id *id, int backlog)
>   
>   		/* For wildcard addr, limit binding to current device only */
>   		if (ipv6_addr_any(&laddr->sin6_addr))
> -			s->sk->sk_bound_dev_if = sdev->netdev->ifindex;
> +			s->sk->sk_bound_dev_if = sdev->ifinfo.ifindex;
>   
>   		rv = s->ops->bind(s, (struct sockaddr *)laddr,
>   				  sizeof(struct sockaddr_in6));
> diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c
> index 17abef48abcd..4db10bdfb515 100644
> --- a/drivers/infiniband/sw/siw/siw_main.c
> +++ b/drivers/infiniband/sw/siw/siw_main.c
> @@ -287,7 +287,6 @@ static struct siw_device *siw_device_create(struct net_device *netdev)
>   		return NULL;
>   
>   	base_dev = &sdev->base_dev;
> -	sdev->netdev = netdev;
>   
>   	if (netdev->addr_len) {
>   		memcpy(sdev->raw_gid, netdev->dev_addr,
> @@ -354,6 +353,10 @@ static struct siw_device *siw_device_create(struct net_device *netdev)
>   	atomic_set(&sdev->num_mr, 0);
>   	atomic_set(&sdev->num_pd, 0);
>   
> +	sdev->ifinfo.max_mtu = ib_mtu_int_to_enum(netdev->max_mtu);
> +	sdev->ifinfo.mtu = ib_mtu_int_to_enum(READ_ONCE(netdev->mtu));
> +	sdev->ifinfo.ifindex = netdev->ifindex;
> +
>   	sdev->numa_node = dev_to_node(&netdev->dev);
>   	spin_lock_init(&sdev->lock);
>   
> @@ -381,12 +384,12 @@ static int siw_netdev_event(struct notifier_block *nb, unsigned long event,
>   
>   	switch (event) {
>   	case NETDEV_UP:
> -		sdev->state = IB_PORT_ACTIVE;
> +		sdev->ifinfo.state = IB_PORT_ACTIVE;
>   		siw_port_event(sdev, 1, IB_EVENT_PORT_ACTIVE);
>   		break;
>   
>   	case NETDEV_DOWN:
> -		sdev->state = IB_PORT_DOWN;
> +		sdev->ifinfo.state = IB_PORT_DOWN;
>   		siw_port_event(sdev, 1, IB_EVENT_PORT_ERR);
>   		break;
>   
> @@ -406,10 +409,13 @@ static int siw_netdev_event(struct notifier_block *nb, unsigned long event,
>   	case NETDEV_CHANGEADDR:
>   		siw_port_event(sdev, 1, IB_EVENT_LID_CHANGE);
>   		break;
> +
> +	case NETDEV_CHANGEMTU:
> +		sdev->ifinfo.mtu = ib_mtu_int_to_enum(READ_ONCE(netdev->mtu));
> +		break;
>   	/*
>   	 * Todo: Below netdev events are currently not handled.
>   	 */
> -	case NETDEV_CHANGEMTU:
>   	case NETDEV_CHANGE:
>   		break;
>   
> @@ -444,9 +450,9 @@ static int siw_newlink(const char *basedev_name, struct net_device *netdev)
>   		dev_dbg(&netdev->dev, "siw: new device\n");
>   
>   		if (netif_running(netdev) && netif_carrier_ok(netdev))
> -			sdev->state = IB_PORT_ACTIVE;
> +			sdev->ifinfo.state = IB_PORT_ACTIVE;
>   		else
> -			sdev->state = IB_PORT_DOWN;
> +			sdev->ifinfo.state = IB_PORT_DOWN;
>   
>   		ib_mark_name_assigned_by_user(&sdev->base_dev);
>   		rv = siw_device_register(sdev, basedev_name);
> diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c
> index 986666c19378..3ab9c5170637 100644
> --- a/drivers/infiniband/sw/siw/siw_verbs.c
> +++ b/drivers/infiniband/sw/siw/siw_verbs.c
> @@ -178,14 +178,15 @@ int siw_query_port(struct ib_device *base_dev, u32 port,
>   
>   	rv = ib_get_eth_speed(base_dev, port, &attr->active_speed,
>   			 &attr->active_width);
> +
>   	attr->gid_tbl_len = 1;
>   	attr->max_msg_sz = -1;
> -	attr->max_mtu = ib_mtu_int_to_enum(sdev->netdev->mtu);
> -	attr->active_mtu = ib_mtu_int_to_enum(sdev->netdev->mtu);
> -	attr->phys_state = sdev->state == IB_PORT_ACTIVE ?
> +	attr->max_mtu = sdev->ifinfo.max_mtu;
> +	attr->active_mtu = sdev->ifinfo.mtu;
> +	attr->phys_state = sdev->ifinfo.state == IB_PORT_ACTIVE ?
>   		IB_PORT_PHYS_STATE_LINK_UP : IB_PORT_PHYS_STATE_DISABLED;
>   	attr->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_DEVICE_MGMT_SUP;
> -	attr->state = sdev->state;
> +	attr->state = sdev->ifinfo.state;
>   	/*
>   	 * All zero
>   	 *
> @@ -519,7 +520,7 @@ int siw_query_qp(struct ib_qp *base_qp, struct ib_qp_attr *qp_attr,
>   	qp_attr->cap.max_send_sge = qp->attrs.sq_max_sges;
>   	qp_attr->cap.max_recv_wr = qp->attrs.rq_size;
>   	qp_attr->cap.max_recv_sge = qp->attrs.rq_max_sges;
> -	qp_attr->path_mtu = ib_mtu_int_to_enum(sdev->netdev->mtu);
> +	qp_attr->path_mtu = sdev->ifinfo.mtu;

The followings are my fix to this kind of problem 
https://syzkaller.appspot.com/bug?extid=4b87489410b4efd181bf

It seems that it can also fix this problem.
After I delved into your commit, I wonder what happen if the netdev will 
be handled in the future after xxx_query_port?

Thanks,

diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c 
b/drivers/infiniband/sw/rxe/rxe_verbs.c
index 5c18f7e342f2..7c73eb9115f1 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -57,7 +57,7 @@ static int rxe_query_port(struct ib_device *ibdev,

         if (attr->state == IB_PORT_ACTIVE)
                 attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
-       else if (dev_get_flags(rxe->ndev) & IFF_UP)
+       else if (rxe && rxe->ndev && (dev_get_flags(rxe->ndev) & IFF_UP))
                 attr->phys_state = IB_PORT_PHYS_STATE_POLLING;
         else
                 attr->phys_state = IB_PORT_PHYS_STATE_DISABLED;

Zhu Yanjun

>   	qp_attr->max_rd_atomic = qp->attrs.irq_size;
>   	qp_attr->max_dest_rd_atomic = qp->attrs.orq_size;
>
Bernard Metzler Dec. 10, 2024, 5:08 p.m. UTC | #4
> -----Original Message-----
> From: Jason Gunthorpe <jgg@ziepe.ca>
> Sent: Tuesday, December 10, 2024 3:56 PM
> To: Bernard Metzler <BMT@zurich.ibm.com>
> Cc: linux-rdma@vger.kernel.org; leon@kernel.org; linux-
> kernel@vger.kernel.org; netdev@vger.kernel.org; syzkaller-
> bugs@googlegroups.com; zyjzyj2000@gmail.com;
> syzbot+4b87489410b4efd181bf@syzkaller.appspotmail.com
> Subject: [EXTERNAL] Re: [PATCH] RDMA/siw: Remove direct link to net_device
> 
> On Tue, Dec 10, 2024 at 02:03:51PM +0100, Bernard Metzler wrote:
> > diff --git a/drivers/infiniband/sw/siw/siw.h
> b/drivers/infiniband/sw/siw/siw.h
> > index 86d4d6a2170e..c8f75527b513 100644
> > --- a/drivers/infiniband/sw/siw/siw.h
> > +++ b/drivers/infiniband/sw/siw/siw.h
> > @@ -69,16 +69,19 @@ struct siw_pd {
> >
> >  struct siw_device {
> >  	struct ib_device base_dev;
> > -	struct net_device *netdev;
> >  	struct siw_dev_cap attrs;
> >
> >  	u32 vendor_part_id;
> > +	struct {
> > +		int ifindex;
> 
> ifindex is only stable so long as you are holding a reference on the
> netdev..
> > --- a/drivers/infiniband/sw/siw/siw_main.c
> > +++ b/drivers/infiniband/sw/siw/siw_main.c
> > @@ -287,7 +287,6 @@ static struct siw_device *siw_device_create(struct
> net_device *netdev)
> >  		return NULL;
> >
> >  	base_dev = &sdev->base_dev;
> > -	sdev->netdev = netdev;
> 
> Like here needed to grab a reference before storing the pointer in the
> sdev struct.
> 

This patch was supposed to remove siw's link to netdev. So no
reference to netdev would be needed. I did it under the
assumption siw can locally keep all needed information up to
date via netdev_event().
But it seems the netdev itself can change during the lifetime of
a siw device? With that ifindex would become wrong.

If the netdev can change without the siw driver being informed,
holding a netdev pointer or reference seems useless.
 
So it would be best to always use ib_device_get_netdev() to get
a valid netdev pointer, if current netdev info is needed by the
driver?

I just wanted to avoid the costly ib_device_get_netdev() call
during query_qp(), query_port() and listen().

Thank you!
Bernard.
Leon Romanovsky Dec. 10, 2024, 7:13 p.m. UTC | #5
On Tue, Dec 10, 2024 at 05:08:51PM +0000, Bernard Metzler wrote:
> 
> 
> > -----Original Message-----
> > From: Jason Gunthorpe <jgg@ziepe.ca>
> > Sent: Tuesday, December 10, 2024 3:56 PM
> > To: Bernard Metzler <BMT@zurich.ibm.com>
> > Cc: linux-rdma@vger.kernel.org; leon@kernel.org; linux-
> > kernel@vger.kernel.org; netdev@vger.kernel.org; syzkaller-
> > bugs@googlegroups.com; zyjzyj2000@gmail.com;
> > syzbot+4b87489410b4efd181bf@syzkaller.appspotmail.com
> > Subject: [EXTERNAL] Re: [PATCH] RDMA/siw: Remove direct link to net_device
> > 
> > On Tue, Dec 10, 2024 at 02:03:51PM +0100, Bernard Metzler wrote:
> > > diff --git a/drivers/infiniband/sw/siw/siw.h
> > b/drivers/infiniband/sw/siw/siw.h
> > > index 86d4d6a2170e..c8f75527b513 100644
> > > --- a/drivers/infiniband/sw/siw/siw.h
> > > +++ b/drivers/infiniband/sw/siw/siw.h
> > > @@ -69,16 +69,19 @@ struct siw_pd {
> > >
> > >  struct siw_device {
> > >  	struct ib_device base_dev;
> > > -	struct net_device *netdev;
> > >  	struct siw_dev_cap attrs;
> > >
> > >  	u32 vendor_part_id;
> > > +	struct {
> > > +		int ifindex;
> > 
> > ifindex is only stable so long as you are holding a reference on the
> > netdev..
> > > --- a/drivers/infiniband/sw/siw/siw_main.c
> > > +++ b/drivers/infiniband/sw/siw/siw_main.c
> > > @@ -287,7 +287,6 @@ static struct siw_device *siw_device_create(struct
> > net_device *netdev)
> > >  		return NULL;
> > >
> > >  	base_dev = &sdev->base_dev;
> > > -	sdev->netdev = netdev;
> > 
> > Like here needed to grab a reference before storing the pointer in the
> > sdev struct.
> > 
> 
> This patch was supposed to remove siw's link to netdev. So no
> reference to netdev would be needed. I did it under the
> assumption siw can locally keep all needed information up to
> date via netdev_event().
> But it seems the netdev itself can change during the lifetime of
> a siw device? With that ifindex would become wrong.
> 
> If the netdev can change without the siw driver being informed,
> holding a netdev pointer or reference seems useless.
>  
> So it would be best to always use ib_device_get_netdev() to get
> a valid netdev pointer, if current netdev info is needed by the
> driver?

Or call to dev_hold(netdev) in siw_device_create(). It will make sure
that netdev is stable.

Thanks

BTW, Need to check, maybe IB/core layer already called to dev_hold.

> 
> I just wanted to avoid the costly ib_device_get_netdev() call
> during query_qp(), query_port() and listen().
> 
> Thank you!
> Bernard.
>
Jakub Kicinski Dec. 11, 2024, 1:52 a.m. UTC | #6
On Tue, 10 Dec 2024 10:56:27 -0400 Jason Gunthorpe wrote:
> >  struct siw_device {
> >  	struct ib_device base_dev;
> > -	struct net_device *netdev;
> >  	struct siw_dev_cap attrs;
> >  
> >  	u32 vendor_part_id;
> > +	struct {
> > +		int ifindex;  
> 
> ifindex is only stable so long as you are holding a reference on the
> netdev..

Does not compute. Can you elaborate what you mean, Jason?
Jason Gunthorpe Dec. 11, 2024, 4 p.m. UTC | #7
On Tue, Dec 10, 2024 at 05:52:37PM -0800, Jakub Kicinski wrote:
> On Tue, 10 Dec 2024 10:56:27 -0400 Jason Gunthorpe wrote:
> > >  struct siw_device {
> > >  	struct ib_device base_dev;
> > > -	struct net_device *netdev;
> > >  	struct siw_dev_cap attrs;
> > >  
> > >  	u32 vendor_part_id;
> > > +	struct {
> > > +		int ifindex;  
> > 
> > ifindex is only stable so long as you are holding a reference on the
> > netdev..
> 
> Does not compute. Can you elaborate what you mean, Jason?

I mean you can't replace a netdev pointer with an ifindex, you can't
reliably get back to the same netdev from ifindex alone.

Jason
Jakub Kicinski Dec. 12, 2024, 2:37 a.m. UTC | #8
On Wed, 11 Dec 2024 12:00:55 -0400 Jason Gunthorpe wrote:
> > > ifindex is only stable so long as you are holding a reference on the
> > > netdev..  
> > 
> > Does not compute. Can you elaborate what you mean, Jason?  
> 
> I mean you can't replace a netdev pointer with an ifindex, you can't
> reliably get back to the same netdev from ifindex alone.

With the right use of locking and the netdev notifier the ifindex
is as good as a pointer. I just wanted to point out that taking 
a reference makes no difference here.
Bernard Metzler Dec. 12, 2024, 10:20 a.m. UTC | #9
> -----Original Message-----
> From: Leon Romanovsky <leon@kernel.org>
> Sent: Tuesday, December 10, 2024 8:13 PM
> To: Bernard Metzler <BMT@zurich.ibm.com>
> Cc: Jason Gunthorpe <jgg@ziepe.ca>; linux-rdma@vger.kernel.org
> Subject: [EXTERNAL] Re: [PATCH] RDMA/siw: Remove direct link to net_device
> 
> On Tue, Dec 10, 2024 at 05:08:51PM +0000, Bernard Metzler wrote:
> >
> >
> > > -----Original Message-----
> > > From: Jason Gunthorpe <jgg@ziepe.ca>
> > > Sent: Tuesday, December 10, 2024 3:56 PM
> > > To: Bernard Metzler <BMT@zurich.ibm.com>
> > > Cc: linux-rdma@vger.kernel.org; leon@kernel.org; linux-
> > > kernel@vger.kernel.org; netdev@vger.kernel.org; syzkaller-
> > > bugs@googlegroups.com; zyjzyj2000@gmail.com;
> > > syzbot+4b87489410b4efd181bf@syzkaller.appspotmail.com
> > > Subject: [EXTERNAL] Re: [PATCH] RDMA/siw: Remove direct link to
> net_device
> > >
> > > On Tue, Dec 10, 2024 at 02:03:51PM +0100, Bernard Metzler wrote:
> > > > diff --git a/drivers/infiniband/sw/siw/siw.h
> > > b/drivers/infiniband/sw/siw/siw.h
> > > > index 86d4d6a2170e..c8f75527b513 100644
> > > > --- a/drivers/infiniband/sw/siw/siw.h
> > > > +++ b/drivers/infiniband/sw/siw/siw.h
> > > > @@ -69,16 +69,19 @@ struct siw_pd {
> > > >
> > > >  struct siw_device {
> > > >  	struct ib_device base_dev;
> > > > -	struct net_device *netdev;
> > > >  	struct siw_dev_cap attrs;
> > > >
> > > >  	u32 vendor_part_id;
> > > > +	struct {
> > > > +		int ifindex;
> > >
> > > ifindex is only stable so long as you are holding a reference on the
> > > netdev..
> > > > --- a/drivers/infiniband/sw/siw/siw_main.c
> > > > +++ b/drivers/infiniband/sw/siw/siw_main.c
> > > > @@ -287,7 +287,6 @@ static struct siw_device
> *siw_device_create(struct
> > > net_device *netdev)
> > > >  		return NULL;
> > > >
> > > >  	base_dev = &sdev->base_dev;
> > > > -	sdev->netdev = netdev;
> > >
> > > Like here needed to grab a reference before storing the pointer in the
> > > sdev struct.
> > >
> >
> > This patch was supposed to remove siw's link to netdev. So no
> > reference to netdev would be needed. I did it under the
> > assumption siw can locally keep all needed information up to
> > date via netdev_event().
> > But it seems the netdev itself can change during the lifetime of
> > a siw device? With that ifindex would become wrong.
> >
> > If the netdev can change without the siw driver being informed,
> > holding a netdev pointer or reference seems useless.
> >
> > So it would be best to always use ib_device_get_netdev() to get
> > a valid netdev pointer, if current netdev info is needed by the
> > driver?
> 
> Or call to dev_hold(netdev) in siw_device_create(). It will make sure
> that netdev is stable.
> 
> Thanks
> 
> BTW, Need to check, maybe IB/core layer already called to dev_hold.
> 

Yes, drivers are calling ib_device_set_netdev(ibdev, netdev, port)
during newlink(), which assigns the netdev to the ib_device's port
info. The ibdev takes a hold on the ports netdev and handles the
pointer assignment, clearing etc. appropriately. Unlinking is done
via ib_device_set_netdev(, NULL, ), or ib_unregister_device()
which unlinks and puts netdevs as well.

Given we have an instance taking care of the netdev, it is
probably best to remove all direct netdev references from the
driver - just to avoid replicating all its complex handling.
So siw will use ib_device_get_netdev() whenever netdev info
is required. It comes with some extra overhead, but its's more
consistent.

BTW: The rdma_link interface is asymmetric, lacking an unlink()
call. For using software only drivers it might be beneficial to
allow explicit unlinking a driver from a device. Any thoughts
on that?

Best,
Bernard.
> >
> > I just wanted to avoid the costly ib_device_get_netdev() call
> > during query_qp(), query_port() and listen().
> >
> > Thank you!
> > Bernard.
> >
Zhu Yanjun Dec. 12, 2024, 1:25 p.m. UTC | #10
在 2024/12/12 11:20, Bernard Metzler 写道:
> 
> 
>> -----Original Message-----
>> From: Leon Romanovsky <leon@kernel.org>
>> Sent: Tuesday, December 10, 2024 8:13 PM
>> To: Bernard Metzler <BMT@zurich.ibm.com>
>> Cc: Jason Gunthorpe <jgg@ziepe.ca>; linux-rdma@vger.kernel.org
>> Subject: [EXTERNAL] Re: [PATCH] RDMA/siw: Remove direct link to net_device
>>
>> On Tue, Dec 10, 2024 at 05:08:51PM +0000, Bernard Metzler wrote:
>>>
>>>
>>>> -----Original Message-----
>>>> From: Jason Gunthorpe <jgg@ziepe.ca>
>>>> Sent: Tuesday, December 10, 2024 3:56 PM
>>>> To: Bernard Metzler <BMT@zurich.ibm.com>
>>>> Cc: linux-rdma@vger.kernel.org; leon@kernel.org; linux-
>>>> kernel@vger.kernel.org; netdev@vger.kernel.org; syzkaller-
>>>> bugs@googlegroups.com; zyjzyj2000@gmail.com;
>>>> syzbot+4b87489410b4efd181bf@syzkaller.appspotmail.com
>>>> Subject: [EXTERNAL] Re: [PATCH] RDMA/siw: Remove direct link to
>> net_device
>>>>
>>>> On Tue, Dec 10, 2024 at 02:03:51PM +0100, Bernard Metzler wrote:
>>>>> diff --git a/drivers/infiniband/sw/siw/siw.h
>>>> b/drivers/infiniband/sw/siw/siw.h
>>>>> index 86d4d6a2170e..c8f75527b513 100644
>>>>> --- a/drivers/infiniband/sw/siw/siw.h
>>>>> +++ b/drivers/infiniband/sw/siw/siw.h
>>>>> @@ -69,16 +69,19 @@ struct siw_pd {
>>>>>
>>>>>   struct siw_device {
>>>>>   	struct ib_device base_dev;
>>>>> -	struct net_device *netdev;
>>>>>   	struct siw_dev_cap attrs;
>>>>>
>>>>>   	u32 vendor_part_id;
>>>>> +	struct {
>>>>> +		int ifindex;
>>>>
>>>> ifindex is only stable so long as you are holding a reference on the
>>>> netdev..
>>>>> --- a/drivers/infiniband/sw/siw/siw_main.c
>>>>> +++ b/drivers/infiniband/sw/siw/siw_main.c
>>>>> @@ -287,7 +287,6 @@ static struct siw_device
>> *siw_device_create(struct
>>>> net_device *netdev)
>>>>>   		return NULL;
>>>>>
>>>>>   	base_dev = &sdev->base_dev;
>>>>> -	sdev->netdev = netdev;
>>>>
>>>> Like here needed to grab a reference before storing the pointer in the
>>>> sdev struct.
>>>>
>>>
>>> This patch was supposed to remove siw's link to netdev. So no
>>> reference to netdev would be needed. I did it under the
>>> assumption siw can locally keep all needed information up to
>>> date via netdev_event().
>>> But it seems the netdev itself can change during the lifetime of
>>> a siw device? With that ifindex would become wrong.
>>>
>>> If the netdev can change without the siw driver being informed,
>>> holding a netdev pointer or reference seems useless.
>>>
>>> So it would be best to always use ib_device_get_netdev() to get
>>> a valid netdev pointer, if current netdev info is needed by the
>>> driver?
>>
>> Or call to dev_hold(netdev) in siw_device_create(). It will make sure
>> that netdev is stable.
>>
>> Thanks
>>
>> BTW, Need to check, maybe IB/core layer already called to dev_hold.
>>
> 
> Yes, drivers are calling ib_device_set_netdev(ibdev, netdev, port)
> during newlink(), which assigns the netdev to the ib_device's port
> info. The ibdev takes a hold on the ports netdev and handles the
> pointer assignment, clearing etc. appropriately. Unlinking is done
> via ib_device_set_netdev(, NULL, ), or ib_unregister_device()
> which unlinks and puts netdevs as well.
> 
> Given we have an instance taking care of the netdev, it is
> probably best to remove all direct netdev references from the
> driver - just to avoid replicating all its complex handling.
> So siw will use ib_device_get_netdev() whenever netdev info
> is required. It comes with some extra overhead, but its's more
> consistent.
> 
> BTW: The rdma_link interface is asymmetric, lacking an unlink()
> call. For using software only drivers it might be beneficial to
> allow explicit unlinking a driver from a device. Any thoughts
> on that?

I agree with you. In the following link, I add del_link call. If you all 
agree, I can separate this patch from the patchset 
https://patchwork.kernel.org/project/linux-rdma/cover/20230624073927.707915-1-yanjun.zhu@intel.com/.

This is the link to add unlink call.

https://patchwork.kernel.org/project/linux-rdma/patch/20230624073927.707915-4-yanjun.zhu@intel.com/

Zhu Yanjun

> 
> Best,
> Bernard.
>>>
>>> I just wanted to avoid the costly ib_device_get_netdev() call
>>> during query_qp(), query_port() and listen().
>>>
>>> Thank you!
>>> Bernard.
>>>
Bernard Metzler Dec. 12, 2024, 2:03 p.m. UTC | #11
> -----Original Message-----
> From: Zhu Yanjun <yanjun.zhu@linux.dev>
> Sent: Thursday, December 12, 2024 2:26 PM
> To: Bernard Metzler <BMT@zurich.ibm.com>; Leon Romanovsky <leon@kernel.org>
> Cc: Jason Gunthorpe <jgg@ziepe.ca>; linux-rdma@vger.kernel.org
> Subject: [EXTERNAL] Re: [PATCH] RDMA/siw: Remove direct link to net_device
> 
> 在 2024/12/12 11:20, Bernard Metzler 写道:
> >
> >
> >> -----Original Message-----
> >> From: Leon Romanovsky <leon@kernel.org>
> >> Sent: Tuesday, December 10, 2024 8:13 PM
> >> To: Bernard Metzler <BMT@zurich.ibm.com>
> >> Cc: Jason Gunthorpe <jgg@ziepe.ca>; linux-rdma@vger.kernel.org
> >> Subject: [EXTERNAL] Re: [PATCH] RDMA/siw: Remove direct link to
> net_device
> >>
> >> On Tue, Dec 10, 2024 at 05:08:51PM +0000, Bernard Metzler wrote:
> >>>
> >>>
> >>>> -----Original Message-----
> >>>> From: Jason Gunthorpe <jgg@ziepe.ca>
> >>>> Sent: Tuesday, December 10, 2024 3:56 PM
> >>>> To: Bernard Metzler <BMT@zurich.ibm.com>
> >>>> Cc: linux-rdma@vger.kernel.org; leon@kernel.org; linux-
> >>>> kernel@vger.kernel.org; netdev@vger.kernel.org; syzkaller-
> >>>> bugs@googlegroups.com; zyjzyj2000@gmail.com;
> >>>> syzbot+4b87489410b4efd181bf@syzkaller.appspotmail.com
> >>>> Subject: [EXTERNAL] Re: [PATCH] RDMA/siw: Remove direct link to
> >> net_device
> >>>>
> >>>> On Tue, Dec 10, 2024 at 02:03:51PM +0100, Bernard Metzler wrote:
> >>>>> diff --git a/drivers/infiniband/sw/siw/siw.h
> >>>> b/drivers/infiniband/sw/siw/siw.h
> >>>>> index 86d4d6a2170e..c8f75527b513 100644
> >>>>> --- a/drivers/infiniband/sw/siw/siw.h
> >>>>> +++ b/drivers/infiniband/sw/siw/siw.h
> >>>>> @@ -69,16 +69,19 @@ struct siw_pd {
> >>>>>
> >>>>>   struct siw_device {
> >>>>>   	struct ib_device base_dev;
> >>>>> -	struct net_device *netdev;
> >>>>>   	struct siw_dev_cap attrs;
> >>>>>
> >>>>>   	u32 vendor_part_id;
> >>>>> +	struct {
> >>>>> +		int ifindex;
> >>>>
> >>>> ifindex is only stable so long as you are holding a reference on the
> >>>> netdev..
> >>>>> --- a/drivers/infiniband/sw/siw/siw_main.c
> >>>>> +++ b/drivers/infiniband/sw/siw/siw_main.c
> >>>>> @@ -287,7 +287,6 @@ static struct siw_device
> >> *siw_device_create(struct
> >>>> net_device *netdev)
> >>>>>   		return NULL;
> >>>>>
> >>>>>   	base_dev = &sdev->base_dev;
> >>>>> -	sdev->netdev = netdev;
> >>>>
> >>>> Like here needed to grab a reference before storing the pointer in the
> >>>> sdev struct.
> >>>>
> >>>
> >>> This patch was supposed to remove siw's link to netdev. So no
> >>> reference to netdev would be needed. I did it under the
> >>> assumption siw can locally keep all needed information up to
> >>> date via netdev_event().
> >>> But it seems the netdev itself can change during the lifetime of
> >>> a siw device? With that ifindex would become wrong.
> >>>
> >>> If the netdev can change without the siw driver being informed,
> >>> holding a netdev pointer or reference seems useless.
> >>>
> >>> So it would be best to always use ib_device_get_netdev() to get
> >>> a valid netdev pointer, if current netdev info is needed by the
> >>> driver?
> >>
> >> Or call to dev_hold(netdev) in siw_device_create(). It will make sure
> >> that netdev is stable.
> >>
> >> Thanks
> >>
> >> BTW, Need to check, maybe IB/core layer already called to dev_hold.
> >>
> >
> > Yes, drivers are calling ib_device_set_netdev(ibdev, netdev, port)
> > during newlink(), which assigns the netdev to the ib_device's port
> > info. The ibdev takes a hold on the ports netdev and handles the
> > pointer assignment, clearing etc. appropriately. Unlinking is done
> > via ib_device_set_netdev(, NULL, ), or ib_unregister_device()
> > which unlinks and puts netdevs as well.
> >
> > Given we have an instance taking care of the netdev, it is
> > probably best to remove all direct netdev references from the
> > driver - just to avoid replicating all its complex handling.
> > So siw will use ib_device_get_netdev() whenever netdev info
> > is required. It comes with some extra overhead, but its's more
> > consistent.
> >
> > BTW: The rdma_link interface is asymmetric, lacking an unlink()
> > call. For using software only drivers it might be beneficial to
> > allow explicit unlinking a driver from a device. Any thoughts
> > on that?
> 
> I agree with you. In the following link, I add del_link call. If you all
> agree, I can separate this patch from the patchset
> https% 
> 3A__patchwork.kernel.org_project_linux-2Drdma_cover_20230624073927.707915-
> 2D1-2Dyanjun.zhu-
> 40intel.com_&d=DwIDaQ&c=BSDicqBQBDjDI9RkVyTcHQ&r=4ynb4Sj_4MUcZXbhvovE4tYSbq
> xyOwdSiLedP4yO55g&m=ZZJw4ol3n9SvffK8DJ1lwRkQXBPZ-qpbQp6X_oCD_cew0-
> 58ycdbtP0xYixv2ZHU&s=4VqxUZZs6UW1_XUZD01FoLNvoG1V-ETdi64KqLONDeI&e= .
> 
> 

Ah, nldev dellink is already there but no rdma_link_ops.dellink().
It just calls ib_unregister_device(), which is sufficient for siw,
but rxe want to do some extra work.

Thanks,
Bernard.
diff mbox series

Patch

diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h
index 86d4d6a2170e..c8f75527b513 100644
--- a/drivers/infiniband/sw/siw/siw.h
+++ b/drivers/infiniband/sw/siw/siw.h
@@ -69,16 +69,19 @@  struct siw_pd {
 
 struct siw_device {
 	struct ib_device base_dev;
-	struct net_device *netdev;
 	struct siw_dev_cap attrs;
 
 	u32 vendor_part_id;
+	struct {
+		int ifindex;
+		enum ib_port_state state;
+		enum ib_mtu mtu;
+		enum ib_mtu max_mtu;
+	} ifinfo;
+
 	int numa_node;
 	char raw_gid[ETH_ALEN];
 
-	/* physical port state (only one port per device) */
-	enum ib_port_state state;
-
 	spinlock_t lock;
 
 	struct xarray qp_xa;
diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/sw/siw/siw_cm.c
index 86323918a570..451b50d92f7f 100644
--- a/drivers/infiniband/sw/siw/siw_cm.c
+++ b/drivers/infiniband/sw/siw/siw_cm.c
@@ -1780,7 +1780,7 @@  int siw_create_listen(struct iw_cm_id *id, int backlog)
 
 		/* For wildcard addr, limit binding to current device only */
 		if (ipv4_is_zeronet(laddr->sin_addr.s_addr))
-			s->sk->sk_bound_dev_if = sdev->netdev->ifindex;
+			s->sk->sk_bound_dev_if = sdev->ifinfo.ifindex;
 
 		rv = s->ops->bind(s, (struct sockaddr *)laddr,
 				  sizeof(struct sockaddr_in));
@@ -1798,7 +1798,7 @@  int siw_create_listen(struct iw_cm_id *id, int backlog)
 
 		/* For wildcard addr, limit binding to current device only */
 		if (ipv6_addr_any(&laddr->sin6_addr))
-			s->sk->sk_bound_dev_if = sdev->netdev->ifindex;
+			s->sk->sk_bound_dev_if = sdev->ifinfo.ifindex;
 
 		rv = s->ops->bind(s, (struct sockaddr *)laddr,
 				  sizeof(struct sockaddr_in6));
diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c
index 17abef48abcd..4db10bdfb515 100644
--- a/drivers/infiniband/sw/siw/siw_main.c
+++ b/drivers/infiniband/sw/siw/siw_main.c
@@ -287,7 +287,6 @@  static struct siw_device *siw_device_create(struct net_device *netdev)
 		return NULL;
 
 	base_dev = &sdev->base_dev;
-	sdev->netdev = netdev;
 
 	if (netdev->addr_len) {
 		memcpy(sdev->raw_gid, netdev->dev_addr,
@@ -354,6 +353,10 @@  static struct siw_device *siw_device_create(struct net_device *netdev)
 	atomic_set(&sdev->num_mr, 0);
 	atomic_set(&sdev->num_pd, 0);
 
+	sdev->ifinfo.max_mtu = ib_mtu_int_to_enum(netdev->max_mtu);
+	sdev->ifinfo.mtu = ib_mtu_int_to_enum(READ_ONCE(netdev->mtu));
+	sdev->ifinfo.ifindex = netdev->ifindex;
+
 	sdev->numa_node = dev_to_node(&netdev->dev);
 	spin_lock_init(&sdev->lock);
 
@@ -381,12 +384,12 @@  static int siw_netdev_event(struct notifier_block *nb, unsigned long event,
 
 	switch (event) {
 	case NETDEV_UP:
-		sdev->state = IB_PORT_ACTIVE;
+		sdev->ifinfo.state = IB_PORT_ACTIVE;
 		siw_port_event(sdev, 1, IB_EVENT_PORT_ACTIVE);
 		break;
 
 	case NETDEV_DOWN:
-		sdev->state = IB_PORT_DOWN;
+		sdev->ifinfo.state = IB_PORT_DOWN;
 		siw_port_event(sdev, 1, IB_EVENT_PORT_ERR);
 		break;
 
@@ -406,10 +409,13 @@  static int siw_netdev_event(struct notifier_block *nb, unsigned long event,
 	case NETDEV_CHANGEADDR:
 		siw_port_event(sdev, 1, IB_EVENT_LID_CHANGE);
 		break;
+
+	case NETDEV_CHANGEMTU:
+		sdev->ifinfo.mtu = ib_mtu_int_to_enum(READ_ONCE(netdev->mtu));
+		break;
 	/*
 	 * Todo: Below netdev events are currently not handled.
 	 */
-	case NETDEV_CHANGEMTU:
 	case NETDEV_CHANGE:
 		break;
 
@@ -444,9 +450,9 @@  static int siw_newlink(const char *basedev_name, struct net_device *netdev)
 		dev_dbg(&netdev->dev, "siw: new device\n");
 
 		if (netif_running(netdev) && netif_carrier_ok(netdev))
-			sdev->state = IB_PORT_ACTIVE;
+			sdev->ifinfo.state = IB_PORT_ACTIVE;
 		else
-			sdev->state = IB_PORT_DOWN;
+			sdev->ifinfo.state = IB_PORT_DOWN;
 
 		ib_mark_name_assigned_by_user(&sdev->base_dev);
 		rv = siw_device_register(sdev, basedev_name);
diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c
index 986666c19378..3ab9c5170637 100644
--- a/drivers/infiniband/sw/siw/siw_verbs.c
+++ b/drivers/infiniband/sw/siw/siw_verbs.c
@@ -178,14 +178,15 @@  int siw_query_port(struct ib_device *base_dev, u32 port,
 
 	rv = ib_get_eth_speed(base_dev, port, &attr->active_speed,
 			 &attr->active_width);
+
 	attr->gid_tbl_len = 1;
 	attr->max_msg_sz = -1;
-	attr->max_mtu = ib_mtu_int_to_enum(sdev->netdev->mtu);
-	attr->active_mtu = ib_mtu_int_to_enum(sdev->netdev->mtu);
-	attr->phys_state = sdev->state == IB_PORT_ACTIVE ?
+	attr->max_mtu = sdev->ifinfo.max_mtu;
+	attr->active_mtu = sdev->ifinfo.mtu;
+	attr->phys_state = sdev->ifinfo.state == IB_PORT_ACTIVE ?
 		IB_PORT_PHYS_STATE_LINK_UP : IB_PORT_PHYS_STATE_DISABLED;
 	attr->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_DEVICE_MGMT_SUP;
-	attr->state = sdev->state;
+	attr->state = sdev->ifinfo.state;
 	/*
 	 * All zero
 	 *
@@ -519,7 +520,7 @@  int siw_query_qp(struct ib_qp *base_qp, struct ib_qp_attr *qp_attr,
 	qp_attr->cap.max_send_sge = qp->attrs.sq_max_sges;
 	qp_attr->cap.max_recv_wr = qp->attrs.rq_size;
 	qp_attr->cap.max_recv_sge = qp->attrs.rq_max_sges;
-	qp_attr->path_mtu = ib_mtu_int_to_enum(sdev->netdev->mtu);
+	qp_attr->path_mtu = sdev->ifinfo.mtu;
 	qp_attr->max_rd_atomic = qp->attrs.irq_size;
 	qp_attr->max_dest_rd_atomic = qp->attrs.orq_size;