Message ID | 20250302215444.3742072-1-yanjun.zhu@linux.dev (mailing list archive) |
---|---|
State | Accepted |
Headers | show |
Series | [rdma-rc,1/1] RDMA/rxe: Fix the failure of ibv_query_device() and ibv_query_device_ex() tests | expand |
On Mon, March 3, 2025 6:55 AM Zhu Yanjun <yanjun.zhu@linux.dev> wrote: > > In rdma-core, the following failures appear. > > " > $ ./build/bin/run_tests.py -k device > ssssssss....FF........s > ====================================================================== > FAIL: test_query_device (tests.test_device.DeviceTest.test_query_device) > Test ibv_query_device() > ---------------------------------------------------------------------- > Traceback (most recent call last): > File "/home/ubuntu/rdma-core/tests/test_device.py", line 63, in > test_query_device > self.verify_device_attr(attr, dev) > File "/home/ubuntu/rdma-core/tests/test_device.py", line 200, in > verify_device_attr > assert attr.sys_image_guid != 0 > ^^^^^^^^^^^^^^^^^^^^^^^^ > AssertionError > > ====================================================================== > FAIL: test_query_device_ex (tests.test_device.DeviceTest.test_query_device_ex) > Test ibv_query_device_ex() > ---------------------------------------------------------------------- > Traceback (most recent call last): > File "/home/ubuntu/rdma-core/tests/test_device.py", line 222, in > test_query_device_ex > self.verify_device_attr(attr_ex.orig_attr, dev) > File "/home/ubuntu/rdma-core/tests/test_device.py", line 200, in > verify_device_attr > assert attr.sys_image_guid != 0 > ^^^^^^^^^^^^^^^^^^^^^^^^ > AssertionError > " > > The root cause is: before a net device is set with rxe, this net device > is used to generate a sys_image_guid. I have tested this patch, and the problem I reported last week is now gone. The fix looks good. Thanks! 
Tested-by: Daisuke Matsuda <matsuda-daisuke@fujitsu.com> Reviewed-by: Daisuke Matsuda <matsuda-daisuke@fujitsu.com> > > Fixes: 2ac5415022d1 ("RDMA/rxe: Remove the direct link to net_device") > Signed-off-by: Zhu Yanjun <yanjun.zhu@linux.dev> > --- > drivers/infiniband/sw/rxe/rxe.c | 25 ++++++------------------- > 1 file changed, 6 insertions(+), 19 deletions(-) > > diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c > index 1ba4a0c8726a..e27478fe9456 100644 > --- a/drivers/infiniband/sw/rxe/rxe.c > +++ b/drivers/infiniband/sw/rxe/rxe.c > @@ -38,10 +38,8 @@ void rxe_dealloc(struct ib_device *ib_dev) > } > > /* initialize rxe device parameters */ > -static void rxe_init_device_param(struct rxe_dev *rxe) > +static void rxe_init_device_param(struct rxe_dev *rxe, struct net_device *ndev) > { > - struct net_device *ndev; > - > rxe->max_inline_data = RXE_MAX_INLINE_DATA; > > rxe->attr.vendor_id = RXE_VENDOR_ID; > @@ -74,15 +72,9 @@ static void rxe_init_device_param(struct rxe_dev *rxe) > rxe->attr.max_pkeys = RXE_MAX_PKEYS; > rxe->attr.local_ca_ack_delay = RXE_LOCAL_CA_ACK_DELAY; > > - ndev = rxe_ib_device_get_netdev(&rxe->ib_dev); > - if (!ndev) > - return; > - > addrconf_addr_eui48((unsigned char *)&rxe->attr.sys_image_guid, > ndev->dev_addr); > > - dev_put(ndev); > - > rxe->max_ucontext = RXE_MAX_UCONTEXT; > } > > @@ -115,18 +107,13 @@ static void rxe_init_port_param(struct rxe_port *port) > /* initialize port state, note IB convention that HCA ports are always > * numbered from 1 > */ > -static void rxe_init_ports(struct rxe_dev *rxe) > +static void rxe_init_ports(struct rxe_dev *rxe, struct net_device *ndev) > { > struct rxe_port *port = &rxe->port; > - struct net_device *ndev; > > rxe_init_port_param(port); > - ndev = rxe_ib_device_get_netdev(&rxe->ib_dev); > - if (!ndev) > - return; > addrconf_addr_eui48((unsigned char *)&port->port_guid, > ndev->dev_addr); > - dev_put(ndev); > spin_lock_init(&port->port_lock); > } > > @@ -144,12 +131,12 
@@ static void rxe_init_pools(struct rxe_dev *rxe) > } > > /* initialize rxe device state */ > -static void rxe_init(struct rxe_dev *rxe) > +static void rxe_init(struct rxe_dev *rxe, struct net_device *ndev) > { > /* init default device parameters */ > - rxe_init_device_param(rxe); > + rxe_init_device_param(rxe, ndev); > > - rxe_init_ports(rxe); > + rxe_init_ports(rxe, ndev); > rxe_init_pools(rxe); > > /* init pending mmap list */ > @@ -184,7 +171,7 @@ void rxe_set_mtu(struct rxe_dev *rxe, unsigned int ndev_mtu) > int rxe_add(struct rxe_dev *rxe, unsigned int mtu, const char *ibdev_name, > struct net_device *ndev) > { > - rxe_init(rxe); > + rxe_init(rxe, ndev); > rxe_set_mtu(rxe, mtu); > > return rxe_register_device(rxe, ibdev_name, ndev); > -- > 2.34.1 >
在 2025/3/3 5:21, Daisuke Matsuda (Fujitsu) 写道: > On Mon, March 3, 2025 6:55 AM Zhu Yanjun <yanjun.zhu@linux.dev> wrote: >> >> In rdma-core, the following failures appear. >> >> " >> $ ./build/bin/run_tests.py -k device >> ssssssss....FF........s >> ====================================================================== >> FAIL: test_query_device (tests.test_device.DeviceTest.test_query_device) >> Test ibv_query_device() >> ---------------------------------------------------------------------- >> Traceback (most recent call last): >> File "/home/ubuntu/rdma-core/tests/test_device.py", line 63, in >> test_query_device >> self.verify_device_attr(attr, dev) >> File "/home/ubuntu/rdma-core/tests/test_device.py", line 200, in >> verify_device_attr >> assert attr.sys_image_guid != 0 >> ^^^^^^^^^^^^^^^^^^^^^^^^ >> AssertionError >> >> ====================================================================== >> FAIL: test_query_device_ex (tests.test_device.DeviceTest.test_query_device_ex) >> Test ibv_query_device_ex() >> ---------------------------------------------------------------------- >> Traceback (most recent call last): >> File "/home/ubuntu/rdma-core/tests/test_device.py", line 222, in >> test_query_device_ex >> self.verify_device_attr(attr_ex.orig_attr, dev) >> File "/home/ubuntu/rdma-core/tests/test_device.py", line 200, in >> verify_device_attr >> assert attr.sys_image_guid != 0 >> ^^^^^^^^^^^^^^^^^^^^^^^^ >> AssertionError >> " >> >> The root cause is: before a net device is set with rxe, this net device >> is used to generate a sys_image_guid. > > I have tested this patch, and the problem I reported last week is now gone. > The fix looks good. Thanks! Thanks a lot. 
Zhu Yanjun > > Tested-by: Daisuke Matsuda <matsuda-daisuke@fujitsu.com> > Reviewed-by: Daisuke Matsuda <matsuda-daisuke@fujitsu.com> > >> >> Fixes: 2ac5415022d1 ("RDMA/rxe: Remove the direct link to net_device") >> Signed-off-by: Zhu Yanjun <yanjun.zhu@linux.dev> >> --- >> drivers/infiniband/sw/rxe/rxe.c | 25 ++++++------------------- >> 1 file changed, 6 insertions(+), 19 deletions(-) >> >> diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c >> index 1ba4a0c8726a..e27478fe9456 100644 >> --- a/drivers/infiniband/sw/rxe/rxe.c >> +++ b/drivers/infiniband/sw/rxe/rxe.c >> @@ -38,10 +38,8 @@ void rxe_dealloc(struct ib_device *ib_dev) >> } >> >> /* initialize rxe device parameters */ >> -static void rxe_init_device_param(struct rxe_dev *rxe) >> +static void rxe_init_device_param(struct rxe_dev *rxe, struct net_device *ndev) >> { >> - struct net_device *ndev; >> - >> rxe->max_inline_data = RXE_MAX_INLINE_DATA; >> >> rxe->attr.vendor_id = RXE_VENDOR_ID; >> @@ -74,15 +72,9 @@ static void rxe_init_device_param(struct rxe_dev *rxe) >> rxe->attr.max_pkeys = RXE_MAX_PKEYS; >> rxe->attr.local_ca_ack_delay = RXE_LOCAL_CA_ACK_DELAY; >> >> - ndev = rxe_ib_device_get_netdev(&rxe->ib_dev); >> - if (!ndev) >> - return; >> - >> addrconf_addr_eui48((unsigned char *)&rxe->attr.sys_image_guid, >> ndev->dev_addr); >> >> - dev_put(ndev); >> - >> rxe->max_ucontext = RXE_MAX_UCONTEXT; >> } >> >> @@ -115,18 +107,13 @@ static void rxe_init_port_param(struct rxe_port *port) >> /* initialize port state, note IB convention that HCA ports are always >> * numbered from 1 >> */ >> -static void rxe_init_ports(struct rxe_dev *rxe) >> +static void rxe_init_ports(struct rxe_dev *rxe, struct net_device *ndev) >> { >> struct rxe_port *port = &rxe->port; >> - struct net_device *ndev; >> >> rxe_init_port_param(port); >> - ndev = rxe_ib_device_get_netdev(&rxe->ib_dev); >> - if (!ndev) >> - return; >> addrconf_addr_eui48((unsigned char *)&port->port_guid, >> ndev->dev_addr); >> - 
dev_put(ndev); >> spin_lock_init(&port->port_lock); >> } >> >> @@ -144,12 +131,12 @@ static void rxe_init_pools(struct rxe_dev *rxe) >> } >> >> /* initialize rxe device state */ >> -static void rxe_init(struct rxe_dev *rxe) >> +static void rxe_init(struct rxe_dev *rxe, struct net_device *ndev) >> { >> /* init default device parameters */ >> - rxe_init_device_param(rxe); >> + rxe_init_device_param(rxe, ndev); >> >> - rxe_init_ports(rxe); >> + rxe_init_ports(rxe, ndev); >> rxe_init_pools(rxe); >> >> /* init pending mmap list */ >> @@ -184,7 +171,7 @@ void rxe_set_mtu(struct rxe_dev *rxe, unsigned int ndev_mtu) >> int rxe_add(struct rxe_dev *rxe, unsigned int mtu, const char *ibdev_name, >> struct net_device *ndev) >> { >> - rxe_init(rxe); >> + rxe_init(rxe, ndev); >> rxe_set_mtu(rxe, mtu); >> >> return rxe_register_device(rxe, ibdev_name, ndev); >> -- >> 2.34.1 >> >
On Sun, 02 Mar 2025 22:54:44 +0100, Zhu Yanjun wrote: > In rdma-core, the following failures appear. > > " > $ ./build/bin/run_tests.py -k device > ssssssss....FF........s > ====================================================================== > FAIL: test_query_device (tests.test_device.DeviceTest.test_query_device) > Test ibv_query_device() > ---------------------------------------------------------------------- > Traceback (most recent call last): > File "/home/ubuntu/rdma-core/tests/test_device.py", line 63, in > test_query_device > self.verify_device_attr(attr, dev) > File "/home/ubuntu/rdma-core/tests/test_device.py", line 200, in > verify_device_attr > assert attr.sys_image_guid != 0 > ^^^^^^^^^^^^^^^^^^^^^^^^ > AssertionError > > [...] Applied, thanks! [1/1] RDMA/rxe: Fix the failure of ibv_query_device() and ibv_query_device_ex() tests https://git.kernel.org/rdma/rdma/c/8ce2eb9dfac874 Best regards,
diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c index 1ba4a0c8726a..e27478fe9456 100644 --- a/drivers/infiniband/sw/rxe/rxe.c +++ b/drivers/infiniband/sw/rxe/rxe.c @@ -38,10 +38,8 @@ void rxe_dealloc(struct ib_device *ib_dev) } /* initialize rxe device parameters */ -static void rxe_init_device_param(struct rxe_dev *rxe) +static void rxe_init_device_param(struct rxe_dev *rxe, struct net_device *ndev) { - struct net_device *ndev; - rxe->max_inline_data = RXE_MAX_INLINE_DATA; rxe->attr.vendor_id = RXE_VENDOR_ID; @@ -74,15 +72,9 @@ static void rxe_init_device_param(struct rxe_dev *rxe) rxe->attr.max_pkeys = RXE_MAX_PKEYS; rxe->attr.local_ca_ack_delay = RXE_LOCAL_CA_ACK_DELAY; - ndev = rxe_ib_device_get_netdev(&rxe->ib_dev); - if (!ndev) - return; - addrconf_addr_eui48((unsigned char *)&rxe->attr.sys_image_guid, ndev->dev_addr); - dev_put(ndev); - rxe->max_ucontext = RXE_MAX_UCONTEXT; } @@ -115,18 +107,13 @@ static void rxe_init_port_param(struct rxe_port *port) /* initialize port state, note IB convention that HCA ports are always * numbered from 1 */ -static void rxe_init_ports(struct rxe_dev *rxe) +static void rxe_init_ports(struct rxe_dev *rxe, struct net_device *ndev) { struct rxe_port *port = &rxe->port; - struct net_device *ndev; rxe_init_port_param(port); - ndev = rxe_ib_device_get_netdev(&rxe->ib_dev); - if (!ndev) - return; addrconf_addr_eui48((unsigned char *)&port->port_guid, ndev->dev_addr); - dev_put(ndev); spin_lock_init(&port->port_lock); } @@ -144,12 +131,12 @@ static void rxe_init_pools(struct rxe_dev *rxe) } /* initialize rxe device state */ -static void rxe_init(struct rxe_dev *rxe) +static void rxe_init(struct rxe_dev *rxe, struct net_device *ndev) { /* init default device parameters */ - rxe_init_device_param(rxe); + rxe_init_device_param(rxe, ndev); - rxe_init_ports(rxe); + rxe_init_ports(rxe, ndev); rxe_init_pools(rxe); /* init pending mmap list */ @@ -184,7 +171,7 @@ void rxe_set_mtu(struct rxe_dev *rxe, unsigned 
int ndev_mtu) int rxe_add(struct rxe_dev *rxe, unsigned int mtu, const char *ibdev_name, struct net_device *ndev) { - rxe_init(rxe); + rxe_init(rxe, ndev); rxe_set_mtu(rxe, mtu); return rxe_register_device(rxe, ibdev_name, ndev);
In rdma-core, the following failures appear. " $ ./build/bin/run_tests.py -k device ssssssss....FF........s ====================================================================== FAIL: test_query_device (tests.test_device.DeviceTest.test_query_device) Test ibv_query_device() ---------------------------------------------------------------------- Traceback (most recent call last): File "/home/ubuntu/rdma-core/tests/test_device.py", line 63, in test_query_device self.verify_device_attr(attr, dev) File "/home/ubuntu/rdma-core/tests/test_device.py", line 200, in verify_device_attr assert attr.sys_image_guid != 0 ^^^^^^^^^^^^^^^^^^^^^^^^ AssertionError ====================================================================== FAIL: test_query_device_ex (tests.test_device.DeviceTest.test_query_device_ex) Test ibv_query_device_ex() ---------------------------------------------------------------------- Traceback (most recent call last): File "/home/ubuntu/rdma-core/tests/test_device.py", line 222, in test_query_device_ex self.verify_device_attr(attr_ex.orig_attr, dev) File "/home/ubuntu/rdma-core/tests/test_device.py", line 200, in verify_device_attr assert attr.sys_image_guid != 0 ^^^^^^^^^^^^^^^^^^^^^^^^ AssertionError " The root cause is that the net device is used to generate sys_image_guid before it has been set on the rxe device: at that point the netdev lookup through the ib_device returns NULL, the init helpers return early, and sys_image_guid is left as zero. Pass the net_device down to the init helpers directly instead. Fixes: 2ac5415022d1 ("RDMA/rxe: Remove the direct link to net_device") Signed-off-by: Zhu Yanjun <yanjun.zhu@linux.dev> --- drivers/infiniband/sw/rxe/rxe.c | 25 ++++++------------------- 1 file changed, 6 insertions(+), 19 deletions(-)