Message ID | 20190507051537.2161-1-aditr@vmware.com (mailing list archive) |
---|---|
State | Not Applicable |
Headers | show |
Series | libibverbs: Expose the get neighbor timeout for dmac resolution | expand |
On Tue, May 07, 2019 at 05:17:45AM +0000, Adit Ranadive wrote: > This allows the neighbor timeout to be configured while building > rdma-core using the extra cmake flags. > > Reviewed-by: Jorgen Hansen <jhansen@vmware.com> > Reviewed-by: Vishnu Dasa <vdasa@vmware.com> > Signed-off-by: Adit Ranadive <aditr@vmware.com> > --- > CMakeLists.txt | 6 ++++++ > buildlib/config.h.in | 2 ++ > libibverbs/verbs.c | 1 - > 3 files changed, 8 insertions(+), 1 deletion(-) > --- > > Here is the PR: > https://github.com/linux-rdma/rdma-core/pull/524 > > --- > diff --git a/CMakeLists.txt b/CMakeLists.txt > index beb8f4ec1238..8dbdd2b807f4 100644 > --- a/CMakeLists.txt > +++ b/CMakeLists.txt > @@ -45,6 +45,8 @@ > # -DNO_PYVERBS=1 (default, build pyverbs) > # Invoke cython to build pyverbs. Usually you will run with this option > # is set, but it will be disabled for travis runs. > +# -DNEIGH_GET_DEFAULT_TIMEOUT_MS=3000 (default) > +# Set the default timeout for lookup of neighbor for mac address. > > cmake_minimum_required(VERSION 2.8.11 FATAL_ERROR) > project(rdma-core C) > @@ -84,6 +86,10 @@ if (IN_PLACE) > set(CMAKE_INSTALL_INCLUDEDIR "include") > endif() > > +if ("${NEIGH_GET_DEFAULT_TIMEOUT_MS}" STREQUAL "") > + set(NEIGH_GET_DEFAULT_TIMEOUT_MS 3000) > +endif() > + > include(GNUInstallDirs) > # C include root > set(BUILD_INCLUDE ${CMAKE_BINARY_DIR}/include) > diff --git a/buildlib/config.h.in b/buildlib/config.h.in > index 0754d2494234..590e70162d1e 100644 > --- a/buildlib/config.h.in > +++ b/buildlib/config.h.in > @@ -61,6 +61,8 @@ > # define VERBS_WRITE_ONLY 0 > #endif > > +# define NEIGH_GET_DEFAULT_TIMEOUT_MS @NEIGH_GET_DEFAULT_TIMEOUT_MS@ Extra space.. > + > // Configuration defaults > > #define IBACM_SERVER_MODE_UNIX 0 > diff --git a/libibverbs/verbs.c b/libibverbs/verbs.c > index 1766b9f52d31..2cab86184e32 100644 > --- a/libibverbs/verbs.c > +++ b/libibverbs/verbs.c > @@ -967,7 +967,6 @@ static inline int create_peer_from_gid(int family, void *raw_gid, > return 0; > } > > -#define NEIGH_GET_DEFAULT_TIMEOUT_MS 3000 > int ibv_resolve_eth_l2_from_gid(struct ibv_context *context, > struct ibv_ah_attr *attr, > uint8_t eth_mac[ETHERNET_LL_SIZE], Really compile time configurations are not so useful, what is the use case here? Why does this timeout even exist? Jason
On 5/7/19 5:53 AM, Jason Gunthorpe wrote: > On Tue, May 07, 2019 at 05:17:45AM +0000, Adit Ranadive wrote: >> This allows the neighbor timeout to be configured while building >> rdma-core using the extra cmake flags. >> >> Reviewed-by: Jorgen Hansen <jhansen@vmware.com> >> Reviewed-by: Vishnu Dasa <vdasa@vmware.com> >> Signed-off-by: Adit Ranadive <aditr@vmware.com> >> --- >> CMakeLists.txt | 6 ++++++ >> buildlib/config.h.in | 2 ++ >> libibverbs/verbs.c | 1 - >> 3 files changed, 8 insertions(+), 1 deletion(-) >> --- >> >> Here is the PR: >> https://github.com/linux-rdma/rdma-core/pull/524 >> >> --- >> diff --git a/CMakeLists.txt b/CMakeLists.txt >> index beb8f4ec1238..8dbdd2b807f4 100644 >> --- a/CMakeLists.txt >> +++ b/CMakeLists.txt >> @@ -45,6 +45,8 @@ >> # -DNO_PYVERBS=1 (default, build pyverbs) >> # Invoke cython to build pyverbs. Usually you will run with this option >> # is set, but it will be disabled for travis runs. >> +# -DNEIGH_GET_DEFAULT_TIMEOUT_MS=3000 (default) >> +# Set the default timeout for lookup of neighbor for mac address. >> >> cmake_minimum_required(VERSION 2.8.11 FATAL_ERROR) >> project(rdma-core C) >> @@ -84,6 +86,10 @@ if (IN_PLACE) >> set(CMAKE_INSTALL_INCLUDEDIR "include") >> endif() >> >> +if ("${NEIGH_GET_DEFAULT_TIMEOUT_MS}" STREQUAL "") >> + set(NEIGH_GET_DEFAULT_TIMEOUT_MS 3000) >> +endif() >> + >> include(GNUInstallDirs) >> # C include root >> set(BUILD_INCLUDE ${CMAKE_BINARY_DIR}/include) >> diff --git a/buildlib/config.h.in b/buildlib/config.h.in >> index 0754d2494234..590e70162d1e 100644 >> --- a/buildlib/config.h.in >> +++ b/buildlib/config.h.in >> @@ -61,6 +61,8 @@ >> # define VERBS_WRITE_ONLY 0 >> #endif >> >> +# define NEIGH_GET_DEFAULT_TIMEOUT_MS @NEIGH_GET_DEFAULT_TIMEOUT_MS@ > > Extra space.. > Ok. >> + >> // Configuration defaults >> >> #define IBACM_SERVER_MODE_UNIX 0 >> diff --git a/libibverbs/verbs.c b/libibverbs/verbs.c >> index 1766b9f52d31..2cab86184e32 100644 >> --- a/libibverbs/verbs.c >> +++ b/libibverbs/verbs.c >> @@ -967,7 +967,6 @@ static inline int create_peer_from_gid(int family, void *raw_gid, >> return 0; >> } >> >> -#define NEIGH_GET_DEFAULT_TIMEOUT_MS 3000 >> int ibv_resolve_eth_l2_from_gid(struct ibv_context *context, >> struct ibv_ah_attr *attr, >> uint8_t eth_mac[ETHERNET_LL_SIZE], > > Really compile time configurations are not so useful, what is the use > case here? > In the general sense I agree with you. Pre-built RPMs may not have this set to anything other than the default value. However, in our internal testing we've seen timeouts when trying to resolve the DMAC when creating an AH. Instead, of simply increasing the #define value here I thought it would be mildly helpful to expose this out. If this is not going to be useful I can drop it but I thought it would atleast make rdma-core a bit more configurable.. > Why does this timeout even exist? > It gets used somewhere in the neighbor lookup process but I'm not that familiar about how the netlink library works here. Maybe someone else could chime in on that. But it looks the netlink packets are getting dropped somewhere in the OS stack. It could some odd timing issue since everything worked fine when I enabled the libnl debug option. > Jason >
On Tue, May 07, 2019 at 05:55:25PM +0000, Adit Ranadive wrote: > >> // Configuration defaults > >> > >> #define IBACM_SERVER_MODE_UNIX 0 > >> diff --git a/libibverbs/verbs.c b/libibverbs/verbs.c > >> index 1766b9f52d31..2cab86184e32 100644 > >> +++ b/libibverbs/verbs.c > >> @@ -967,7 +967,6 @@ static inline int create_peer_from_gid(int family, void *raw_gid, > >> return 0; > >> } > >> > >> -#define NEIGH_GET_DEFAULT_TIMEOUT_MS 3000 > >> int ibv_resolve_eth_l2_from_gid(struct ibv_context *context, > >> struct ibv_ah_attr *attr, > >> uint8_t eth_mac[ETHERNET_LL_SIZE], > > > > Really compile time configurations are not so useful, what is the use > > case here? > > > > In the general sense I agree with you. Pre-built RPMs may not have this > set to anything other than the default value. > However, in our internal testing we've seen timeouts when trying to > resolve the DMAC when creating an AH. Instead, of simply increasing > the #define value here I thought it would be mildly helpful to expose > this out. > > If this is not going to be useful I can drop it but I thought it would > atleast make rdma-core a bit more configurable.. Stuff like this should not be configured.. if you are hitting timeout it sounds like a bug of some sort to me. Jason
> On May 8, 2019, at 5:31 PM, Jason Gunthorpe <jgg@mellanox.com> wrote: > > On Tue, May 07, 2019 at 05:55:25PM +0000, Adit Ranadive wrote: >>>> // Configuration defaults >>>> >>>> #define IBACM_SERVER_MODE_UNIX 0 >>>> diff --git a/libibverbs/verbs.c b/libibverbs/verbs.c >>>> index 1766b9f52d31..2cab86184e32 100644 >>>> +++ b/libibverbs/verbs.c >>>> @@ -967,7 +967,6 @@ static inline int create_peer_from_gid(int family, void *raw_gid, >>>> return 0; >>>> } >>>> >>>> -#define NEIGH_GET_DEFAULT_TIMEOUT_MS 3000 >>>> int ibv_resolve_eth_l2_from_gid(struct ibv_context *context, >>>> struct ibv_ah_attr *attr, >>>> uint8_t eth_mac[ETHERNET_LL_SIZE], >>> >>> Really compile time configurations are not so useful, what is the use >>> case here? >>> >> >> In the general sense I agree with you. Pre-built RPMs may not have this >> set to anything other than the default value. >> However, in our internal testing we've seen timeouts when trying to >> resolve the DMAC when creating an AH. You can do this using uverbs instead of netlink by changing the create_ah provider’s implementation.. and AFAIR it’s more scalable than netlink.. >> Instead, of simply increasing >> the #define value here I thought it would be mildly helpful to expose >> this out. >> >> If this is not going to be useful I can drop it but I thought it would >> atleast make rdma-core a bit more configurable.. > > Stuff like this should not be configured.. if you are hitting timeout > it sounds like a bug of some sort to me. > > Jason
On 5/8/19 7:31 AM, Jason Gunthorpe wrote: > On Tue, May 07, 2019 at 05:55:25PM +0000, Adit Ranadive wrote: >>>> // Configuration defaults >>>> >>>> #define IBACM_SERVER_MODE_UNIX 0 >>>> diff --git a/libibverbs/verbs.c b/libibverbs/verbs.c >>>> index 1766b9f52d31..2cab86184e32 100644 >>>> +++ b/libibverbs/verbs.c >>>> @@ -967,7 +967,6 @@ static inline int create_peer_from_gid(int family, void *raw_gid, >>>> return 0; >>>> } >>>> >>>> -#define NEIGH_GET_DEFAULT_TIMEOUT_MS 3000 >>>> int ibv_resolve_eth_l2_from_gid(struct ibv_context *context, >>>> struct ibv_ah_attr *attr, >>>> uint8_t eth_mac[ETHERNET_LL_SIZE], >>> >>> Really compile time configurations are not so useful, what is the use >>> case here? >>> >> >> In the general sense I agree with you. Pre-built RPMs may not have this >> set to anything other than the default value. >> However, in our internal testing we've seen timeouts when trying to >> resolve the DMAC when creating an AH. Instead, of simply increasing >> the #define value here I thought it would be mildly helpful to expose >> this out. >> >> If this is not going to be useful I can drop it but I thought it would >> atleast make rdma-core a bit more configurable.. > > Stuff like this should not be configured.. if you are hitting timeout > it sounds like a bug of some sort to me. Maybe .. I don't have any cycles to dig deeper into what libnl3 is doing. > Jason >
On 5/8/19 8:02 AM, Majd Dibbiny wrote: > >> On May 8, 2019, at 5:31 PM, Jason Gunthorpe <jgg@mellanox.com> wrote: >> >> On Tue, May 07, 2019 at 05:55:25PM +0000, Adit Ranadive wrote: >>>>> // Configuration defaults >>>>> >>>>> #define IBACM_SERVER_MODE_UNIX 0 >>>>> diff --git a/libibverbs/verbs.c b/libibverbs/verbs.c >>>>> index 1766b9f52d31..2cab86184e32 100644 >>>>> +++ b/libibverbs/verbs.c >>>>> @@ -967,7 +967,6 @@ static inline int create_peer_from_gid(int family, void *raw_gid, >>>>> return 0; >>>>> } >>>>> >>>>> -#define NEIGH_GET_DEFAULT_TIMEOUT_MS 3000 >>>>> int ibv_resolve_eth_l2_from_gid(struct ibv_context *context, >>>>> struct ibv_ah_attr *attr, >>>>> uint8_t eth_mac[ETHERNET_LL_SIZE], >>>> >>>> Really compile time configurations are not so useful, what is the use >>>> case here? >>>> >>> >>> In the general sense I agree with you. Pre-built RPMs may not have this >>> set to anything other than the default value. >>> However, in our internal testing we've seen timeouts when trying to >>> resolve the DMAC when creating an AH. > You can do this using uverbs instead of netlink by changing the create_ah provider’s implementation.. and AFAIR it’s more scalable than netlink.. Thanks Majid! We will mostly go down that route later but in the fallback case we will still need to the L2 lookup function... >>> Instead, of simply increasing >>> the #define value here I thought it would be mildly helpful to expose >>> this out. >>> >>> If this is not going to be useful I can drop it but I thought it would >>> atleast make rdma-core a bit more configurable.. >> >> Stuff like this should not be configured.. if you are hitting timeout >> it sounds like a bug of some sort to me. >> >> Jason
diff --git a/CMakeLists.txt b/CMakeLists.txt index beb8f4ec1238..8dbdd2b807f4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -45,6 +45,8 @@ # -DNO_PYVERBS=1 (default, build pyverbs) # Invoke cython to build pyverbs. Usually you will run with this option # is set, but it will be disabled for travis runs. +# -DNEIGH_GET_DEFAULT_TIMEOUT_MS=3000 (default) +# Set the default timeout for lookup of neighbor for mac address. cmake_minimum_required(VERSION 2.8.11 FATAL_ERROR) project(rdma-core C) @@ -84,6 +86,10 @@ if (IN_PLACE) set(CMAKE_INSTALL_INCLUDEDIR "include") endif() +if ("${NEIGH_GET_DEFAULT_TIMEOUT_MS}" STREQUAL "") + set(NEIGH_GET_DEFAULT_TIMEOUT_MS 3000) +endif() + include(GNUInstallDirs) # C include root set(BUILD_INCLUDE ${CMAKE_BINARY_DIR}/include) diff --git a/buildlib/config.h.in b/buildlib/config.h.in index 0754d2494234..590e70162d1e 100644 --- a/buildlib/config.h.in +++ b/buildlib/config.h.in @@ -61,6 +61,8 @@ # define VERBS_WRITE_ONLY 0 #endif +# define NEIGH_GET_DEFAULT_TIMEOUT_MS @NEIGH_GET_DEFAULT_TIMEOUT_MS@ + // Configuration defaults #define IBACM_SERVER_MODE_UNIX 0 diff --git a/libibverbs/verbs.c b/libibverbs/verbs.c index 1766b9f52d31..2cab86184e32 100644 --- a/libibverbs/verbs.c +++ b/libibverbs/verbs.c @@ -967,7 +967,6 @@ static inline int create_peer_from_gid(int family, void *raw_gid, return 0; } -#define NEIGH_GET_DEFAULT_TIMEOUT_MS 3000 int ibv_resolve_eth_l2_from_gid(struct ibv_context *context, struct ibv_ah_attr *attr, uint8_t eth_mac[ETHERNET_LL_SIZE],