Message ID | 20210616065213.987-4-anand.a.khoje@oracle.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | IB/core: Obtaining subnet_prefix from cache in | expand |
On Wed, Jun 16, 2021 at 12:22:13PM +0530, Anand Khoje wrote: > ib_query_port() calls device->ops.query_port() to get the port > attributes. The method of querying is device driver specific. > The same function calls device->ops.query_gid() to get the GID and > extract the subnet_prefix (gid_prefix). > > The GID and subnet_prefix are stored in a cache. But they do not get > read from the cache if the device is an Infiniband device. The > following change takes advantage of the cached subnet_prefix. > Testing with RDBMS has shown a significant improvement in performance > with this change. > > The function ib_cache_is_initialised() is introduced because > ib_query_port() gets called early in the stage when the cache is not > built while reading port immutable property. > > In that case, the default GID still gets read from HCA for IB link- > layer devices. > > In the situation of an event causing cache update, the subnet_prefix > will get retrieved from newly updated GID cache in ib_cache_update(), > so that we do not end up reading a stale value from cache via > ib_query_port(). > > Fixes: fad61ad ("IB/core: Add subnet prefix to port info") > Suggested-by: Leon Romanovsky <leonro@nvidia.com> > Suggested-by: Aru Kolappan <aru.kolappan@oracle.com> > Signed-off-by: Anand Khoje <anand.a.khoje@oracle.com> > Signed-off-by: Haakon Bugge <haakon.bugge@oracle.com> > --- > > v1 -> v2: > - Split the v1 patch in 3 patches as per Leon's suggestion. > > v2 -> v3: > - Added changes as per Mark Zhang's suggestion of clearing > flags in git_table_cleanup_one(). > v3 -> v4: > - Removed the enum ib_port_data_flags and 8 byte flags from > struct ib_port_data, and the set_bit()/clear_bit() API > used to update this flag as that was not necessary. > Done to keep the code simple. > - Added code to read subnet_prefix from updated GID cache in the > event of cache update. Prior to this change, ib_cache_update > was reading the value for subnet_prefix via ib_query_port(), > due to this patch, we ended up reading a stale cached value of > subnet_prefix. > > --- > drivers/infiniband/core/cache.c | 18 +++++++++++++++--- > drivers/infiniband/core/device.c | 9 +++++++++ > include/rdma/ib_cache.h | 5 +++++ > include/rdma/ib_verbs.h | 1 + > 4 files changed, 30 insertions(+), 3 deletions(-) > > diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c > index 2325171..cd99c46 100644 > --- a/drivers/infiniband/core/cache.c > +++ b/drivers/infiniband/core/cache.c > @@ -917,9 +917,11 @@ static void gid_table_cleanup_one(struct ib_device *ib_dev) > { > u32 p; > > - rdma_for_each_port (ib_dev, p) > + rdma_for_each_port (ib_dev, p) { > + ib_dev->port_data[p].cache_is_initialized = 0; I think that this line is not needed, we are removing device anyway and and query_port is not allowed at this stage. > cleanup_gid_table_port(ib_dev, p, > ib_dev->port_data[p].cache.gid); > + } > } > > static int gid_table_setup_one(struct ib_device *ib_dev) > @@ -1466,6 +1468,7 @@ static int config_non_roce_gid_cache(struct ib_device *device, > struct ib_port_attr *tprops = NULL; > struct ib_pkey_cache *pkey_cache = NULL; > struct ib_pkey_cache *old_pkey_cache = NULL; > + union ib_gid gid; > int i; > int ret; > > @@ -1523,13 +1526,21 @@ static int config_non_roce_gid_cache(struct ib_device *device, > device->port_data[port].cache.lmc = tprops->lmc; > device->port_data[port].cache.port_state = tprops->state; > > - device->port_data[port].cache.subnet_prefix = tprops->subnet_prefix; > + ret = rdma_query_gid(device, port, 0, &gid); > + if (ret) { > + write_unlock_irq(&device->cache.lock); > + goto err; > + } > + > + device->port_data[port].cache.subnet_prefix = > + be64_to_cpu(gid.global.subnet_prefix); > + > write_unlock_irq(&device->cache_lock); > > if (enforce_security) > ib_security_cache_change(device, > port, > - tprops->subnet_prefix); > + be64_to_cpu(gid.global.subnet_prefix)); > > kfree(old_pkey_cache); > kfree(tprops); > @@ -1629,6 +1640,7 @@ int ib_cache_setup_one(struct ib_device *device) > err = ib_cache_update(device, p, true, true, true); > if (err) > return err; > + device->port_data[p].cache_is_initialized = 1; > } > > return 0; > diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c > index 7a617e4..57b9039 100644 > --- a/drivers/infiniband/core/device.c > +++ b/drivers/infiniband/core/device.c > @@ -2057,6 +2057,15 @@ static int __ib_query_port(struct ib_device *device, > IB_LINK_LAYER_INFINIBAND) > return 0; > > + if (!ib_cache_is_initialised(device, port_num)) > + goto query_gid_from_device; IMHO, we don't need this new function and can access ib_port_data directly. In device.c, we have plenty of places that does it. Not critical. > + > + ib_get_cached_subnet_prefix(device, port_num, > + &port_attr->subnet_prefix); > + > + return 0; > + > +query_gid_from_device: > err = device->ops.query_gid(device, port_num, 0, &gid); > if (err) > return err; > diff --git a/include/rdma/ib_cache.h b/include/rdma/ib_cache.h > index 226ae37..46b43a7 100644 > --- a/include/rdma/ib_cache.h > +++ b/include/rdma/ib_cache.h > @@ -114,4 +114,9 @@ ssize_t rdma_query_gid_table(struct ib_device *device, > struct ib_uverbs_gid_entry *entries, > size_t max_entries); > > +static inline bool ib_cache_is_initialised(struct ib_device *device, > + u32 port_num) > +{ > + return device->port_data[port_num].cache_is_initialized; > +} > #endif /* _IB_CACHE_H */ > diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h > index c96d601..405f7da 100644 > --- a/include/rdma/ib_verbs.h > +++ b/include/rdma/ib_verbs.h > @@ -2177,6 +2177,7 @@ struct ib_port_data { > > spinlock_t netdev_lock; > > + u8 cache_is_initialized:1; > struct list_head pkey_list; > > struct ib_port_cache cache; > -- > 1.8.3.1 >
On Wed, Jun 16, 2021 at 12:22:13PM +0530, Anand Khoje wrote: > ib_query_port() calls device->ops.query_port() to get the port > attributes. The method of querying is device driver specific. > The same function calls device->ops.query_gid() to get the GID and > extract the subnet_prefix (gid_prefix). > > The GID and subnet_prefix are stored in a cache. But they do not get > read from the cache if the device is an Infiniband device. The > following change takes advantage of the cached subnet_prefix. > Testing with RDBMS has shown a significant improvement in performance > with this change. > > The function ib_cache_is_initialised() is introduced because > ib_query_port() gets called early in the stage when the cache is not > built while reading port immutable property. > > In that case, the default GID still gets read from HCA for IB link- > layer devices. > > In the situation of an event causing cache update, the subnet_prefix > will get retrieved from newly updated GID cache in ib_cache_update(), > so that we do not end up reading a stale value from cache via > ib_query_port(). > > Fixes: fad61ad ("IB/core: Add subnet prefix to port info") > Suggested-by: Leon Romanovsky <leonro@nvidia.com> > Suggested-by: Aru Kolappan <aru.kolappan@oracle.com> > Signed-off-by: Anand Khoje <anand.a.khoje@oracle.com> > Signed-off-by: Haakon Bugge <haakon.bugge@oracle.com> > --- <...> > @@ -1523,13 +1526,21 @@ static int config_non_roce_gid_cache(struct ib_device *device, > device->port_data[port].cache.lmc = tprops->lmc; > device->port_data[port].cache.port_state = tprops->state; > > - device->port_data[port].cache.subnet_prefix = tprops->subnet_prefix; > + ret = rdma_query_gid(device, port, 0, &gid); > + if (ret) { > + write_unlock_irq(&device->cache.lock); And this patch can't compile. It should be cache_lock and not cache.lock. Thanks
On 6/16/2021 12:57 PM, Leon Romanovsky wrote: > On Wed, Jun 16, 2021 at 12:22:13PM +0530, Anand Khoje wrote: >> ib_query_port() calls device->ops.query_port() to get the port >> attributes. The method of querying is device driver specific. >> The same function calls device->ops.query_gid() to get the GID and >> extract the subnet_prefix (gid_prefix). >> >> The GID and subnet_prefix are stored in a cache. But they do not get >> read from the cache if the device is an Infiniband device. The >> following change takes advantage of the cached subnet_prefix. >> Testing with RDBMS has shown a significant improvement in performance >> with this change. >> >> The function ib_cache_is_initialised() is introduced because >> ib_query_port() gets called early in the stage when the cache is not >> built while reading port immutable property. >> >> In that case, the default GID still gets read from HCA for IB link- >> layer devices. >> >> In the situation of an event causing cache update, the subnet_prefix >> will get retrieved from newly updated GID cache in ib_cache_update(), >> so that we do not end up reading a stale value from cache via >> ib_query_port(). >> >> Fixes: fad61ad ("IB/core: Add subnet prefix to port info") >> Suggested-by: Leon Romanovsky <leonro@nvidia.com> >> Suggested-by: Aru Kolappan <aru.kolappan@oracle.com> >> Signed-off-by: Anand Khoje <anand.a.khoje@oracle.com> >> Signed-off-by: Haakon Bugge <haakon.bugge@oracle.com> >> --- >> >> v1 -> v2: >> - Split the v1 patch in 3 patches as per Leon's suggestion. >> >> v2 -> v3: >> - Added changes as per Mark Zhang's suggestion of clearing >> flags in git_table_cleanup_one(). >> v3 -> v4: >> - Removed the enum ib_port_data_flags and 8 byte flags from >> struct ib_port_data, and the set_bit()/clear_bit() API >> used to update this flag as that was not necessary. >> Done to keep the code simple. >> - Added code to read subnet_prefix from updated GID cache in the >> event of cache update. Prior to this change, ib_cache_update >> was reading the value for subnet_prefix via ib_query_port(), >> due to this patch, we ended up reading a stale cached value of >> subnet_prefix. >> >> --- >> drivers/infiniband/core/cache.c | 18 +++++++++++++++--- >> drivers/infiniband/core/device.c | 9 +++++++++ >> include/rdma/ib_cache.h | 5 +++++ >> include/rdma/ib_verbs.h | 1 + >> 4 files changed, 30 insertions(+), 3 deletions(-) >> >> diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c >> index 2325171..cd99c46 100644 >> --- a/drivers/infiniband/core/cache.c >> +++ b/drivers/infiniband/core/cache.c >> @@ -917,9 +917,11 @@ static void gid_table_cleanup_one(struct ib_device *ib_dev) >> { >> u32 p; >> >> - rdma_for_each_port (ib_dev, p) >> + rdma_for_each_port (ib_dev, p) { >> + ib_dev->port_data[p].cache_is_initialized = 0; > > I think that this line is not needed, we are removing device anyway and > and query_port is not allowed at this stage. > We have kept this for code completeness purposes. Just as we did with set_bit() and clear_bit() APIs. >> cleanup_gid_table_port(ib_dev, p, >> ib_dev->port_data[p].cache.gid); >> + } >> } >> >> static int gid_table_setup_one(struct ib_device *ib_dev) >> @@ -1466,6 +1468,7 @@ static int config_non_roce_gid_cache(struct ib_device *device, >> struct ib_port_attr *tprops = NULL; >> struct ib_pkey_cache *pkey_cache = NULL; >> struct ib_pkey_cache *old_pkey_cache = NULL; >> + union ib_gid gid; >> int i; >> int ret; >> >> @@ -1523,13 +1526,21 @@ static int config_non_roce_gid_cache(struct ib_device *device, >> device->port_data[port].cache.lmc = tprops->lmc; >> device->port_data[port].cache.port_state = tprops->state; >> >> - device->port_data[port].cache.subnet_prefix = tprops->subnet_prefix; >> + ret = rdma_query_gid(device, port, 0, &gid); >> + if (ret) { >> + write_unlock_irq(&device->cache.lock); >> + goto err; >> + } >> + >> + device->port_data[port].cache.subnet_prefix = >> + be64_to_cpu(gid.global.subnet_prefix); >> + >> write_unlock_irq(&device->cache_lock); >> >> if (enforce_security) >> ib_security_cache_change(device, >> port, >> - tprops->subnet_prefix); >> + be64_to_cpu(gid.global.subnet_prefix)); >> >> kfree(old_pkey_cache); >> kfree(tprops); >> @@ -1629,6 +1640,7 @@ int ib_cache_setup_one(struct ib_device *device) >> err = ib_cache_update(device, p, true, true, true); >> if (err) >> return err; >> + device->port_data[p].cache_is_initialized = 1; >> } >> >> return 0; >> diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c >> index 7a617e4..57b9039 100644 >> --- a/drivers/infiniband/core/device.c >> +++ b/drivers/infiniband/core/device.c >> @@ -2057,6 +2057,15 @@ static int __ib_query_port(struct ib_device *device, >> IB_LINK_LAYER_INFINIBAND) >> return 0; >> >> + if (!ib_cache_is_initialised(device, port_num)) >> + goto query_gid_from_device; > > IMHO, we don't need this new function and can access ib_port_data > directly. In device.c, we have plenty of places that does it. > > Not critical. > Added this function to have a way to check validity of cache, such that it could be used in future for the same check in areas to which ib_port_data is opaque. >> + >> + ib_get_cached_subnet_prefix(device, port_num, >> + &port_attr->subnet_prefix); >> + >> + return 0; >> + >> +query_gid_from_device: >> err = device->ops.query_gid(device, port_num, 0, &gid); >> if (err) >> return err; >> diff --git a/include/rdma/ib_cache.h b/include/rdma/ib_cache.h >> index 226ae37..46b43a7 100644 >> --- a/include/rdma/ib_cache.h >> +++ b/include/rdma/ib_cache.h >> @@ -114,4 +114,9 @@ ssize_t rdma_query_gid_table(struct ib_device *device, >> struct ib_uverbs_gid_entry *entries, >> size_t max_entries); >> >> +static inline bool ib_cache_is_initialised(struct ib_device *device, >> + u32 port_num) >> +{ >> + return device->port_data[port_num].cache_is_initialized; >> +} >> #endif /* _IB_CACHE_H */ >> diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h >> index c96d601..405f7da 100644 >> --- a/include/rdma/ib_verbs.h >> +++ b/include/rdma/ib_verbs.h >> @@ -2177,6 +2177,7 @@ struct ib_port_data { >> >> spinlock_t netdev_lock; >> >> + u8 cache_is_initialized:1; >> struct list_head pkey_list; >> >> struct ib_port_cache cache; >> -- >> 1.8.3.1 >>
On 6/16/2021 1:00 PM, Leon Romanovsky wrote: > On Wed, Jun 16, 2021 at 12:22:13PM +0530, Anand Khoje wrote: >> ib_query_port() calls device->ops.query_port() to get the port >> attributes. The method of querying is device driver specific. >> The same function calls device->ops.query_gid() to get the GID and >> extract the subnet_prefix (gid_prefix). >> >> The GID and subnet_prefix are stored in a cache. But they do not get >> read from the cache if the device is an Infiniband device. The >> following change takes advantage of the cached subnet_prefix. >> Testing with RDBMS has shown a significant improvement in performance >> with this change. >> >> The function ib_cache_is_initialised() is introduced because >> ib_query_port() gets called early in the stage when the cache is not >> built while reading port immutable property. >> >> In that case, the default GID still gets read from HCA for IB link- >> layer devices. >> >> In the situation of an event causing cache update, the subnet_prefix >> will get retrieved from newly updated GID cache in ib_cache_update(), >> so that we do not end up reading a stale value from cache via >> ib_query_port(). >> >> Fixes: fad61ad ("IB/core: Add subnet prefix to port info") >> Suggested-by: Leon Romanovsky <leonro@nvidia.com> >> Suggested-by: Aru Kolappan <aru.kolappan@oracle.com> >> Signed-off-by: Anand Khoje <anand.a.khoje@oracle.com> >> Signed-off-by: Haakon Bugge <haakon.bugge@oracle.com> >> --- > > <...> > >> @@ -1523,13 +1526,21 @@ static int config_non_roce_gid_cache(struct ib_device *device, >> device->port_data[port].cache.lmc = tprops->lmc; >> device->port_data[port].cache.port_state = tprops->state; >> >> - device->port_data[port].cache.subnet_prefix = tprops->subnet_prefix; >> + ret = rdma_query_gid(device, port, 0, &gid); >> + if (ret) { >> + write_unlock_irq(&device->cache.lock); > > And this patch can't compile. It should be cache_lock and not cache.lock. > > Thanks > Unfortunate typo from my end. Thanks for pointing this out. I will share the updated patch.
On 6/16/2021 1:12 PM, Anand Khoje wrote: > On 6/16/2021 12:57 PM, Leon Romanovsky wrote: >> On Wed, Jun 16, 2021 at 12:22:13PM +0530, Anand Khoje wrote: >>> ib_query_port() calls device->ops.query_port() to get the port >>> attributes. The method of querying is device driver specific. >>> The same function calls device->ops.query_gid() to get the GID and >>> extract the subnet_prefix (gid_prefix). >>> >>> The GID and subnet_prefix are stored in a cache. But they do not get >>> read from the cache if the device is an Infiniband device. The >>> following change takes advantage of the cached subnet_prefix. >>> Testing with RDBMS has shown a significant improvement in performance >>> with this change. >>> >>> The function ib_cache_is_initialised() is introduced because >>> ib_query_port() gets called early in the stage when the cache is not >>> built while reading port immutable property. >>> >>> In that case, the default GID still gets read from HCA for IB link- >>> layer devices. >>> >>> In the situation of an event causing cache update, the subnet_prefix >>> will get retrieved from newly updated GID cache in ib_cache_update(), >>> so that we do not end up reading a stale value from cache via >>> ib_query_port(). >>> >>> Fixes: fad61ad ("IB/core: Add subnet prefix to port info") >>> Suggested-by: Leon Romanovsky <leonro@nvidia.com> >>> Suggested-by: Aru Kolappan <aru.kolappan@oracle.com> >>> Signed-off-by: Anand Khoje <anand.a.khoje@oracle.com> >>> Signed-off-by: Haakon Bugge <haakon.bugge@oracle.com> >>> --- >>> >>> v1 -> v2: >>> - Split the v1 patch in 3 patches as per Leon's suggestion. >>> >>> v2 -> v3: >>> - Added changes as per Mark Zhang's suggestion of clearing >>> flags in git_table_cleanup_one(). >>> v3 -> v4: >>> - Removed the enum ib_port_data_flags and 8 byte flags from >>> struct ib_port_data, and the set_bit()/clear_bit() API >>> used to update this flag as that was not necessary. >>> Done to keep the code simple. >>> - Added code to read subnet_prefix from updated GID cache in the >>> event of cache update. Prior to this change, ib_cache_update >>> was reading the value for subnet_prefix via ib_query_port(), >>> due to this patch, we ended up reading a stale cached value of >>> subnet_prefix. >>> >>> --- >>> drivers/infiniband/core/cache.c | 18 +++++++++++++++--- >>> drivers/infiniband/core/device.c | 9 +++++++++ >>> include/rdma/ib_cache.h | 5 +++++ >>> include/rdma/ib_verbs.h | 1 + >>> 4 files changed, 30 insertions(+), 3 deletions(-) >>> >>> diff --git a/drivers/infiniband/core/cache.c >>> b/drivers/infiniband/core/cache.c >>> index 2325171..cd99c46 100644 >>> --- a/drivers/infiniband/core/cache.c >>> +++ b/drivers/infiniband/core/cache.c >>> @@ -917,9 +917,11 @@ static void gid_table_cleanup_one(struct >>> ib_device *ib_dev) >>> { >>> u32 p; >>> - rdma_for_each_port (ib_dev, p) >>> + rdma_for_each_port (ib_dev, p) { >>> + ib_dev->port_data[p].cache_is_initialized = 0; >> >> I think that this line is not needed, we are removing device anyway and >> and query_port is not allowed at this stage. >> > We have kept this for code completeness purposes. Just as we did with > set_bit() and clear_bit() APIs. > >>> cleanup_gid_table_port(ib_dev, p, >>> ib_dev->port_data[p].cache.gid); >>> + } >>> } >>> static int gid_table_setup_one(struct ib_device *ib_dev) >>> @@ -1466,6 +1468,7 @@ static int config_non_roce_gid_cache(struct >>> ib_device *device, >>> struct ib_port_attr *tprops = NULL; >>> struct ib_pkey_cache *pkey_cache = NULL; >>> struct ib_pkey_cache *old_pkey_cache = NULL; >>> + union ib_gid gid; >>> int i; >>> int ret; >>> @@ -1523,13 +1526,21 @@ static int config_non_roce_gid_cache(struct >>> ib_device *device, >>> device->port_data[port].cache.lmc = tprops->lmc; >>> device->port_data[port].cache.port_state = tprops->state; >>> - device->port_data[port].cache.subnet_prefix = >>> tprops->subnet_prefix; >>> + ret = rdma_query_gid(device, port, 0, &gid); >>> + if (ret) { >>> + write_unlock_irq(&device->cache.lock); >>> + goto err; >>> + } >>> + >>> + device->port_data[port].cache.subnet_prefix = >>> + be64_to_cpu(gid.global.subnet_prefix); >>> + >>> write_unlock_irq(&device->cache_lock); >>> if (enforce_security) >>> ib_security_cache_change(device, >>> port, >>> - tprops->subnet_prefix); >>> + be64_to_cpu(gid.global.subnet_prefix)); >>> kfree(old_pkey_cache); >>> kfree(tprops); >>> @@ -1629,6 +1640,7 @@ int ib_cache_setup_one(struct ib_device *device) >>> err = ib_cache_update(device, p, true, true, true); >>> if (err) >>> return err; >>> + device->port_data[p].cache_is_initialized = 1; >>> } >>> return 0; >>> diff --git a/drivers/infiniband/core/device.c >>> b/drivers/infiniband/core/device.c >>> index 7a617e4..57b9039 100644 >>> --- a/drivers/infiniband/core/device.c >>> +++ b/drivers/infiniband/core/device.c >>> @@ -2057,6 +2057,15 @@ static int __ib_query_port(struct ib_device >>> *device, >>> IB_LINK_LAYER_INFINIBAND) >>> return 0; >>> + if (!ib_cache_is_initialised(device, port_num)) >>> + goto query_gid_from_device; >> >> IMHO, we don't need this new function and can access ib_port_data >> directly. In device.c, we have plenty of places that does it. >> >> Not critical. >> > Added this function to have a way to check validity of cache, such that > it could be used in future for the same check in areas to which > ib_port_data is opaque. > Also, there was a review comment from Mark Zhang for patch version 2, where he had suggested to shift clear_bit(IB_PORT_CACHE_INITIALIZED, &device->port_data[p].flags); to gid_table_cleanup_one(). Thanks, Anand >>> + >>> + ib_get_cached_subnet_prefix(device, port_num, >>> + &port_attr->subnet_prefix); >>> + >>> + return 0; >>> + >>> +query_gid_from_device: >>> err = device->ops.query_gid(device, port_num, 0, &gid); >>> if (err) >>> return err; >>> diff --git a/include/rdma/ib_cache.h b/include/rdma/ib_cache.h >>> index 226ae37..46b43a7 100644 >>> --- a/include/rdma/ib_cache.h >>> +++ b/include/rdma/ib_cache.h >>> @@ -114,4 +114,9 @@ ssize_t rdma_query_gid_table(struct ib_device >>> *device, >>> struct ib_uverbs_gid_entry *entries, >>> size_t max_entries); >>> +static inline bool ib_cache_is_initialised(struct ib_device *device, >>> + u32 port_num) >>> +{ >>> + return device->port_data[port_num].cache_is_initialized; >>> +} >>> #endif /* _IB_CACHE_H */ >>> diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h >>> index c96d601..405f7da 100644 >>> --- a/include/rdma/ib_verbs.h >>> +++ b/include/rdma/ib_verbs.h >>> @@ -2177,6 +2177,7 @@ struct ib_port_data { >>> spinlock_t netdev_lock; >>> + u8 cache_is_initialized:1; >>> struct list_head pkey_list; >>> struct ib_port_cache cache; >>> -- >>> 1.8.3.1 >>> >
On Wed, Jun 16, 2021 at 01:12:51PM +0530, Anand Khoje wrote: > On 6/16/2021 12:57 PM, Leon Romanovsky wrote: > > On Wed, Jun 16, 2021 at 12:22:13PM +0530, Anand Khoje wrote: > > > ib_query_port() calls device->ops.query_port() to get the port > > > attributes. The method of querying is device driver specific. > > > The same function calls device->ops.query_gid() to get the GID and > > > extract the subnet_prefix (gid_prefix). > > > > > > The GID and subnet_prefix are stored in a cache. But they do not get > > > read from the cache if the device is an Infiniband device. The > > > following change takes advantage of the cached subnet_prefix. > > > Testing with RDBMS has shown a significant improvement in performance > > > with this change. > > > > > > The function ib_cache_is_initialised() is introduced because > > > ib_query_port() gets called early in the stage when the cache is not > > > built while reading port immutable property. > > > > > > In that case, the default GID still gets read from HCA for IB link- > > > layer devices. > > > > > > In the situation of an event causing cache update, the subnet_prefix > > > will get retrieved from newly updated GID cache in ib_cache_update(), > > > so that we do not end up reading a stale value from cache via > > > ib_query_port(). > > > > > > Fixes: fad61ad ("IB/core: Add subnet prefix to port info") > > > Suggested-by: Leon Romanovsky <leonro@nvidia.com> > > > Suggested-by: Aru Kolappan <aru.kolappan@oracle.com> > > > Signed-off-by: Anand Khoje <anand.a.khoje@oracle.com> > > > Signed-off-by: Haakon Bugge <haakon.bugge@oracle.com> > > > --- > > > > > > v1 -> v2: > > > - Split the v1 patch in 3 patches as per Leon's suggestion. > > > > > > v2 -> v3: > > > - Added changes as per Mark Zhang's suggestion of clearing > > > flags in git_table_cleanup_one(). > > > v3 -> v4: > > > - Removed the enum ib_port_data_flags and 8 byte flags from > > > struct ib_port_data, and the set_bit()/clear_bit() API > > > used to update this flag as that was not necessary. > > > Done to keep the code simple. > > > - Added code to read subnet_prefix from updated GID cache in the > > > event of cache update. Prior to this change, ib_cache_update > > > was reading the value for subnet_prefix via ib_query_port(), > > > due to this patch, we ended up reading a stale cached value of > > > subnet_prefix. > > > > > > --- > > > drivers/infiniband/core/cache.c | 18 +++++++++++++++--- > > > drivers/infiniband/core/device.c | 9 +++++++++ > > > include/rdma/ib_cache.h | 5 +++++ > > > include/rdma/ib_verbs.h | 1 + > > > 4 files changed, 30 insertions(+), 3 deletions(-) > > > > > > diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c > > > index 2325171..cd99c46 100644 > > > --- a/drivers/infiniband/core/cache.c > > > +++ b/drivers/infiniband/core/cache.c > > > @@ -917,9 +917,11 @@ static void gid_table_cleanup_one(struct ib_device *ib_dev) > > > { > > > u32 p; > > > - rdma_for_each_port (ib_dev, p) > > > + rdma_for_each_port (ib_dev, p) { > > > + ib_dev->port_data[p].cache_is_initialized = 0; > > > > I think that this line is not needed, we are removing device anyway and > > and query_port is not allowed at this stage. > > > We have kept this for code completeness purposes. Just as we did with > set_bit() and clear_bit() APIs. You are not using *_bit() API now, so let's not clear here. It is not completeness, but misleading. It gives false assumption that cache_is_initialized is used later in the code. > > > > cleanup_gid_table_port(ib_dev, p, > > > ib_dev->port_data[p].cache.gid); > > > + } > > > } > > > static int gid_table_setup_one(struct ib_device *ib_dev) > > > @@ -1466,6 +1468,7 @@ static int config_non_roce_gid_cache(struct ib_device *device, > > > struct ib_port_attr *tprops = NULL; > > > struct ib_pkey_cache *pkey_cache = NULL; > > > struct ib_pkey_cache *old_pkey_cache = NULL; > > > + union ib_gid gid; > > > int i; > > > int ret; > > > @@ -1523,13 +1526,21 @@ static int config_non_roce_gid_cache(struct ib_device *device, > > > device->port_data[port].cache.lmc = tprops->lmc; > > > device->port_data[port].cache.port_state = tprops->state; > > > - device->port_data[port].cache.subnet_prefix = tprops->subnet_prefix; > > > + ret = rdma_query_gid(device, port, 0, &gid); > > > + if (ret) { > > > + write_unlock_irq(&device->cache.lock); > > > + goto err; > > > + } > > > + > > > + device->port_data[port].cache.subnet_prefix = > > > + be64_to_cpu(gid.global.subnet_prefix); > > > + > > > write_unlock_irq(&device->cache_lock); > > > if (enforce_security) > > > ib_security_cache_change(device, > > > port, > > > - tprops->subnet_prefix); > > > + be64_to_cpu(gid.global.subnet_prefix)); > > > kfree(old_pkey_cache); > > > kfree(tprops); > > > @@ -1629,6 +1640,7 @@ int ib_cache_setup_one(struct ib_device *device) > > > err = ib_cache_update(device, p, true, true, true); > > > if (err) > > > return err; > > > + device->port_data[p].cache_is_initialized = 1; > > > } > > > return 0; > > > diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c > > > index 7a617e4..57b9039 100644 > > > --- a/drivers/infiniband/core/device.c > > > +++ b/drivers/infiniband/core/device.c > > > @@ -2057,6 +2057,15 @@ static int __ib_query_port(struct ib_device *device, > > > IB_LINK_LAYER_INFINIBAND) > > > return 0; > > > + if (!ib_cache_is_initialised(device, port_num)) > > > + goto query_gid_from_device; > > > > IMHO, we don't need this new function and can access ib_port_data > > directly. In device.c, we have plenty of places that does it. > > > > Not critical. > > > Added this function to have a way to check validity of cache, such that it > could be used in future for the same check in areas to which ib_port_data is > opaque. It is ok, just call directly if (!device->port_data[port_num].cache_is_initialized). Thanks
Hi Anand, Thank you for the patch! Yet something to improve: [auto build test ERROR on rdma/for-next] [also build test ERROR on next-20210616] [cannot apply to linux/master linus/master v5.13-rc6] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch] url: https://github.com/0day-ci/linux/commits/Anand-Khoje/IB-core-Obtaining-subnet_prefix-from-cache-in/20210617-102611 base: https://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma.git for-next config: powerpc64-randconfig-r024-20210617 (attached as .config) compiler: clang version 13.0.0 (https://github.com/llvm/llvm-project 64720f57bea6a6bf033feef4a5751ab9c0c3b401) reproduce (this is a W=1 build): wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # install powerpc64 cross compiling tool for clang build # apt-get install binutils-powerpc64-linux-gnu # https://github.com/0day-ci/linux/commit/495253a987df586d8c5f4c525999cdf39c5823f0 git remote add linux-review https://github.com/0day-ci/linux git fetch --no-tags linux-review Anand-Khoje/IB-core-Obtaining-subnet_prefix-from-cache-in/20210617-102611 git checkout 495253a987df586d8c5f4c525999cdf39c5823f0 # save the attached .config to linux build tree COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=powerpc64 If you fix the issue, kindly add following tag as appropriate Reported-by: kernel test robot <lkp@intel.com> All errors (new ones prefixed by >>): In file included from drivers/infiniband/core/cache.c:36: In file included from include/linux/module.h:12: In file included from include/linux/list.h:9: In file included from include/linux/kernel.h:12: In file included from include/linux/bitops.h:32: In file included from arch/powerpc/include/asm/bitops.h:62: arch/powerpc/include/asm/barrier.h:49:9: warning: '__lwsync' macro redefined [-Wmacro-redefined] #define __lwsync() __asm__ __volatile__ (stringify_in_c(LWSYNC) : : :"memory") ^ <built-in>:308:9: note: previous definition is here #define __lwsync __builtin_ppc_lwsync ^ >> drivers/infiniband/core/cache.c:1531:29: error: no member named 'cache' in 'struct ib_device' write_unlock_irq(&device->cache.lock); ~~~~~~ ^ include/linux/rwlock.h:108:55: note: expanded from macro 'write_unlock_irq' #define write_unlock_irq(lock) _raw_write_unlock_irq(lock) ^~~~ 1 warning and 1 error generated. vim +1531 drivers/infiniband/core/cache.c 1463 1464 static int 1465 ib_cache_update(struct ib_device *device, u32 port, bool update_gids, 1466 bool update_pkeys, bool enforce_security) 1467 { 1468 struct ib_port_attr *tprops = NULL; 1469 struct ib_pkey_cache *pkey_cache = NULL; 1470 struct ib_pkey_cache *old_pkey_cache = NULL; 1471 union ib_gid gid; 1472 int i; 1473 int ret; 1474 1475 if (!rdma_is_port_valid(device, port)) 1476 return -EINVAL; 1477 1478 tprops = kmalloc(sizeof *tprops, GFP_KERNEL); 1479 if (!tprops) 1480 return -ENOMEM; 1481 1482 ret = ib_query_port(device, port, tprops); 1483 if (ret) { 1484 dev_warn(&device->dev, "ib_query_port failed (%d)\n", ret); 1485 goto err; 1486 } 1487 1488 if (!rdma_protocol_roce(device, port) && update_gids) { 1489 ret = config_non_roce_gid_cache(device, port, 1490 tprops->gid_tbl_len); 1491 if (ret) 1492 goto err; 1493 } 1494 1495 update_pkeys &= !!tprops->pkey_tbl_len; 1496 1497 if (update_pkeys) { 1498 pkey_cache = kmalloc(struct_size(pkey_cache, table, 1499 tprops->pkey_tbl_len), 1500 GFP_KERNEL); 1501 if (!pkey_cache) { 1502 ret = -ENOMEM; 1503 goto err; 1504 } 1505 1506 pkey_cache->table_len = tprops->pkey_tbl_len; 1507 1508 for (i = 0; i < pkey_cache->table_len; ++i) { 1509 ret = ib_query_pkey(device, port, i, 1510 pkey_cache->table + i); 1511 if (ret) { 1512 dev_warn(&device->dev, 1513 "ib_query_pkey failed (%d) for index %d\n", 1514 ret, i); 1515 goto err; 1516 } 1517 } 1518 } 1519 1520 write_lock_irq(&device->cache_lock); 1521 1522 if (update_pkeys) { 1523 old_pkey_cache = device->port_data[port].cache.pkey; 1524 device->port_data[port].cache.pkey = pkey_cache; 1525 } 1526 device->port_data[port].cache.lmc = tprops->lmc; 1527 device->port_data[port].cache.port_state = tprops->state; 1528 1529 ret = rdma_query_gid(device, port, 0, &gid); 1530 if (ret) { > 1531 write_unlock_irq(&device->cache.lock); 1532 goto err; 1533 } 1534 1535 device->port_data[port].cache.subnet_prefix = 1536 be64_to_cpu(gid.global.subnet_prefix); 1537 1538 write_unlock_irq(&device->cache_lock); 1539 1540 if (enforce_security) 1541 ib_security_cache_change(device, 1542 port, 1543 be64_to_cpu(gid.global.subnet_prefix)); 1544 1545 kfree(old_pkey_cache); 1546 kfree(tprops); 1547 return 0; 1548 1549 err: 1550 kfree(pkey_cache); 1551 kfree(tprops); 1552 return ret; 1553 } 1554 --- 0-DAY CI Kernel Test Service, Intel Corporation https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
Hi Anand, Thank you for the patch! Yet something to improve: [auto build test ERROR on rdma/for-next] [also build test ERROR on next-20210616] [cannot apply to linux/master linus/master v5.13-rc6] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch] url: https://github.com/0day-ci/linux/commits/Anand-Khoje/IB-core-Obtaining-subnet_prefix-from-cache-in/20210617-102611 base: https://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma.git for-next config: powerpc64-randconfig-r012-20210617 (attached as .config) compiler: clang version 13.0.0 (https://github.com/llvm/llvm-project 64720f57bea6a6bf033feef4a5751ab9c0c3b401) reproduce (this is a W=1 build): wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # install powerpc64 cross compiling tool for clang build # apt-get install binutils-powerpc64-linux-gnu # https://github.com/0day-ci/linux/commit/495253a987df586d8c5f4c525999cdf39c5823f0 git remote add linux-review https://github.com/0day-ci/linux git fetch --no-tags linux-review Anand-Khoje/IB-core-Obtaining-subnet_prefix-from-cache-in/20210617-102611 git checkout 495253a987df586d8c5f4c525999cdf39c5823f0 # save the attached .config to linux build tree COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=powerpc64 If you fix the issue, kindly add following tag as appropriate Reported-by: kernel test robot <lkp@intel.com> All errors (new ones prefixed by >>): In file included from drivers/infiniband/core/cache.c:36: In file included from include/linux/module.h:12: In file included from include/linux/list.h:9: In file included from include/linux/kernel.h:12: In file included from include/linux/bitops.h:32: In file included from arch/powerpc/include/asm/bitops.h:62: arch/powerpc/include/asm/barrier.h:49:9: warning: '__lwsync' macro redefined [-Wmacro-redefined] #define __lwsync() __asm__ __volatile__ (stringify_in_c(LWSYNC) : : :"memory") ^ <built-in>:310:9: note: previous definition is here #define __lwsync __builtin_ppc_lwsync ^ >> drivers/infiniband/core/cache.c:1531:29: error: no member named 'cache' in 'struct ib_device' write_unlock_irq(&device->cache.lock); ~~~~~~ ^ include/linux/rwlock.h:108:55: note: expanded from macro 'write_unlock_irq' #define write_unlock_irq(lock) _raw_write_unlock_irq(lock) ^~~~ 1 warning and 1 error generated. vim +1531 drivers/infiniband/core/cache.c 1463 1464 static int 1465 ib_cache_update(struct ib_device *device, u32 port, bool update_gids, 1466 bool update_pkeys, bool enforce_security) 1467 { 1468 struct ib_port_attr *tprops = NULL; 1469 struct ib_pkey_cache *pkey_cache = NULL; 1470 struct ib_pkey_cache *old_pkey_cache = NULL; 1471 union ib_gid gid; 1472 int i; 1473 int ret; 1474 1475 if (!rdma_is_port_valid(device, port)) 1476 return -EINVAL; 1477 1478 tprops = kmalloc(sizeof *tprops, GFP_KERNEL); 1479 if (!tprops) 1480 return -ENOMEM; 1481 1482 ret = ib_query_port(device, port, tprops); 1483 if (ret) { 1484 dev_warn(&device->dev, "ib_query_port failed (%d)\n", ret); 1485 goto err; 1486 } 1487 1488 if (!rdma_protocol_roce(device, port) && update_gids) { 1489 ret = config_non_roce_gid_cache(device, port, 1490 tprops->gid_tbl_len); 1491 if (ret) 1492 goto err; 1493 } 1494 1495 update_pkeys &= !!tprops->pkey_tbl_len; 1496 1497 if (update_pkeys) { 1498 pkey_cache = kmalloc(struct_size(pkey_cache, table, 1499 tprops->pkey_tbl_len), 1500 GFP_KERNEL); 1501 if (!pkey_cache) { 1502 ret = -ENOMEM; 1503 goto err; 1504 } 1505 1506 pkey_cache->table_len = tprops->pkey_tbl_len; 1507 1508 for (i = 0; i < pkey_cache->table_len; ++i) { 1509 ret = ib_query_pkey(device, port, i, 1510 pkey_cache->table + i); 1511 if (ret) { 1512 dev_warn(&device->dev, 1513 "ib_query_pkey failed (%d) for index %d\n", 1514 ret, i); 1515 goto err; 1516 } 1517 } 1518 } 1519 1520 write_lock_irq(&device->cache_lock); 1521 1522 if (update_pkeys) { 1523 old_pkey_cache = device->port_data[port].cache.pkey; 1524 device->port_data[port].cache.pkey = pkey_cache; 1525 } 1526 device->port_data[port].cache.lmc = tprops->lmc; 1527 device->port_data[port].cache.port_state = tprops->state; 1528 1529 ret = rdma_query_gid(device, port, 0, &gid); 1530 if (ret) { > 1531 write_unlock_irq(&device->cache.lock); 1532 goto err; 1533 } 1534 1535 device->port_data[port].cache.subnet_prefix = 1536 be64_to_cpu(gid.global.subnet_prefix); 1537 1538 write_unlock_irq(&device->cache_lock); 1539 1540 if (enforce_security) 1541 ib_security_cache_change(device, 1542 port, 1543 be64_to_cpu(gid.global.subnet_prefix)); 1544 1545 kfree(old_pkey_cache); 1546 kfree(tprops); 1547 return 0; 1548 1549 err: 1550 kfree(pkey_cache); 1551 kfree(tprops); 1552 return ret; 1553 } 1554 --- 0-DAY CI Kernel Test Service, Intel Corporation https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 2325171..cd99c46 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -917,9 +917,11 @@ static void gid_table_cleanup_one(struct ib_device *ib_dev) { u32 p; - rdma_for_each_port (ib_dev, p) + rdma_for_each_port (ib_dev, p) { + ib_dev->port_data[p].cache_is_initialized = 0; cleanup_gid_table_port(ib_dev, p, ib_dev->port_data[p].cache.gid); + } } static int gid_table_setup_one(struct ib_device *ib_dev) @@ -1466,6 +1468,7 @@ static int config_non_roce_gid_cache(struct ib_device *device, struct ib_port_attr *tprops = NULL; struct ib_pkey_cache *pkey_cache = NULL; struct ib_pkey_cache *old_pkey_cache = NULL; + union ib_gid gid; int i; int ret; @@ -1523,13 +1526,21 @@ static int config_non_roce_gid_cache(struct ib_device *device, device->port_data[port].cache.lmc = tprops->lmc; device->port_data[port].cache.port_state = tprops->state; - device->port_data[port].cache.subnet_prefix = tprops->subnet_prefix; + ret = rdma_query_gid(device, port, 0, &gid); + if (ret) { + write_unlock_irq(&device->cache.lock); + goto err; + } + + device->port_data[port].cache.subnet_prefix = + be64_to_cpu(gid.global.subnet_prefix); + write_unlock_irq(&device->cache_lock); if (enforce_security) ib_security_cache_change(device, port, - tprops->subnet_prefix); + be64_to_cpu(gid.global.subnet_prefix)); kfree(old_pkey_cache); kfree(tprops); @@ -1629,6 +1640,7 @@ int ib_cache_setup_one(struct ib_device *device) err = ib_cache_update(device, p, true, true, true); if (err) return err; + device->port_data[p].cache_is_initialized = 1; } return 0; diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 7a617e4..57b9039 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -2057,6 +2057,15 @@ static int __ib_query_port(struct ib_device *device, IB_LINK_LAYER_INFINIBAND) return 0; + if (!ib_cache_is_initialised(device, port_num)) + goto query_gid_from_device; + + ib_get_cached_subnet_prefix(device, port_num, + &port_attr->subnet_prefix); + + return 0; + +query_gid_from_device: err = device->ops.query_gid(device, port_num, 0, &gid); if (err) return err; diff --git a/include/rdma/ib_cache.h b/include/rdma/ib_cache.h index 226ae37..46b43a7 100644 --- a/include/rdma/ib_cache.h +++ b/include/rdma/ib_cache.h @@ -114,4 +114,9 @@ ssize_t rdma_query_gid_table(struct ib_device *device, struct ib_uverbs_gid_entry *entries, size_t max_entries); +static inline bool ib_cache_is_initialised(struct ib_device *device, + u32 port_num) +{ + return device->port_data[port_num].cache_is_initialized; +} #endif /* _IB_CACHE_H */ diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index c96d601..405f7da 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2177,6 +2177,7 @@ struct ib_port_data { spinlock_t netdev_lock; + u8 cache_is_initialized:1; struct list_head pkey_list; struct ib_port_cache cache;