diff mbox series

[v4,for-next,3/3] IB/core: Obtain subnet_prefix from cache in IB devices

Message ID 20210616065213.987-4-anand.a.khoje@oracle.com (mailing list archive)
State Superseded
Headers show
Series IB/core: Obtaining subnet_prefix from cache in | expand

Commit Message

Anand Khoje June 16, 2021, 6:52 a.m. UTC
ib_query_port() calls device->ops.query_port() to get the port
attributes. The method of querying is device driver specific.
The same function calls device->ops.query_gid() to get the GID and
extract the subnet_prefix (gid_prefix).

The GID and subnet_prefix are stored in a cache. But they do not get
read from the cache if the device is an Infiniband device. The
following change takes advantage of the cached subnet_prefix.
Testing with RDBMS has shown a significant improvement in performance
with this change.

The function ib_cache_is_initialised() is introduced because
ib_query_port() gets called early in the stage when the cache is not
built while reading port immutable property.

In that case, the default GID still gets read from HCA for IB link-
layer devices.

In the situation of an event causing cache update, the subnet_prefix
will get retrieved from newly updated GID cache in ib_cache_update(),
so that we do not end up reading a stale value from cache via
ib_query_port().

Fixes: fad61ad ("IB/core: Add subnet prefix to port info")
Suggested-by: Leon Romanovsky <leonro@nvidia.com>
Suggested-by: Aru Kolappan <aru.kolappan@oracle.com>
Signed-off-by: Anand Khoje <anand.a.khoje@oracle.com>
Signed-off-by: Haakon Bugge <haakon.bugge@oracle.com>
---

v1 -> v2:
    -   Split the v1 patch in 3 patches as per Leon's suggestion.

v2 -> v3:
    -   Added changes as per Mark Zhang's suggestion of clearing
        flags in git_table_cleanup_one().
v3 -> v4:
    -   Removed the enum ib_port_data_flags and 8 byte flags from
        struct ib_port_data, and the set_bit()/clear_bit() API
        used to update this flag as that was not necessary.
        Done to keep the code simple.
    -   Added code to read subnet_prefix from updated GID cache in the
        event of cache update. Prior to this change, ib_cache_update
        was reading the value for subnet_prefix via ib_query_port(),
        due to this patch, we ended up reading a stale cached value of
        subnet_prefix.

---
 drivers/infiniband/core/cache.c  | 18 +++++++++++++++---
 drivers/infiniband/core/device.c |  9 +++++++++
 include/rdma/ib_cache.h          |  5 +++++
 include/rdma/ib_verbs.h          |  1 +
 4 files changed, 30 insertions(+), 3 deletions(-)

Comments

Leon Romanovsky June 16, 2021, 7:27 a.m. UTC | #1
On Wed, Jun 16, 2021 at 12:22:13PM +0530, Anand Khoje wrote:
> ib_query_port() calls device->ops.query_port() to get the port
> attributes. The method of querying is device driver specific.
> The same function calls device->ops.query_gid() to get the GID and
> extract the subnet_prefix (gid_prefix).
> 
> The GID and subnet_prefix are stored in a cache. But they do not get
> read from the cache if the device is an Infiniband device. The
> following change takes advantage of the cached subnet_prefix.
> Testing with RDBMS has shown a significant improvement in performance
> with this change.
> 
> The function ib_cache_is_initialised() is introduced because
> ib_query_port() gets called early in the stage when the cache is not
> built while reading port immutable property.
> 
> In that case, the default GID still gets read from HCA for IB link-
> layer devices.
> 
> In the situation of an event causing cache update, the subnet_prefix
> will get retrieved from newly updated GID cache in ib_cache_update(),
> so that we do not end up reading a stale value from cache via
> ib_query_port().
> 
> Fixes: fad61ad ("IB/core: Add subnet prefix to port info")
> Suggested-by: Leon Romanovsky <leonro@nvidia.com>
> Suggested-by: Aru Kolappan <aru.kolappan@oracle.com>
> Signed-off-by: Anand Khoje <anand.a.khoje@oracle.com>
> Signed-off-by: Haakon Bugge <haakon.bugge@oracle.com>
> ---
> 
> v1 -> v2:
>     -   Split the v1 patch in 3 patches as per Leon's suggestion.
> 
> v2 -> v3:
>     -   Added changes as per Mark Zhang's suggestion of clearing
>         flags in git_table_cleanup_one().
> v3 -> v4:
>     -   Removed the enum ib_port_data_flags and 8 byte flags from
>         struct ib_port_data, and the set_bit()/clear_bit() API
>         used to update this flag as that was not necessary.
>         Done to keep the code simple.
>     -   Added code to read subnet_prefix from updated GID cache in the
>         event of cache update. Prior to this change, ib_cache_update
>         was reading the value for subnet_prefix via ib_query_port(),
>         due to this patch, we ended up reading a stale cached value of
>         subnet_prefix.
> 
> ---
>  drivers/infiniband/core/cache.c  | 18 +++++++++++++++---
>  drivers/infiniband/core/device.c |  9 +++++++++
>  include/rdma/ib_cache.h          |  5 +++++
>  include/rdma/ib_verbs.h          |  1 +
>  4 files changed, 30 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
> index 2325171..cd99c46 100644
> --- a/drivers/infiniband/core/cache.c
> +++ b/drivers/infiniband/core/cache.c
> @@ -917,9 +917,11 @@ static void gid_table_cleanup_one(struct ib_device *ib_dev)
>  {
>  	u32 p;
>  
> -	rdma_for_each_port (ib_dev, p)
> +	rdma_for_each_port (ib_dev, p) {
> +		ib_dev->port_data[p].cache_is_initialized = 0;

I think that this line is not needed, we are removing device anyway and
and query_port is not allowed at this stage.

>  		cleanup_gid_table_port(ib_dev, p,
>  				       ib_dev->port_data[p].cache.gid);
> +	}
>  }
>  
>  static int gid_table_setup_one(struct ib_device *ib_dev)
> @@ -1466,6 +1468,7 @@ static int config_non_roce_gid_cache(struct ib_device *device,
>  	struct ib_port_attr       *tprops = NULL;
>  	struct ib_pkey_cache      *pkey_cache = NULL;
>  	struct ib_pkey_cache      *old_pkey_cache = NULL;
> +	union ib_gid               gid;
>  	int                        i;
>  	int                        ret;
>  
> @@ -1523,13 +1526,21 @@ static int config_non_roce_gid_cache(struct ib_device *device,
>  	device->port_data[port].cache.lmc = tprops->lmc;
>  	device->port_data[port].cache.port_state = tprops->state;
>  
> -	device->port_data[port].cache.subnet_prefix = tprops->subnet_prefix;
> +	ret = rdma_query_gid(device, port, 0, &gid);
> +	if (ret) {
> +		write_unlock_irq(&device->cache.lock);
> +		goto err;
> +	}
> +
> +	device->port_data[port].cache.subnet_prefix =
> +			be64_to_cpu(gid.global.subnet_prefix);
> +
>  	write_unlock_irq(&device->cache_lock);
>  
>  	if (enforce_security)
>  		ib_security_cache_change(device,
>  					 port,
> -					 tprops->subnet_prefix);
> +					 be64_to_cpu(gid.global.subnet_prefix));
>  
>  	kfree(old_pkey_cache);
>  	kfree(tprops);
> @@ -1629,6 +1640,7 @@ int ib_cache_setup_one(struct ib_device *device)
>  		err = ib_cache_update(device, p, true, true, true);
>  		if (err)
>  			return err;
> +		device->port_data[p].cache_is_initialized = 1;
>  	}
>  
>  	return 0;
> diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
> index 7a617e4..57b9039 100644
> --- a/drivers/infiniband/core/device.c
> +++ b/drivers/infiniband/core/device.c
> @@ -2057,6 +2057,15 @@ static int __ib_query_port(struct ib_device *device,
>  	    IB_LINK_LAYER_INFINIBAND)
>  		return 0;
>  
> +	if (!ib_cache_is_initialised(device, port_num))
> +		goto query_gid_from_device;

IMHO, we don't need this new function and can access ib_port_data
directly. In device.c, we have plenty of places that does it.

Not critical.

> +
> +	ib_get_cached_subnet_prefix(device, port_num,
> +				    &port_attr->subnet_prefix);
> +
> +	return 0;
> +
> +query_gid_from_device:
>  	err = device->ops.query_gid(device, port_num, 0, &gid);
>  	if (err)
>  		return err;
> diff --git a/include/rdma/ib_cache.h b/include/rdma/ib_cache.h
> index 226ae37..46b43a7 100644
> --- a/include/rdma/ib_cache.h
> +++ b/include/rdma/ib_cache.h
> @@ -114,4 +114,9 @@ ssize_t rdma_query_gid_table(struct ib_device *device,
>  			     struct ib_uverbs_gid_entry *entries,
>  			     size_t max_entries);
>  
> +static inline bool ib_cache_is_initialised(struct ib_device *device,
> +					u32 port_num)
> +{
> +	return device->port_data[port_num].cache_is_initialized;
> +}
>  #endif /* _IB_CACHE_H */
> diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
> index c96d601..405f7da 100644
> --- a/include/rdma/ib_verbs.h
> +++ b/include/rdma/ib_verbs.h
> @@ -2177,6 +2177,7 @@ struct ib_port_data {
>  
>  	spinlock_t netdev_lock;
>  
> +	u8 cache_is_initialized:1;
>  	struct list_head pkey_list;
>  
>  	struct ib_port_cache cache;
> -- 
> 1.8.3.1
>
Leon Romanovsky June 16, 2021, 7:30 a.m. UTC | #2
On Wed, Jun 16, 2021 at 12:22:13PM +0530, Anand Khoje wrote:
> ib_query_port() calls device->ops.query_port() to get the port
> attributes. The method of querying is device driver specific.
> The same function calls device->ops.query_gid() to get the GID and
> extract the subnet_prefix (gid_prefix).
> 
> The GID and subnet_prefix are stored in a cache. But they do not get
> read from the cache if the device is an Infiniband device. The
> following change takes advantage of the cached subnet_prefix.
> Testing with RDBMS has shown a significant improvement in performance
> with this change.
> 
> The function ib_cache_is_initialised() is introduced because
> ib_query_port() gets called early in the stage when the cache is not
> built while reading port immutable property.
> 
> In that case, the default GID still gets read from HCA for IB link-
> layer devices.
> 
> In the situation of an event causing cache update, the subnet_prefix
> will get retrieved from newly updated GID cache in ib_cache_update(),
> so that we do not end up reading a stale value from cache via
> ib_query_port().
> 
> Fixes: fad61ad ("IB/core: Add subnet prefix to port info")
> Suggested-by: Leon Romanovsky <leonro@nvidia.com>
> Suggested-by: Aru Kolappan <aru.kolappan@oracle.com>
> Signed-off-by: Anand Khoje <anand.a.khoje@oracle.com>
> Signed-off-by: Haakon Bugge <haakon.bugge@oracle.com>
> ---

<...>

> @@ -1523,13 +1526,21 @@ static int config_non_roce_gid_cache(struct ib_device *device,
>  	device->port_data[port].cache.lmc = tprops->lmc;
>  	device->port_data[port].cache.port_state = tprops->state;
>  
> -	device->port_data[port].cache.subnet_prefix = tprops->subnet_prefix;
> +	ret = rdma_query_gid(device, port, 0, &gid);
> +	if (ret) {
> +		write_unlock_irq(&device->cache.lock);

And this patch can't compile. It should be cache_lock and not cache.lock.

Thanks
Anand Khoje June 16, 2021, 7:42 a.m. UTC | #3
On 6/16/2021 12:57 PM, Leon Romanovsky wrote:
> On Wed, Jun 16, 2021 at 12:22:13PM +0530, Anand Khoje wrote:
>> ib_query_port() calls device->ops.query_port() to get the port
>> attributes. The method of querying is device driver specific.
>> The same function calls device->ops.query_gid() to get the GID and
>> extract the subnet_prefix (gid_prefix).
>>
>> The GID and subnet_prefix are stored in a cache. But they do not get
>> read from the cache if the device is an Infiniband device. The
>> following change takes advantage of the cached subnet_prefix.
>> Testing with RDBMS has shown a significant improvement in performance
>> with this change.
>>
>> The function ib_cache_is_initialised() is introduced because
>> ib_query_port() gets called early in the stage when the cache is not
>> built while reading port immutable property.
>>
>> In that case, the default GID still gets read from HCA for IB link-
>> layer devices.
>>
>> In the situation of an event causing cache update, the subnet_prefix
>> will get retrieved from newly updated GID cache in ib_cache_update(),
>> so that we do not end up reading a stale value from cache via
>> ib_query_port().
>>
>> Fixes: fad61ad ("IB/core: Add subnet prefix to port info")
>> Suggested-by: Leon Romanovsky <leonro@nvidia.com>
>> Suggested-by: Aru Kolappan <aru.kolappan@oracle.com>
>> Signed-off-by: Anand Khoje <anand.a.khoje@oracle.com>
>> Signed-off-by: Haakon Bugge <haakon.bugge@oracle.com>
>> ---
>>
>> v1 -> v2:
>>      -   Split the v1 patch in 3 patches as per Leon's suggestion.
>>
>> v2 -> v3:
>>      -   Added changes as per Mark Zhang's suggestion of clearing
>>          flags in git_table_cleanup_one().
>> v3 -> v4:
>>      -   Removed the enum ib_port_data_flags and 8 byte flags from
>>          struct ib_port_data, and the set_bit()/clear_bit() API
>>          used to update this flag as that was not necessary.
>>          Done to keep the code simple.
>>      -   Added code to read subnet_prefix from updated GID cache in the
>>          event of cache update. Prior to this change, ib_cache_update
>>          was reading the value for subnet_prefix via ib_query_port(),
>>          due to this patch, we ended up reading a stale cached value of
>>          subnet_prefix.
>>
>> ---
>>   drivers/infiniband/core/cache.c  | 18 +++++++++++++++---
>>   drivers/infiniband/core/device.c |  9 +++++++++
>>   include/rdma/ib_cache.h          |  5 +++++
>>   include/rdma/ib_verbs.h          |  1 +
>>   4 files changed, 30 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
>> index 2325171..cd99c46 100644
>> --- a/drivers/infiniband/core/cache.c
>> +++ b/drivers/infiniband/core/cache.c
>> @@ -917,9 +917,11 @@ static void gid_table_cleanup_one(struct ib_device *ib_dev)
>>   {
>>   	u32 p;
>>   
>> -	rdma_for_each_port (ib_dev, p)
>> +	rdma_for_each_port (ib_dev, p) {
>> +		ib_dev->port_data[p].cache_is_initialized = 0;
> 
> I think that this line is not needed, we are removing device anyway and
> and query_port is not allowed at this stage.
> 
We have kept this for code completeness purposes. Just as we did with 
set_bit() and clear_bit() APIs.

>>   		cleanup_gid_table_port(ib_dev, p,
>>   				       ib_dev->port_data[p].cache.gid);
>> +	}
>>   }
>>   
>>   static int gid_table_setup_one(struct ib_device *ib_dev)
>> @@ -1466,6 +1468,7 @@ static int config_non_roce_gid_cache(struct ib_device *device,
>>   	struct ib_port_attr       *tprops = NULL;
>>   	struct ib_pkey_cache      *pkey_cache = NULL;
>>   	struct ib_pkey_cache      *old_pkey_cache = NULL;
>> +	union ib_gid               gid;
>>   	int                        i;
>>   	int                        ret;
>>   
>> @@ -1523,13 +1526,21 @@ static int config_non_roce_gid_cache(struct ib_device *device,
>>   	device->port_data[port].cache.lmc = tprops->lmc;
>>   	device->port_data[port].cache.port_state = tprops->state;
>>   
>> -	device->port_data[port].cache.subnet_prefix = tprops->subnet_prefix;
>> +	ret = rdma_query_gid(device, port, 0, &gid);
>> +	if (ret) {
>> +		write_unlock_irq(&device->cache.lock);
>> +		goto err;
>> +	}
>> +
>> +	device->port_data[port].cache.subnet_prefix =
>> +			be64_to_cpu(gid.global.subnet_prefix);
>> +
>>   	write_unlock_irq(&device->cache_lock);
>>   
>>   	if (enforce_security)
>>   		ib_security_cache_change(device,
>>   					 port,
>> -					 tprops->subnet_prefix);
>> +					 be64_to_cpu(gid.global.subnet_prefix));
>>   
>>   	kfree(old_pkey_cache);
>>   	kfree(tprops);
>> @@ -1629,6 +1640,7 @@ int ib_cache_setup_one(struct ib_device *device)
>>   		err = ib_cache_update(device, p, true, true, true);
>>   		if (err)
>>   			return err;
>> +		device->port_data[p].cache_is_initialized = 1;
>>   	}
>>   
>>   	return 0;
>> diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
>> index 7a617e4..57b9039 100644
>> --- a/drivers/infiniband/core/device.c
>> +++ b/drivers/infiniband/core/device.c
>> @@ -2057,6 +2057,15 @@ static int __ib_query_port(struct ib_device *device,
>>   	    IB_LINK_LAYER_INFINIBAND)
>>   		return 0;
>>   
>> +	if (!ib_cache_is_initialised(device, port_num))
>> +		goto query_gid_from_device;
> 
> IMHO, we don't need this new function and can access ib_port_data
> directly. In device.c, we have plenty of places that does it.
> 
> Not critical.
> 
Added this function to have a way to check validity of cache, such that 
it could be used in future for the same check in areas to which 
ib_port_data is opaque.

>> +
>> +	ib_get_cached_subnet_prefix(device, port_num,
>> +				    &port_attr->subnet_prefix);
>> +
>> +	return 0;
>> +
>> +query_gid_from_device:
>>   	err = device->ops.query_gid(device, port_num, 0, &gid);
>>   	if (err)
>>   		return err;
>> diff --git a/include/rdma/ib_cache.h b/include/rdma/ib_cache.h
>> index 226ae37..46b43a7 100644
>> --- a/include/rdma/ib_cache.h
>> +++ b/include/rdma/ib_cache.h
>> @@ -114,4 +114,9 @@ ssize_t rdma_query_gid_table(struct ib_device *device,
>>   			     struct ib_uverbs_gid_entry *entries,
>>   			     size_t max_entries);
>>   
>> +static inline bool ib_cache_is_initialised(struct ib_device *device,
>> +					u32 port_num)
>> +{
>> +	return device->port_data[port_num].cache_is_initialized;
>> +}
>>   #endif /* _IB_CACHE_H */
>> diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
>> index c96d601..405f7da 100644
>> --- a/include/rdma/ib_verbs.h
>> +++ b/include/rdma/ib_verbs.h
>> @@ -2177,6 +2177,7 @@ struct ib_port_data {
>>   
>>   	spinlock_t netdev_lock;
>>   
>> +	u8 cache_is_initialized:1;
>>   	struct list_head pkey_list;
>>   
>>   	struct ib_port_cache cache;
>> -- 
>> 1.8.3.1
>>
Anand Khoje June 16, 2021, 7:43 a.m. UTC | #4
On 6/16/2021 1:00 PM, Leon Romanovsky wrote:
> On Wed, Jun 16, 2021 at 12:22:13PM +0530, Anand Khoje wrote:
>> ib_query_port() calls device->ops.query_port() to get the port
>> attributes. The method of querying is device driver specific.
>> The same function calls device->ops.query_gid() to get the GID and
>> extract the subnet_prefix (gid_prefix).
>>
>> The GID and subnet_prefix are stored in a cache. But they do not get
>> read from the cache if the device is an Infiniband device. The
>> following change takes advantage of the cached subnet_prefix.
>> Testing with RDBMS has shown a significant improvement in performance
>> with this change.
>>
>> The function ib_cache_is_initialised() is introduced because
>> ib_query_port() gets called early in the stage when the cache is not
>> built while reading port immutable property.
>>
>> In that case, the default GID still gets read from HCA for IB link-
>> layer devices.
>>
>> In the situation of an event causing cache update, the subnet_prefix
>> will get retrieved from newly updated GID cache in ib_cache_update(),
>> so that we do not end up reading a stale value from cache via
>> ib_query_port().
>>
>> Fixes: fad61ad ("IB/core: Add subnet prefix to port info")
>> Suggested-by: Leon Romanovsky <leonro@nvidia.com>
>> Suggested-by: Aru Kolappan <aru.kolappan@oracle.com>
>> Signed-off-by: Anand Khoje <anand.a.khoje@oracle.com>
>> Signed-off-by: Haakon Bugge <haakon.bugge@oracle.com>
>> ---
> 
> <...>
> 
>> @@ -1523,13 +1526,21 @@ static int config_non_roce_gid_cache(struct ib_device *device,
>>   	device->port_data[port].cache.lmc = tprops->lmc;
>>   	device->port_data[port].cache.port_state = tprops->state;
>>   
>> -	device->port_data[port].cache.subnet_prefix = tprops->subnet_prefix;
>> +	ret = rdma_query_gid(device, port, 0, &gid);
>> +	if (ret) {
>> +		write_unlock_irq(&device->cache.lock);
> 
> And this patch can't compile. It should be cache_lock and not cache.lock.
> 
> Thanks
> 
Unfortunate typo from my end. Thanks for pointing this out. I will share 
the updated patch.
Anand Khoje June 16, 2021, 8:05 a.m. UTC | #5
On 6/16/2021 1:12 PM, Anand Khoje wrote:
> On 6/16/2021 12:57 PM, Leon Romanovsky wrote:
>> On Wed, Jun 16, 2021 at 12:22:13PM +0530, Anand Khoje wrote:
>>> ib_query_port() calls device->ops.query_port() to get the port
>>> attributes. The method of querying is device driver specific.
>>> The same function calls device->ops.query_gid() to get the GID and
>>> extract the subnet_prefix (gid_prefix).
>>>
>>> The GID and subnet_prefix are stored in a cache. But they do not get
>>> read from the cache if the device is an Infiniband device. The
>>> following change takes advantage of the cached subnet_prefix.
>>> Testing with RDBMS has shown a significant improvement in performance
>>> with this change.
>>>
>>> The function ib_cache_is_initialised() is introduced because
>>> ib_query_port() gets called early in the stage when the cache is not
>>> built while reading port immutable property.
>>>
>>> In that case, the default GID still gets read from HCA for IB link-
>>> layer devices.
>>>
>>> In the situation of an event causing cache update, the subnet_prefix
>>> will get retrieved from newly updated GID cache in ib_cache_update(),
>>> so that we do not end up reading a stale value from cache via
>>> ib_query_port().
>>>
>>> Fixes: fad61ad ("IB/core: Add subnet prefix to port info")
>>> Suggested-by: Leon Romanovsky <leonro@nvidia.com>
>>> Suggested-by: Aru Kolappan <aru.kolappan@oracle.com>
>>> Signed-off-by: Anand Khoje <anand.a.khoje@oracle.com>
>>> Signed-off-by: Haakon Bugge <haakon.bugge@oracle.com>
>>> ---
>>>
>>> v1 -> v2:
>>>      -   Split the v1 patch in 3 patches as per Leon's suggestion.
>>>
>>> v2 -> v3:
>>>      -   Added changes as per Mark Zhang's suggestion of clearing
>>>          flags in git_table_cleanup_one().
>>> v3 -> v4:
>>>      -   Removed the enum ib_port_data_flags and 8 byte flags from
>>>          struct ib_port_data, and the set_bit()/clear_bit() API
>>>          used to update this flag as that was not necessary.
>>>          Done to keep the code simple.
>>>      -   Added code to read subnet_prefix from updated GID cache in the
>>>          event of cache update. Prior to this change, ib_cache_update
>>>          was reading the value for subnet_prefix via ib_query_port(),
>>>          due to this patch, we ended up reading a stale cached value of
>>>          subnet_prefix.
>>>
>>> ---
>>>   drivers/infiniband/core/cache.c  | 18 +++++++++++++++---
>>>   drivers/infiniband/core/device.c |  9 +++++++++
>>>   include/rdma/ib_cache.h          |  5 +++++
>>>   include/rdma/ib_verbs.h          |  1 +
>>>   4 files changed, 30 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/drivers/infiniband/core/cache.c 
>>> b/drivers/infiniband/core/cache.c
>>> index 2325171..cd99c46 100644
>>> --- a/drivers/infiniband/core/cache.c
>>> +++ b/drivers/infiniband/core/cache.c
>>> @@ -917,9 +917,11 @@ static void gid_table_cleanup_one(struct 
>>> ib_device *ib_dev)
>>>   {
>>>       u32 p;
>>> -    rdma_for_each_port (ib_dev, p)
>>> +    rdma_for_each_port (ib_dev, p) {
>>> +        ib_dev->port_data[p].cache_is_initialized = 0;
>>
>> I think that this line is not needed, we are removing device anyway and
>> and query_port is not allowed at this stage.
>>
> We have kept this for code completeness purposes. Just as we did with 
> set_bit() and clear_bit() APIs.
> 
>>>           cleanup_gid_table_port(ib_dev, p,
>>>                          ib_dev->port_data[p].cache.gid);
>>> +    }
>>>   }
>>>   static int gid_table_setup_one(struct ib_device *ib_dev)
>>> @@ -1466,6 +1468,7 @@ static int config_non_roce_gid_cache(struct 
>>> ib_device *device,
>>>       struct ib_port_attr       *tprops = NULL;
>>>       struct ib_pkey_cache      *pkey_cache = NULL;
>>>       struct ib_pkey_cache      *old_pkey_cache = NULL;
>>> +    union ib_gid               gid;
>>>       int                        i;
>>>       int                        ret;
>>> @@ -1523,13 +1526,21 @@ static int config_non_roce_gid_cache(struct 
>>> ib_device *device,
>>>       device->port_data[port].cache.lmc = tprops->lmc;
>>>       device->port_data[port].cache.port_state = tprops->state;
>>> -    device->port_data[port].cache.subnet_prefix = 
>>> tprops->subnet_prefix;
>>> +    ret = rdma_query_gid(device, port, 0, &gid);
>>> +    if (ret) {
>>> +        write_unlock_irq(&device->cache.lock);
>>> +        goto err;
>>> +    }
>>> +
>>> +    device->port_data[port].cache.subnet_prefix =
>>> +            be64_to_cpu(gid.global.subnet_prefix);
>>> +
>>>       write_unlock_irq(&device->cache_lock);
>>>       if (enforce_security)
>>>           ib_security_cache_change(device,
>>>                        port,
>>> -                     tprops->subnet_prefix);
>>> +                     be64_to_cpu(gid.global.subnet_prefix));
>>>       kfree(old_pkey_cache);
>>>       kfree(tprops);
>>> @@ -1629,6 +1640,7 @@ int ib_cache_setup_one(struct ib_device *device)
>>>           err = ib_cache_update(device, p, true, true, true);
>>>           if (err)
>>>               return err;
>>> +        device->port_data[p].cache_is_initialized = 1;
>>>       }
>>>       return 0;
>>> diff --git a/drivers/infiniband/core/device.c 
>>> b/drivers/infiniband/core/device.c
>>> index 7a617e4..57b9039 100644
>>> --- a/drivers/infiniband/core/device.c
>>> +++ b/drivers/infiniband/core/device.c
>>> @@ -2057,6 +2057,15 @@ static int __ib_query_port(struct ib_device 
>>> *device,
>>>           IB_LINK_LAYER_INFINIBAND)
>>>           return 0;
>>> +    if (!ib_cache_is_initialised(device, port_num))
>>> +        goto query_gid_from_device;
>>
>> IMHO, we don't need this new function and can access ib_port_data
>> directly. In device.c, we have plenty of places that does it.
>>
>> Not critical.
>>
> Added this function to have a way to check validity of cache, such that 
> it could be used in future for the same check in areas to which 
> ib_port_data is opaque.
> 
Also, there was a review comment from Mark Zhang for patch version 2, 
where he had suggested to shift clear_bit(IB_PORT_CACHE_INITIALIZED, 
&device->port_data[p].flags);
to gid_table_cleanup_one().

Thanks,
Anand
>>> +
>>> +    ib_get_cached_subnet_prefix(device, port_num,
>>> +                    &port_attr->subnet_prefix);
>>> +
>>> +    return 0;
>>> +
>>> +query_gid_from_device:
>>>       err = device->ops.query_gid(device, port_num, 0, &gid);
>>>       if (err)
>>>           return err;
>>> diff --git a/include/rdma/ib_cache.h b/include/rdma/ib_cache.h
>>> index 226ae37..46b43a7 100644
>>> --- a/include/rdma/ib_cache.h
>>> +++ b/include/rdma/ib_cache.h
>>> @@ -114,4 +114,9 @@ ssize_t rdma_query_gid_table(struct ib_device 
>>> *device,
>>>                    struct ib_uverbs_gid_entry *entries,
>>>                    size_t max_entries);
>>> +static inline bool ib_cache_is_initialised(struct ib_device *device,
>>> +                    u32 port_num)
>>> +{
>>> +    return device->port_data[port_num].cache_is_initialized;
>>> +}
>>>   #endif /* _IB_CACHE_H */
>>> diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
>>> index c96d601..405f7da 100644
>>> --- a/include/rdma/ib_verbs.h
>>> +++ b/include/rdma/ib_verbs.h
>>> @@ -2177,6 +2177,7 @@ struct ib_port_data {
>>>       spinlock_t netdev_lock;
>>> +    u8 cache_is_initialized:1;
>>>       struct list_head pkey_list;
>>>       struct ib_port_cache cache;
>>> -- 
>>> 1.8.3.1
>>>
>
Leon Romanovsky June 16, 2021, 8:41 a.m. UTC | #6
On Wed, Jun 16, 2021 at 01:12:51PM +0530, Anand Khoje wrote:
> On 6/16/2021 12:57 PM, Leon Romanovsky wrote:
> > On Wed, Jun 16, 2021 at 12:22:13PM +0530, Anand Khoje wrote:
> > > ib_query_port() calls device->ops.query_port() to get the port
> > > attributes. The method of querying is device driver specific.
> > > The same function calls device->ops.query_gid() to get the GID and
> > > extract the subnet_prefix (gid_prefix).
> > > 
> > > The GID and subnet_prefix are stored in a cache. But they do not get
> > > read from the cache if the device is an Infiniband device. The
> > > following change takes advantage of the cached subnet_prefix.
> > > Testing with RDBMS has shown a significant improvement in performance
> > > with this change.
> > > 
> > > The function ib_cache_is_initialised() is introduced because
> > > ib_query_port() gets called early in the stage when the cache is not
> > > built while reading port immutable property.
> > > 
> > > In that case, the default GID still gets read from HCA for IB link-
> > > layer devices.
> > > 
> > > In the situation of an event causing cache update, the subnet_prefix
> > > will get retrieved from newly updated GID cache in ib_cache_update(),
> > > so that we do not end up reading a stale value from cache via
> > > ib_query_port().
> > > 
> > > Fixes: fad61ad ("IB/core: Add subnet prefix to port info")
> > > Suggested-by: Leon Romanovsky <leonro@nvidia.com>
> > > Suggested-by: Aru Kolappan <aru.kolappan@oracle.com>
> > > Signed-off-by: Anand Khoje <anand.a.khoje@oracle.com>
> > > Signed-off-by: Haakon Bugge <haakon.bugge@oracle.com>
> > > ---
> > > 
> > > v1 -> v2:
> > >      -   Split the v1 patch in 3 patches as per Leon's suggestion.
> > > 
> > > v2 -> v3:
> > >      -   Added changes as per Mark Zhang's suggestion of clearing
> > >          flags in git_table_cleanup_one().
> > > v3 -> v4:
> > >      -   Removed the enum ib_port_data_flags and 8 byte flags from
> > >          struct ib_port_data, and the set_bit()/clear_bit() API
> > >          used to update this flag as that was not necessary.
> > >          Done to keep the code simple.
> > >      -   Added code to read subnet_prefix from updated GID cache in the
> > >          event of cache update. Prior to this change, ib_cache_update
> > >          was reading the value for subnet_prefix via ib_query_port(),
> > >          due to this patch, we ended up reading a stale cached value of
> > >          subnet_prefix.
> > > 
> > > ---
> > >   drivers/infiniband/core/cache.c  | 18 +++++++++++++++---
> > >   drivers/infiniband/core/device.c |  9 +++++++++
> > >   include/rdma/ib_cache.h          |  5 +++++
> > >   include/rdma/ib_verbs.h          |  1 +
> > >   4 files changed, 30 insertions(+), 3 deletions(-)
> > > 
> > > diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
> > > index 2325171..cd99c46 100644
> > > --- a/drivers/infiniband/core/cache.c
> > > +++ b/drivers/infiniband/core/cache.c
> > > @@ -917,9 +917,11 @@ static void gid_table_cleanup_one(struct ib_device *ib_dev)
> > >   {
> > >   	u32 p;
> > > -	rdma_for_each_port (ib_dev, p)
> > > +	rdma_for_each_port (ib_dev, p) {
> > > +		ib_dev->port_data[p].cache_is_initialized = 0;
> > 
> > I think that this line is not needed, we are removing device anyway and
> > and query_port is not allowed at this stage.
> > 
> We have kept this for code completeness purposes. Just as we did with
> set_bit() and clear_bit() APIs.

You are not using *_bit() API now, so let's not clear here.
It is not completeness, but misleading. It gives false assumption
that cache_is_initialized is used later in the code.

> 
> > >   		cleanup_gid_table_port(ib_dev, p,
> > >   				       ib_dev->port_data[p].cache.gid);
> > > +	}
> > >   }
> > >   static int gid_table_setup_one(struct ib_device *ib_dev)
> > > @@ -1466,6 +1468,7 @@ static int config_non_roce_gid_cache(struct ib_device *device,
> > >   	struct ib_port_attr       *tprops = NULL;
> > >   	struct ib_pkey_cache      *pkey_cache = NULL;
> > >   	struct ib_pkey_cache      *old_pkey_cache = NULL;
> > > +	union ib_gid               gid;
> > >   	int                        i;
> > >   	int                        ret;
> > > @@ -1523,13 +1526,21 @@ static int config_non_roce_gid_cache(struct ib_device *device,
> > >   	device->port_data[port].cache.lmc = tprops->lmc;
> > >   	device->port_data[port].cache.port_state = tprops->state;
> > > -	device->port_data[port].cache.subnet_prefix = tprops->subnet_prefix;
> > > +	ret = rdma_query_gid(device, port, 0, &gid);
> > > +	if (ret) {
> > > +		write_unlock_irq(&device->cache.lock);
> > > +		goto err;
> > > +	}
> > > +
> > > +	device->port_data[port].cache.subnet_prefix =
> > > +			be64_to_cpu(gid.global.subnet_prefix);
> > > +
> > >   	write_unlock_irq(&device->cache_lock);
> > >   	if (enforce_security)
> > >   		ib_security_cache_change(device,
> > >   					 port,
> > > -					 tprops->subnet_prefix);
> > > +					 be64_to_cpu(gid.global.subnet_prefix));
> > >   	kfree(old_pkey_cache);
> > >   	kfree(tprops);
> > > @@ -1629,6 +1640,7 @@ int ib_cache_setup_one(struct ib_device *device)
> > >   		err = ib_cache_update(device, p, true, true, true);
> > >   		if (err)
> > >   			return err;
> > > +		device->port_data[p].cache_is_initialized = 1;
> > >   	}
> > >   	return 0;
> > > diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
> > > index 7a617e4..57b9039 100644
> > > --- a/drivers/infiniband/core/device.c
> > > +++ b/drivers/infiniband/core/device.c
> > > @@ -2057,6 +2057,15 @@ static int __ib_query_port(struct ib_device *device,
> > >   	    IB_LINK_LAYER_INFINIBAND)
> > >   		return 0;
> > > +	if (!ib_cache_is_initialised(device, port_num))
> > > +		goto query_gid_from_device;
> > 
> > IMHO, we don't need this new function and can access ib_port_data
> > directly. In device.c, we have plenty of places that does it.
> > 
> > Not critical.
> > 
> Added this function to have a way to check validity of cache, such that it
> could be used in future for the same check in areas to which ib_port_data is
> opaque.


It is ok, just call directly if (!device->port_data[port_num].cache_is_initialized).

Thanks
kernel test robot June 17, 2021, 10:05 a.m. UTC | #7
Hi Anand,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on rdma/for-next]
[also build test ERROR on next-20210616]
[cannot apply to linux/master linus/master v5.13-rc6]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Anand-Khoje/IB-core-Obtaining-subnet_prefix-from-cache-in/20210617-102611
base:   https://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma.git for-next
config: powerpc64-randconfig-r024-20210617 (attached as .config)
compiler: clang version 13.0.0 (https://github.com/llvm/llvm-project 64720f57bea6a6bf033feef4a5751ab9c0c3b401)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # install powerpc64 cross compiling tool for clang build
        # apt-get install binutils-powerpc64-linux-gnu
        # https://github.com/0day-ci/linux/commit/495253a987df586d8c5f4c525999cdf39c5823f0
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Anand-Khoje/IB-core-Obtaining-subnet_prefix-from-cache-in/20210617-102611
        git checkout 495253a987df586d8c5f4c525999cdf39c5823f0
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=powerpc64 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   In file included from drivers/infiniband/core/cache.c:36:
   In file included from include/linux/module.h:12:
   In file included from include/linux/list.h:9:
   In file included from include/linux/kernel.h:12:
   In file included from include/linux/bitops.h:32:
   In file included from arch/powerpc/include/asm/bitops.h:62:
   arch/powerpc/include/asm/barrier.h:49:9: warning: '__lwsync' macro redefined [-Wmacro-redefined]
   #define __lwsync()      __asm__ __volatile__ (stringify_in_c(LWSYNC) : : :"memory")
           ^
   <built-in>:308:9: note: previous definition is here
   #define __lwsync __builtin_ppc_lwsync
           ^
>> drivers/infiniband/core/cache.c:1531:29: error: no member named 'cache' in 'struct ib_device'
                   write_unlock_irq(&device->cache.lock);
                                     ~~~~~~  ^
   include/linux/rwlock.h:108:55: note: expanded from macro 'write_unlock_irq'
   #define write_unlock_irq(lock)          _raw_write_unlock_irq(lock)
                                                                 ^~~~
   1 warning and 1 error generated.


vim +1531 drivers/infiniband/core/cache.c

  1463	
  1464	static int
  1465	ib_cache_update(struct ib_device *device, u32 port, bool update_gids,
  1466			bool update_pkeys, bool enforce_security)
  1467	{
  1468		struct ib_port_attr       *tprops = NULL;
  1469		struct ib_pkey_cache      *pkey_cache = NULL;
  1470		struct ib_pkey_cache      *old_pkey_cache = NULL;
  1471		union ib_gid               gid;
  1472		int                        i;
  1473		int                        ret;
  1474	
  1475		if (!rdma_is_port_valid(device, port))
  1476			return -EINVAL;
  1477	
  1478		tprops = kmalloc(sizeof *tprops, GFP_KERNEL);
  1479		if (!tprops)
  1480			return -ENOMEM;
  1481	
  1482		ret = ib_query_port(device, port, tprops);
  1483		if (ret) {
  1484			dev_warn(&device->dev, "ib_query_port failed (%d)\n", ret);
  1485			goto err;
  1486		}
  1487	
  1488		if (!rdma_protocol_roce(device, port) && update_gids) {
  1489			ret = config_non_roce_gid_cache(device, port,
  1490							tprops->gid_tbl_len);
  1491			if (ret)
  1492				goto err;
  1493		}
  1494	
  1495		update_pkeys &= !!tprops->pkey_tbl_len;
  1496	
  1497		if (update_pkeys) {
  1498			pkey_cache = kmalloc(struct_size(pkey_cache, table,
  1499							 tprops->pkey_tbl_len),
  1500					     GFP_KERNEL);
  1501			if (!pkey_cache) {
  1502				ret = -ENOMEM;
  1503				goto err;
  1504			}
  1505	
  1506			pkey_cache->table_len = tprops->pkey_tbl_len;
  1507	
  1508			for (i = 0; i < pkey_cache->table_len; ++i) {
  1509				ret = ib_query_pkey(device, port, i,
  1510						    pkey_cache->table + i);
  1511				if (ret) {
  1512					dev_warn(&device->dev,
  1513						 "ib_query_pkey failed (%d) for index %d\n",
  1514						 ret, i);
  1515					goto err;
  1516				}
  1517			}
  1518		}
  1519	
  1520		write_lock_irq(&device->cache_lock);
  1521	
  1522		if (update_pkeys) {
  1523			old_pkey_cache = device->port_data[port].cache.pkey;
  1524			device->port_data[port].cache.pkey = pkey_cache;
  1525		}
  1526		device->port_data[port].cache.lmc = tprops->lmc;
  1527		device->port_data[port].cache.port_state = tprops->state;
  1528	
  1529		ret = rdma_query_gid(device, port, 0, &gid);
  1530		if (ret) {
> 1531			write_unlock_irq(&device->cache.lock);
  1532			goto err;
  1533		}
  1534	
  1535		device->port_data[port].cache.subnet_prefix =
  1536				be64_to_cpu(gid.global.subnet_prefix);
  1537	
  1538		write_unlock_irq(&device->cache_lock);
  1539	
  1540		if (enforce_security)
  1541			ib_security_cache_change(device,
  1542						 port,
  1543						 be64_to_cpu(gid.global.subnet_prefix));
  1544	
  1545		kfree(old_pkey_cache);
  1546		kfree(tprops);
  1547		return 0;
  1548	
  1549	err:
  1550		kfree(pkey_cache);
  1551		kfree(tprops);
  1552		return ret;
  1553	}
  1554	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
kernel test robot June 17, 2021, 10:11 a.m. UTC | #8
Hi Anand,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on rdma/for-next]
[also build test ERROR on next-20210616]
[cannot apply to linux/master linus/master v5.13-rc6]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Anand-Khoje/IB-core-Obtaining-subnet_prefix-from-cache-in/20210617-102611
base:   https://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma.git for-next
config: powerpc64-randconfig-r012-20210617 (attached as .config)
compiler: clang version 13.0.0 (https://github.com/llvm/llvm-project 64720f57bea6a6bf033feef4a5751ab9c0c3b401)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # install powerpc64 cross compiling tool for clang build
        # apt-get install binutils-powerpc64-linux-gnu
        # https://github.com/0day-ci/linux/commit/495253a987df586d8c5f4c525999cdf39c5823f0
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Anand-Khoje/IB-core-Obtaining-subnet_prefix-from-cache-in/20210617-102611
        git checkout 495253a987df586d8c5f4c525999cdf39c5823f0
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=powerpc64 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   In file included from drivers/infiniband/core/cache.c:36:
   In file included from include/linux/module.h:12:
   In file included from include/linux/list.h:9:
   In file included from include/linux/kernel.h:12:
   In file included from include/linux/bitops.h:32:
   In file included from arch/powerpc/include/asm/bitops.h:62:
   arch/powerpc/include/asm/barrier.h:49:9: warning: '__lwsync' macro redefined [-Wmacro-redefined]
   #define __lwsync()      __asm__ __volatile__ (stringify_in_c(LWSYNC) : : :"memory")
           ^
   <built-in>:310:9: note: previous definition is here
   #define __lwsync __builtin_ppc_lwsync
           ^
>> drivers/infiniband/core/cache.c:1531:29: error: no member named 'cache' in 'struct ib_device'
                   write_unlock_irq(&device->cache.lock);
                                     ~~~~~~  ^
   include/linux/rwlock.h:108:55: note: expanded from macro 'write_unlock_irq'
   #define write_unlock_irq(lock)          _raw_write_unlock_irq(lock)
                                                                 ^~~~
   1 warning and 1 error generated.


vim +1531 drivers/infiniband/core/cache.c

  1463	
  1464	static int
  1465	ib_cache_update(struct ib_device *device, u32 port, bool update_gids,
  1466			bool update_pkeys, bool enforce_security)
  1467	{
  1468		struct ib_port_attr       *tprops = NULL;
  1469		struct ib_pkey_cache      *pkey_cache = NULL;
  1470		struct ib_pkey_cache      *old_pkey_cache = NULL;
  1471		union ib_gid               gid;
  1472		int                        i;
  1473		int                        ret;
  1474	
  1475		if (!rdma_is_port_valid(device, port))
  1476			return -EINVAL;
  1477	
  1478		tprops = kmalloc(sizeof *tprops, GFP_KERNEL);
  1479		if (!tprops)
  1480			return -ENOMEM;
  1481	
  1482		ret = ib_query_port(device, port, tprops);
  1483		if (ret) {
  1484			dev_warn(&device->dev, "ib_query_port failed (%d)\n", ret);
  1485			goto err;
  1486		}
  1487	
  1488		if (!rdma_protocol_roce(device, port) && update_gids) {
  1489			ret = config_non_roce_gid_cache(device, port,
  1490							tprops->gid_tbl_len);
  1491			if (ret)
  1492				goto err;
  1493		}
  1494	
  1495		update_pkeys &= !!tprops->pkey_tbl_len;
  1496	
  1497		if (update_pkeys) {
  1498			pkey_cache = kmalloc(struct_size(pkey_cache, table,
  1499							 tprops->pkey_tbl_len),
  1500					     GFP_KERNEL);
  1501			if (!pkey_cache) {
  1502				ret = -ENOMEM;
  1503				goto err;
  1504			}
  1505	
  1506			pkey_cache->table_len = tprops->pkey_tbl_len;
  1507	
  1508			for (i = 0; i < pkey_cache->table_len; ++i) {
  1509				ret = ib_query_pkey(device, port, i,
  1510						    pkey_cache->table + i);
  1511				if (ret) {
  1512					dev_warn(&device->dev,
  1513						 "ib_query_pkey failed (%d) for index %d\n",
  1514						 ret, i);
  1515					goto err;
  1516				}
  1517			}
  1518		}
  1519	
  1520		write_lock_irq(&device->cache_lock);
  1521	
  1522		if (update_pkeys) {
  1523			old_pkey_cache = device->port_data[port].cache.pkey;
  1524			device->port_data[port].cache.pkey = pkey_cache;
  1525		}
  1526		device->port_data[port].cache.lmc = tprops->lmc;
  1527		device->port_data[port].cache.port_state = tprops->state;
  1528	
  1529		ret = rdma_query_gid(device, port, 0, &gid);
  1530		if (ret) {
> 1531			write_unlock_irq(&device->cache.lock);
  1532			goto err;
  1533		}
  1534	
  1535		device->port_data[port].cache.subnet_prefix =
  1536				be64_to_cpu(gid.global.subnet_prefix);
  1537	
  1538		write_unlock_irq(&device->cache_lock);
  1539	
  1540		if (enforce_security)
  1541			ib_security_cache_change(device,
  1542						 port,
  1543						 be64_to_cpu(gid.global.subnet_prefix));
  1544	
  1545		kfree(old_pkey_cache);
  1546		kfree(tprops);
  1547		return 0;
  1548	
  1549	err:
  1550		kfree(pkey_cache);
  1551		kfree(tprops);
  1552		return ret;
  1553	}
  1554	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
diff mbox series

Patch

diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index 2325171..cd99c46 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -917,9 +917,11 @@  static void gid_table_cleanup_one(struct ib_device *ib_dev)
 {
 	u32 p;
 
-	rdma_for_each_port (ib_dev, p)
+	rdma_for_each_port (ib_dev, p) {
+		ib_dev->port_data[p].cache_is_initialized = 0;
 		cleanup_gid_table_port(ib_dev, p,
 				       ib_dev->port_data[p].cache.gid);
+	}
 }
 
 static int gid_table_setup_one(struct ib_device *ib_dev)
@@ -1466,6 +1468,7 @@  static int config_non_roce_gid_cache(struct ib_device *device,
 	struct ib_port_attr       *tprops = NULL;
 	struct ib_pkey_cache      *pkey_cache = NULL;
 	struct ib_pkey_cache      *old_pkey_cache = NULL;
+	union ib_gid               gid;
 	int                        i;
 	int                        ret;
 
@@ -1523,13 +1526,21 @@  static int config_non_roce_gid_cache(struct ib_device *device,
 	device->port_data[port].cache.lmc = tprops->lmc;
 	device->port_data[port].cache.port_state = tprops->state;
 
-	device->port_data[port].cache.subnet_prefix = tprops->subnet_prefix;
+	ret = rdma_query_gid(device, port, 0, &gid);
+	if (ret) {
+		write_unlock_irq(&device->cache.lock);
+		goto err;
+	}
+
+	device->port_data[port].cache.subnet_prefix =
+			be64_to_cpu(gid.global.subnet_prefix);
+
 	write_unlock_irq(&device->cache_lock);
 
 	if (enforce_security)
 		ib_security_cache_change(device,
 					 port,
-					 tprops->subnet_prefix);
+					 be64_to_cpu(gid.global.subnet_prefix));
 
 	kfree(old_pkey_cache);
 	kfree(tprops);
@@ -1629,6 +1640,7 @@  int ib_cache_setup_one(struct ib_device *device)
 		err = ib_cache_update(device, p, true, true, true);
 		if (err)
 			return err;
+		device->port_data[p].cache_is_initialized = 1;
 	}
 
 	return 0;
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 7a617e4..57b9039 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -2057,6 +2057,15 @@  static int __ib_query_port(struct ib_device *device,
 	    IB_LINK_LAYER_INFINIBAND)
 		return 0;
 
+	if (!ib_cache_is_initialised(device, port_num))
+		goto query_gid_from_device;
+
+	ib_get_cached_subnet_prefix(device, port_num,
+				    &port_attr->subnet_prefix);
+
+	return 0;
+
+query_gid_from_device:
 	err = device->ops.query_gid(device, port_num, 0, &gid);
 	if (err)
 		return err;
diff --git a/include/rdma/ib_cache.h b/include/rdma/ib_cache.h
index 226ae37..46b43a7 100644
--- a/include/rdma/ib_cache.h
+++ b/include/rdma/ib_cache.h
@@ -114,4 +114,9 @@  ssize_t rdma_query_gid_table(struct ib_device *device,
 			     struct ib_uverbs_gid_entry *entries,
 			     size_t max_entries);
 
+static inline bool ib_cache_is_initialised(struct ib_device *device,
+					u32 port_num)
+{
+	return device->port_data[port_num].cache_is_initialized;
+}
 #endif /* _IB_CACHE_H */
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index c96d601..405f7da 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -2177,6 +2177,7 @@  struct ib_port_data {
 
 	spinlock_t netdev_lock;
 
+	u8 cache_is_initialized:1;
 	struct list_head pkey_list;
 
 	struct ib_port_cache cache;