[for-next] IB/core: Only update PKEY and GID caches on respective events

Message ID 1620128784-3283-1-git-send-email-haakon.bugge@oracle.com (mailing list archive)
State Superseded
Series [for-next] IB/core: Only update PKEY and GID caches on respective events

Commit Message

Haakon Bugge May 4, 2021, 11:46 a.m. UTC
Both the PKEY and GID tables in an HCA can hold on the order of
hundreds of entries. Reading them is expensive, partly because the
API for retrieving them only returns a single entry at a time.
Further, on certain implementations, e.g., CX-3, the VFs are
paravirtualized in this respect and have to rely on the PF driver to
perform the read. This again requires VF to PF communication.

The IB core cache is refreshed on all events. Hence, only refresh the
PKEY and GID caches when the event received is IB_EVENT_PKEY_CHANGE
or IB_EVENT_GID_CHANGE, respectively.

Signed-off-by: Håkon Bugge <haakon.bugge@oracle.com>
---
 drivers/infiniband/core/cache.c | 25 ++++++++++++++++---------
 1 file changed, 16 insertions(+), 9 deletions(-)

Comments

Zhu Yanjun May 4, 2021, 2:15 p.m. UTC | #1
On Tue, May 4, 2021 at 7:50 PM Håkon Bugge <haakon.bugge@oracle.com> wrote:
>
> Both the PKEY and GID tables in an HCA can hold on the order of
> hundreds of entries. Reading them is expensive, partly because the
> API for retrieving them only returns a single entry at a time.
> Further, on certain implementations, e.g., CX-3, the VFs are
> paravirtualized in this respect and have to rely on the PF driver to
> perform the read. This again requires VF to PF communication.
>
> The IB core cache is refreshed on all events. Hence, only refresh the
> PKEY and GID caches when the event received is IB_EVENT_PKEY_CHANGE
> or IB_EVENT_GID_CHANGE, respectively.
>

Missing Fixes?

Zhu Yanjun

> Signed-off-by: Håkon Bugge <haakon.bugge@oracle.com>
> ---
>  drivers/infiniband/core/cache.c | 25 ++++++++++++++++---------
>  1 file changed, 16 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
> index 5c9fac7..531ae6b 100644
> --- a/drivers/infiniband/core/cache.c
> +++ b/drivers/infiniband/core/cache.c
> @@ -1472,10 +1472,14 @@ static int config_non_roce_gid_cache(struct ib_device *device,
>  }
>
>  static int
> -ib_cache_update(struct ib_device *device, u8 port, bool enforce_security)
> +ib_cache_update(struct ib_device *device, u8 port, enum ib_event_type event,
> +               bool reg_dev, bool enforce_security)
>  {
>         struct ib_port_attr       *tprops = NULL;
> -       struct ib_pkey_cache      *pkey_cache = NULL, *old_pkey_cache;
> +       struct ib_pkey_cache      *pkey_cache = NULL;
> +       struct ib_pkey_cache      *old_pkey_cache = NULL;
> +       bool                       update_pkey_cache = (reg_dev || event == IB_EVENT_PKEY_CHANGE);
> +       bool                       update_gid_cache = (reg_dev || event == IB_EVENT_GID_CHANGE);
>         int                        i;
>         int                        ret;
>
> @@ -1492,14 +1496,16 @@ static int config_non_roce_gid_cache(struct ib_device *device,
>                 goto err;
>         }
>
> -       if (!rdma_protocol_roce(device, port)) {
> +       if (!rdma_protocol_roce(device, port) && update_gid_cache) {
>                 ret = config_non_roce_gid_cache(device, port,
>                                                 tprops->gid_tbl_len);
>                 if (ret)
>                         goto err;
>         }
>
> -       if (tprops->pkey_tbl_len) {
> +       update_pkey_cache &= !!tprops->pkey_tbl_len;
> +
> +       if (update_pkey_cache) {
>                 pkey_cache = kmalloc(struct_size(pkey_cache, table,
>                                                  tprops->pkey_tbl_len),
>                                      GFP_KERNEL);
> @@ -1524,9 +1530,10 @@ static int config_non_roce_gid_cache(struct ib_device *device,
>
>         write_lock_irq(&device->cache_lock);
>
> -       old_pkey_cache = device->port_data[port].cache.pkey;
> -
> -       device->port_data[port].cache.pkey = pkey_cache;
> +       if (update_pkey_cache) {
> +               old_pkey_cache = device->port_data[port].cache.pkey;
> +               device->port_data[port].cache.pkey = pkey_cache;
> +       }
>         device->port_data[port].cache.lmc = tprops->lmc;
>         device->port_data[port].cache.port_state = tprops->state;
>
> @@ -1558,7 +1565,7 @@ static void ib_cache_event_task(struct work_struct *_work)
>          * the cache.
>          */
>         ret = ib_cache_update(work->event.device, work->event.element.port_num,
> -                             work->enforce_security);
> +                             work->event.event, false, work->enforce_security);
>
>         /* GID event is notified already for individual GID entries by
>          * dispatch_gid_change_event(). Hence, notifiy for rest of the
> @@ -1631,7 +1638,7 @@ int ib_cache_setup_one(struct ib_device *device)
>                 return err;
>
>         rdma_for_each_port (device, p) {
> -               err = ib_cache_update(device, p, true);
> +               err = ib_cache_update(device, p, 0, true, true);
>                 if (err)
>                         return err;
>         }
> --
> 1.8.3.1
>
Haakon Bugge May 4, 2021, 2:35 p.m. UTC | #2
> On 4 May 2021, at 16:15, Zhu Yanjun <zyjzyj2000@gmail.com> wrote:
> 
> On Tue, May 4, 2021 at 7:50 PM Håkon Bugge <haakon.bugge@oracle.com> wrote:
>> 
>> Both the PKEY and GID tables in an HCA can hold on the order of
>> hundreds of entries. Reading them is expensive, partly because the
>> API for retrieving them only returns a single entry at a time.
>> Further, on certain implementations, e.g., CX-3, the VFs are
>> paravirtualized in this respect and have to rely on the PF driver to
>> perform the read. This again requires VF to PF communication.
>>
>> The IB core cache is refreshed on all events. Hence, only refresh the
>> PKEY and GID caches when the event received is IB_EVENT_PKEY_CHANGE
>> or IB_EVENT_GID_CHANGE, respectively.
>> 
> 
> Missing Fixes?

I was thinking that this is a performance fix, not a bug fix. Probably not severe enough to go into stable. But by all means, it would be:

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")


Thxs, Håkon



> Zhu Yanjun
> 
>> Signed-off-by: Håkon Bugge <haakon.bugge@oracle.com>
>> ---
>> drivers/infiniband/core/cache.c | 25 ++++++++++++++++---------
>> 1 file changed, 16 insertions(+), 9 deletions(-)
>> 
>> diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
>> index 5c9fac7..531ae6b 100644
>> --- a/drivers/infiniband/core/cache.c
>> +++ b/drivers/infiniband/core/cache.c
>> @@ -1472,10 +1472,14 @@ static int config_non_roce_gid_cache(struct ib_device *device,
>> }
>> 
>> static int
>> -ib_cache_update(struct ib_device *device, u8 port, bool enforce_security)
>> +ib_cache_update(struct ib_device *device, u8 port, enum ib_event_type event,
>> +               bool reg_dev, bool enforce_security)
>> {
>>        struct ib_port_attr       *tprops = NULL;
>> -       struct ib_pkey_cache      *pkey_cache = NULL, *old_pkey_cache;
>> +       struct ib_pkey_cache      *pkey_cache = NULL;
>> +       struct ib_pkey_cache      *old_pkey_cache = NULL;
>> +       bool                       update_pkey_cache = (reg_dev || event == IB_EVENT_PKEY_CHANGE);
>> +       bool                       update_gid_cache = (reg_dev || event == IB_EVENT_GID_CHANGE);
>>        int                        i;
>>        int                        ret;
>> 
>> @@ -1492,14 +1496,16 @@ static int config_non_roce_gid_cache(struct ib_device *device,
>>                goto err;
>>        }
>> 
>> -       if (!rdma_protocol_roce(device, port)) {
>> +       if (!rdma_protocol_roce(device, port) && update_gid_cache) {
>>                ret = config_non_roce_gid_cache(device, port,
>>                                                tprops->gid_tbl_len);
>>                if (ret)
>>                        goto err;
>>        }
>> 
>> -       if (tprops->pkey_tbl_len) {
>> +       update_pkey_cache &= !!tprops->pkey_tbl_len;
>> +
>> +       if (update_pkey_cache) {
>>                pkey_cache = kmalloc(struct_size(pkey_cache, table,
>>                                                 tprops->pkey_tbl_len),
>>                                     GFP_KERNEL);
>> @@ -1524,9 +1530,10 @@ static int config_non_roce_gid_cache(struct ib_device *device,
>> 
>>        write_lock_irq(&device->cache_lock);
>> 
>> -       old_pkey_cache = device->port_data[port].cache.pkey;
>> -
>> -       device->port_data[port].cache.pkey = pkey_cache;
>> +       if (update_pkey_cache) {
>> +               old_pkey_cache = device->port_data[port].cache.pkey;
>> +               device->port_data[port].cache.pkey = pkey_cache;
>> +       }
>>        device->port_data[port].cache.lmc = tprops->lmc;
>>        device->port_data[port].cache.port_state = tprops->state;
>> 
>> @@ -1558,7 +1565,7 @@ static void ib_cache_event_task(struct work_struct *_work)
>>         * the cache.
>>         */
>>        ret = ib_cache_update(work->event.device, work->event.element.port_num,
>> -                             work->enforce_security);
>> +                             work->event.event, false, work->enforce_security);
>> 
>>        /* GID event is notified already for individual GID entries by
>>         * dispatch_gid_change_event(). Hence, notifiy for rest of the
>> @@ -1631,7 +1638,7 @@ int ib_cache_setup_one(struct ib_device *device)
>>                return err;
>> 
>>        rdma_for_each_port (device, p) {
>> -               err = ib_cache_update(device, p, true);
>> +               err = ib_cache_update(device, p, 0, true, true);
>>                if (err)
>>                        return err;
>>        }
>> --
>> 1.8.3.1
Leon Romanovsky May 6, 2021, 7:29 a.m. UTC | #3
On Tue, May 04, 2021 at 01:46:24PM +0200, Håkon Bugge wrote:
> Both the PKEY and GID tables in an HCA can hold on the order of
> hundreds of entries. Reading them is expensive, partly because the
> API for retrieving them only returns a single entry at a time.
> Further, on certain implementations, e.g., CX-3, the VFs are
> paravirtualized in this respect and have to rely on the PF driver to
> perform the read. This again requires VF to PF communication.
>
> The IB core cache is refreshed on all events. Hence, only refresh the
> PKEY and GID caches when the event received is IB_EVENT_PKEY_CHANGE
> or IB_EVENT_GID_CHANGE, respectively.
> 
> Signed-off-by: Håkon Bugge <haakon.bugge@oracle.com>
> ---
>  drivers/infiniband/core/cache.c | 25 ++++++++++++++++---------
>  1 file changed, 16 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
> index 5c9fac7..531ae6b 100644
> --- a/drivers/infiniband/core/cache.c
> +++ b/drivers/infiniband/core/cache.c
> @@ -1472,10 +1472,14 @@ static int config_non_roce_gid_cache(struct ib_device *device,
>  }
>  
>  static int
> -ib_cache_update(struct ib_device *device, u8 port, bool enforce_security)
> +ib_cache_update(struct ib_device *device, u8 port, enum ib_event_type event,
> +		bool reg_dev, bool enforce_security)
>  {
>  	struct ib_port_attr       *tprops = NULL;
> -	struct ib_pkey_cache      *pkey_cache = NULL, *old_pkey_cache;
> +	struct ib_pkey_cache      *pkey_cache = NULL;
> +	struct ib_pkey_cache      *old_pkey_cache = NULL;
> +	bool			   update_pkey_cache = (reg_dev || event == IB_EVENT_PKEY_CHANGE);
> +	bool			   update_gid_cache = (reg_dev || event == IB_EVENT_GID_CHANGE);

I'm not super excited to see "events" in this function and would be
happier to see this handled in ib_cache_event_task(), with the function
signature becoming:
ib_cache_update(struct ib_device *device, u8 port, bool all_gids,
		bool all_pkeys, bool enforce_security)
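
For illustration, a minimal sketch of what that call site in
ib_cache_event_task() could look like under the suggested signature;
the flag derivation below is an assumption about the proposed refactor,
not code from this thread:

	/* Sketch only: derive per-table flags from the event here, in
	 * ib_cache_event_task(), instead of inside ib_cache_update().
	 */
	bool all_gids = (work->event.event == IB_EVENT_GID_CHANGE);
	bool all_pkeys = (work->event.event == IB_EVENT_PKEY_CHANGE);

	ret = ib_cache_update(work->event.device,
			      work->event.element.port_num,
			      all_gids, all_pkeys,
			      work->enforce_security);

The registration path in ib_cache_setup_one() would then simply pass
true for both flags, keeping ib_cache_update() itself event-agnostic.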

Thanks
Haakon Bugge May 6, 2021, 7:43 a.m. UTC | #4
> On 6 May 2021, at 09:29, Leon Romanovsky <leon@kernel.org> wrote:
> 
> On Tue, May 04, 2021 at 01:46:24PM +0200, Håkon Bugge wrote:
>> Both the PKEY and GID tables in an HCA can hold on the order of
>> hundreds of entries. Reading them is expensive, partly because the
>> API for retrieving them only returns a single entry at a time.
>> Further, on certain implementations, e.g., CX-3, the VFs are
>> paravirtualized in this respect and have to rely on the PF driver to
>> perform the read. This again requires VF to PF communication.
>>
>> The IB core cache is refreshed on all events. Hence, only refresh the
>> PKEY and GID caches when the event received is IB_EVENT_PKEY_CHANGE
>> or IB_EVENT_GID_CHANGE, respectively.
>> 
>> Signed-off-by: Håkon Bugge <haakon.bugge@oracle.com>
>> ---
>> drivers/infiniband/core/cache.c | 25 ++++++++++++++++---------
>> 1 file changed, 16 insertions(+), 9 deletions(-)
>> 
>> diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
>> index 5c9fac7..531ae6b 100644
>> --- a/drivers/infiniband/core/cache.c
>> +++ b/drivers/infiniband/core/cache.c
>> @@ -1472,10 +1472,14 @@ static int config_non_roce_gid_cache(struct ib_device *device,
>> }
>> 
>> static int
>> -ib_cache_update(struct ib_device *device, u8 port, bool enforce_security)
>> +ib_cache_update(struct ib_device *device, u8 port, enum ib_event_type event,
>> +		bool reg_dev, bool enforce_security)
>> {
>> 	struct ib_port_attr       *tprops = NULL;
>> -	struct ib_pkey_cache      *pkey_cache = NULL, *old_pkey_cache;
>> +	struct ib_pkey_cache      *pkey_cache = NULL;
>> +	struct ib_pkey_cache      *old_pkey_cache = NULL;
>> +	bool			   update_pkey_cache = (reg_dev || event == IB_EVENT_PKEY_CHANGE);
>> +	bool			   update_gid_cache = (reg_dev || event == IB_EVENT_GID_CHANGE);
> 
> I'm not super excited to see "events" in this function and would be
> happier to see this handled in ib_cache_event_task(), with the function
> signature becoming:
> ib_cache_update(struct ib_device *device, u8 port, bool all_gids,
> 		bool all_pkeys, bool enforce_security)

I was thinking the other way around: if a new entity, FOO, gets cached and is updated on IB_EVENT_FOO_CHANGE, the change to support this would be more contained; only ib_cache_update() would need to change.
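
As a sketch of that containment argument (IB_EVENT_FOO_CHANGE and the
foo cache are purely hypothetical, for illustration only): with the
event passed down, caching a new entity only touches ib_cache_update(),
roughly:

	bool update_foo_cache = (reg_dev || event == IB_EVENT_FOO_CHANGE);

	if (update_foo_cache) {
		/* read the hypothetical FOO table from the device and
		 * swap it into device->port_data[port].cache.foo while
		 * holding cache_lock
		 */
	}

whereas with per-table booleans in the signature, every caller of
ib_cache_update() would also have to pass a new all_foos argument.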

But by all means, I will send a v2 based on your recommendation.


Thxs, Håkon



> 
> Thanks
Leon Romanovsky May 6, 2021, 8:06 a.m. UTC | #5
On Thu, May 06, 2021 at 07:43:38AM +0000, Haakon Bugge wrote:
> 
> 
> > On 6 May 2021, at 09:29, Leon Romanovsky <leon@kernel.org> wrote:
> > 
> > On Tue, May 04, 2021 at 01:46:24PM +0200, Håkon Bugge wrote:
> >> Both the PKEY and GID tables in an HCA can hold on the order of
> >> hundreds of entries. Reading them is expensive, partly because the
> >> API for retrieving them only returns a single entry at a time.
> >> Further, on certain implementations, e.g., CX-3, the VFs are
> >> paravirtualized in this respect and have to rely on the PF driver to
> >> perform the read. This again requires VF to PF communication.
> >>
> >> The IB core cache is refreshed on all events. Hence, only refresh the
> >> PKEY and GID caches when the event received is IB_EVENT_PKEY_CHANGE
> >> or IB_EVENT_GID_CHANGE, respectively.
> >> 
> >> Signed-off-by: Håkon Bugge <haakon.bugge@oracle.com>
> >> ---
> >> drivers/infiniband/core/cache.c | 25 ++++++++++++++++---------
> >> 1 file changed, 16 insertions(+), 9 deletions(-)
> >> 
> >> diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
> >> index 5c9fac7..531ae6b 100644
> >> --- a/drivers/infiniband/core/cache.c
> >> +++ b/drivers/infiniband/core/cache.c
> >> @@ -1472,10 +1472,14 @@ static int config_non_roce_gid_cache(struct ib_device *device,
> >> }
> >> 
> >> static int
> >> -ib_cache_update(struct ib_device *device, u8 port, bool enforce_security)
> >> +ib_cache_update(struct ib_device *device, u8 port, enum ib_event_type event,
> >> +		bool reg_dev, bool enforce_security)
> >> {
> >> 	struct ib_port_attr       *tprops = NULL;
> >> -	struct ib_pkey_cache      *pkey_cache = NULL, *old_pkey_cache;
> >> +	struct ib_pkey_cache      *pkey_cache = NULL;
> >> +	struct ib_pkey_cache      *old_pkey_cache = NULL;
> >> +	bool			   update_pkey_cache = (reg_dev || event == IB_EVENT_PKEY_CHANGE);
> >> +	bool			   update_gid_cache = (reg_dev || event == IB_EVENT_GID_CHANGE);
> > 
> > I'm not super excited to see "events" in this function and would be
> > happier to see this handled in ib_cache_event_task(), with the function
> > signature becoming:
> > ib_cache_update(struct ib_device *device, u8 port, bool all_gids,
> > 		bool all_pkeys, bool enforce_security)
> 
> I was thinking the other way around: if a new entity, FOO, gets cached and is updated on IB_EVENT_FOO_CHANGE, the change to support this would be more contained; only ib_cache_update() would need to change.

We are already handling enforce_security outside of ib_cache_update().

> 
> But by all means, I will send a v2 based on your recommendation.

Thanks

> 
> 
> Thxs, Håkon
> 
> 
> 
> > 
> > Thanks
>

Patch

diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index 5c9fac7..531ae6b 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -1472,10 +1472,14 @@  static int config_non_roce_gid_cache(struct ib_device *device,
 }
 
 static int
-ib_cache_update(struct ib_device *device, u8 port, bool enforce_security)
+ib_cache_update(struct ib_device *device, u8 port, enum ib_event_type event,
+		bool reg_dev, bool enforce_security)
 {
 	struct ib_port_attr       *tprops = NULL;
-	struct ib_pkey_cache      *pkey_cache = NULL, *old_pkey_cache;
+	struct ib_pkey_cache      *pkey_cache = NULL;
+	struct ib_pkey_cache      *old_pkey_cache = NULL;
+	bool			   update_pkey_cache = (reg_dev || event == IB_EVENT_PKEY_CHANGE);
+	bool			   update_gid_cache = (reg_dev || event == IB_EVENT_GID_CHANGE);
 	int                        i;
 	int                        ret;
 
@@ -1492,14 +1496,16 @@  static int config_non_roce_gid_cache(struct ib_device *device,
 		goto err;
 	}
 
-	if (!rdma_protocol_roce(device, port)) {
+	if (!rdma_protocol_roce(device, port) && update_gid_cache) {
 		ret = config_non_roce_gid_cache(device, port,
 						tprops->gid_tbl_len);
 		if (ret)
 			goto err;
 	}
 
-	if (tprops->pkey_tbl_len) {
+	update_pkey_cache &= !!tprops->pkey_tbl_len;
+
+	if (update_pkey_cache) {
 		pkey_cache = kmalloc(struct_size(pkey_cache, table,
 						 tprops->pkey_tbl_len),
 				     GFP_KERNEL);
@@ -1524,9 +1530,10 @@  static int config_non_roce_gid_cache(struct ib_device *device,
 
 	write_lock_irq(&device->cache_lock);
 
-	old_pkey_cache = device->port_data[port].cache.pkey;
-
-	device->port_data[port].cache.pkey = pkey_cache;
+	if (update_pkey_cache) {
+		old_pkey_cache = device->port_data[port].cache.pkey;
+		device->port_data[port].cache.pkey = pkey_cache;
+	}
 	device->port_data[port].cache.lmc = tprops->lmc;
 	device->port_data[port].cache.port_state = tprops->state;
 
@@ -1558,7 +1565,7 @@  static void ib_cache_event_task(struct work_struct *_work)
 	 * the cache.
 	 */
 	ret = ib_cache_update(work->event.device, work->event.element.port_num,
-			      work->enforce_security);
+			      work->event.event, false, work->enforce_security);
 
 	/* GID event is notified already for individual GID entries by
 	 * dispatch_gid_change_event(). Hence, notifiy for rest of the
@@ -1631,7 +1638,7 @@  int ib_cache_setup_one(struct ib_device *device)
 		return err;
 
 	rdma_for_each_port (device, p) {
-		err = ib_cache_update(device, p, true);
+		err = ib_cache_update(device, p, 0, true, true);
 		if (err)
 			return err;
 	}