diff mbox series

[net,2/4] ice: Correctly handle aux device when num channels change

Message ID 20221207211040.1099708-3-anthony.l.nguyen@intel.com (mailing list archive)
State Not Applicable
Headers show
Series None | expand

Commit Message

Tony Nguyen Dec. 7, 2022, 9:10 p.m. UTC
From: Dave Ertman <david.m.ertman@intel.com>

When the number of channels/queues changes on an interface, it is necessary
to change how those resources are distributed to the auxiliary device for
maintaining RDMA functionality.  To do this, the best way is to unplug, and
then re-plug the auxiliary device.  This will cause all current resource
allocation to be released, and then re-requested under the new state.

Since the set_channel command from ethtool comes in while holding the RTNL
lock, it is necessary to offset the plugging and unplugging of auxiliary
device to another context.  For this purpose, set the flags for UNPLUG and
PLUG in the PF state, then respond to them in the service task.

Also, since the auxiliary device will be unplugged/plugged at the end of
the flow, it is better to not send the event for TCs changing in the
middle of the flow.  This will prevent a timing issue between the events
and the probe/release calls conflicting.

Fixes: 348048e724a0 ("ice: Implement iidc operations")
Signed-off-by: Dave Ertman <david.m.ertman@intel.com>
Tested-by: Gurucharan G <gurucharanx.g@intel.com> (A Contingent worker at Intel)
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice.h         | 2 ++
 drivers/net/ethernet/intel/ice/ice_ethtool.c | 6 ++++++
 drivers/net/ethernet/intel/ice/ice_idc.c     | 3 +++
 drivers/net/ethernet/intel/ice/ice_main.c    | 3 +++
 4 files changed, 14 insertions(+)

Comments

Saeed Mahameed Dec. 7, 2022, 10:25 p.m. UTC | #1
On 07 Dec 13:10, Tony Nguyen wrote:
>From: Dave Ertman <david.m.ertman@intel.com>
>
>When the number of channels/queues changes on an interface, it is necessary
>to change how those resources are distributed to the auxiliary device for
>maintaining RDMA functionality.  To do this, the best way is to unplug, and

Can you please explain how an ethtool can affect RDMA functionality ?
don't you have full bifurcation between the two eth and rdma interfaces .. 

>then re-plug the auxiliary device.  This will cause all current resource
>allocation to be released, and then re-requested under the new state.
>

I find this really disruptive, changing number of netdev queues to cause
full aux devs restart !

>Since the set_channel command from ethtool comes in while holding the RTNL
>lock, it is necessary to offset the plugging and unplugging of auxiliary
>device to another context.  For this purpose, set the flags for UNPLUG and
>PLUG in the PF state, then respond to them in the service task.
>
>Also, since the auxiliary device will be unplugged/plugged at the end of
>the flow, it is better to not send the event for TCs changing in the
>middle of the flow.  This will prevent a timing issue between the events
>and the probe/release calls conflicting.
>
>Fixes: 348048e724a0 ("ice: Implement iidc operations")
>Signed-off-by: Dave Ertman <david.m.ertman@intel.com>
>Tested-by: Gurucharan G <gurucharanx.g@intel.com> (A Contingent worker at Intel)
>Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
>---
> drivers/net/ethernet/intel/ice/ice.h         | 2 ++
> drivers/net/ethernet/intel/ice/ice_ethtool.c | 6 ++++++
> drivers/net/ethernet/intel/ice/ice_idc.c     | 3 +++
> drivers/net/ethernet/intel/ice/ice_main.c    | 3 +++
> 4 files changed, 14 insertions(+)
>
>diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
>index 001500afc4a6..092e572768fe 100644
>--- a/drivers/net/ethernet/intel/ice/ice.h
>+++ b/drivers/net/ethernet/intel/ice/ice.h
>@@ -281,6 +281,7 @@ enum ice_pf_state {
> 	ICE_FLTR_OVERFLOW_PROMISC,
> 	ICE_VF_DIS,
> 	ICE_CFG_BUSY,
>+	ICE_SET_CHANNELS,
> 	ICE_SERVICE_SCHED,
> 	ICE_SERVICE_DIS,
> 	ICE_FD_FLUSH_REQ,
>@@ -485,6 +486,7 @@ enum ice_pf_flags {
> 	ICE_FLAG_VF_VLAN_PRUNING,
> 	ICE_FLAG_LINK_LENIENT_MODE_ENA,
> 	ICE_FLAG_PLUG_AUX_DEV,
>+	ICE_FLAG_UNPLUG_AUX_DEV,
> 	ICE_FLAG_MTU_CHANGED,
> 	ICE_FLAG_GNSS,			/* GNSS successfully initialized */
> 	ICE_PF_FLAGS_NBITS		/* must be last */
>diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
>index b7be84bbe72d..37e174a19860 100644
>--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
>+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
>@@ -3536,6 +3536,8 @@ static int ice_set_channels(struct net_device *dev, struct ethtool_channels *ch)
> 		return -EINVAL;
> 	}
>
>+	set_bit(ICE_SET_CHANNELS, pf->state);
>+
> 	ice_vsi_recfg_qs(vsi, new_rx, new_tx);
>
> 	if (!netif_is_rxfh_configured(dev))
>@@ -3543,6 +3545,10 @@ static int ice_set_channels(struct net_device *dev, struct ethtool_channels *ch)
>
> 	/* Update rss_size due to change in Rx queues */
> 	vsi->rss_size = ice_get_valid_rss_size(&pf->hw, new_rx);
>+	clear_bit(ICE_SET_CHANNELS, pf->state);
>+

you just set this new state a few lines ago, clearing the bit in the same
function few lines later seems to be an abuse of the pf state machine, 
couldn't you just pass a parameter to the functions which needed this
information ? 

>+	set_bit(ICE_FLAG_UNPLUG_AUX_DEV, pf->flags);
>+	set_bit(ICE_FLAG_PLUG_AUX_DEV, pf->flags);
>
> 	return 0;
> }
>diff --git a/drivers/net/ethernet/intel/ice/ice_idc.c b/drivers/net/ethernet/intel/ice/ice_idc.c
>index 895c32bcc8b5..9bf6fa5ed4c8 100644
>--- a/drivers/net/ethernet/intel/ice/ice_idc.c
>+++ b/drivers/net/ethernet/intel/ice/ice_idc.c
>@@ -37,6 +37,9 @@ void ice_send_event_to_aux(struct ice_pf *pf, struct iidc_event *event)
> 	if (WARN_ON_ONCE(!in_task()))
> 		return;
>
>+	if (test_bit(ICE_SET_CHANNELS, pf->state))
>+		return;
>+
> 	mutex_lock(&pf->adev_mutex);
> 	if (!pf->adev)
> 		goto finish;
>diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
>index d0f14e73e8da..d58f55a72ab3 100644
>--- a/drivers/net/ethernet/intel/ice/ice_main.c
>+++ b/drivers/net/ethernet/intel/ice/ice_main.c
>@@ -2300,6 +2300,9 @@ static void ice_service_task(struct work_struct *work)
> 		}
> 	}
>
>+	if (test_and_clear_bit(ICE_FLAG_UNPLUG_AUX_DEV, pf->flags))
>+		ice_unplug_aux_dev(pf);
>+
> 	if (test_bit(ICE_FLAG_PLUG_AUX_DEV, pf->flags)) {
> 		/* Plug aux device per request */
> 		ice_plug_aux_dev(pf);
>-- 
>2.35.1
>
Ertman, David M Dec. 9, 2022, 5:21 p.m. UTC | #2
> -----Original Message-----
> From: Saeed Mahameed <saeed@kernel.org>
> Sent: Wednesday, December 7, 2022 2:26 PM
> To: Nguyen, Anthony L <anthony.l.nguyen@intel.com>
> Cc: davem@davemloft.net; kuba@kernel.org; pabeni@redhat.com;
> edumazet@google.com; Ertman, David M <david.m.ertman@intel.com>;
> netdev@vger.kernel.org; Saleem, Shiraz <shiraz.saleem@intel.com>; Ismail,
> Mustafa <mustafa.ismail@intel.com>; jgg@nvidia.com; leonro@nvidia.com;
> linux-rdma@vger.kernel.org; G, GurucharanX <gurucharanx.g@intel.com>
> Subject: Re: [PATCH net 2/4] ice: Correctly handle aux device when num
> channels change
> 
> On 07 Dec 13:10, Tony Nguyen wrote:
> >From: Dave Ertman <david.m.ertman@intel.com>
> >
> >When the number of channels/queues changes on an interface, it is
> necessary
> >to change how those resources are distributed to the auxiliary device for
> >maintaining RDMA functionality.  To do this, the best way is to unplug, and
> 
> Can you please explain how an ethtool can affect RDMA functionality ?
> don't you have full bifurcation between the two eth and rdma interfaces ..
> 
This patch is to address a bug where the number of queues for the interface was
changed and the RDMA lost functionality due to queues being re-assigned.

The PF is managing and setting aside resources for the RDMA aux dev. Then the 
RDMA aux driver will request resources from the PF driver.  Changes in
the total number of resources make it so that resources previously
allocated to RDMA aux driver may not be available any more.  A re-allocation
is necessary to ensure that RDMA has all of the queues that it thinks it does.

> >then re-plug the auxiliary device.  This will cause all current resource
> >allocation to be released, and then re-requested under the new state.
> >
> 
> I find this really disruptive, changing number of netdev queues to cause
> full aux devs restart !
> 

Changing the number of queues available to the interface *is* a disruptive action.
The netdev  and VSI have to be re-configured for queues per TC and the RDMA aux
driver has to re-allocate qsets to attach queue-pairs to.

> >Since the set_channel command from ethtool comes in while holding the
> RTNL
> >lock, it is necessary to offset the plugging and unplugging of auxiliary
> >device to another context.  For this purpose, set the flags for UNPLUG and
> >PLUG in the PF state, then respond to them in the service task.
> >
> >Also, since the auxiliary device will be unplugged/plugged at the end of
> >the flow, it is better to not send the event for TCs changing in the
> >middle of the flow.  This will prevent a timing issue between the events
> >and the probe/release calls conflicting.
> >
> >Fixes: 348048e724a0 ("ice: Implement iidc operations")
> >Signed-off-by: Dave Ertman <david.m.ertman@intel.com>
> >Tested-by: Gurucharan G <gurucharanx.g@intel.com> (A Contingent
> worker at Intel)
> >Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
> >---
> > drivers/net/ethernet/intel/ice/ice.h         | 2 ++
> > drivers/net/ethernet/intel/ice/ice_ethtool.c | 6 ++++++
> > drivers/net/ethernet/intel/ice/ice_idc.c     | 3 +++
> > drivers/net/ethernet/intel/ice/ice_main.c    | 3 +++
> > 4 files changed, 14 insertions(+)
> >
> >diff --git a/drivers/net/ethernet/intel/ice/ice.h
> b/drivers/net/ethernet/intel/ice/ice.h
> >index 001500afc4a6..092e572768fe 100644
> >--- a/drivers/net/ethernet/intel/ice/ice.h
> >+++ b/drivers/net/ethernet/intel/ice/ice.h
> >@@ -281,6 +281,7 @@ enum ice_pf_state {
> > 	ICE_FLTR_OVERFLOW_PROMISC,
> > 	ICE_VF_DIS,
> > 	ICE_CFG_BUSY,
> >+	ICE_SET_CHANNELS,
> > 	ICE_SERVICE_SCHED,
> > 	ICE_SERVICE_DIS,
> > 	ICE_FD_FLUSH_REQ,
> >@@ -485,6 +486,7 @@ enum ice_pf_flags {
> > 	ICE_FLAG_VF_VLAN_PRUNING,
> > 	ICE_FLAG_LINK_LENIENT_MODE_ENA,
> > 	ICE_FLAG_PLUG_AUX_DEV,
> >+	ICE_FLAG_UNPLUG_AUX_DEV,
> > 	ICE_FLAG_MTU_CHANGED,
> > 	ICE_FLAG_GNSS,			/* GNSS successfully
> initialized */
> > 	ICE_PF_FLAGS_NBITS		/* must be last */
> >diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c
> b/drivers/net/ethernet/intel/ice/ice_ethtool.c
> >index b7be84bbe72d..37e174a19860 100644
> >--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
> >+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
> >@@ -3536,6 +3536,8 @@ static int ice_set_channels(struct net_device
> *dev, struct ethtool_channels *ch)
> > 		return -EINVAL;
> > 	}
> >
> >+	set_bit(ICE_SET_CHANNELS, pf->state);
> >+
> > 	ice_vsi_recfg_qs(vsi, new_rx, new_tx);
> >
> > 	if (!netif_is_rxfh_configured(dev))
> >@@ -3543,6 +3545,10 @@ static int ice_set_channels(struct net_device
> *dev, struct ethtool_channels *ch)
> >
> > 	/* Update rss_size due to change in Rx queues */
> > 	vsi->rss_size = ice_get_valid_rss_size(&pf->hw, new_rx);
> >+	clear_bit(ICE_SET_CHANNELS, pf->state);
> >+
> 
> you just set this new state a few lines ago, clearing the bit in the same
> function few lines later seems to be an abuse of the pf state machine,
> couldn't you just pass a parameter to the functions which needed this
> information ?
> 

How is this abusing the PF state machine?  There is a 3 deep function call that needs
the information that this is a set_channel context, and each of those functions is called
from several locations - how is changing all of those functions to include a parameter
(that will be false for all of them but this instance) be less abusive than setting and
clearing a bit?

> >+	set_bit(ICE_FLAG_UNPLUG_AUX_DEV, pf->flags);
> >+	set_bit(ICE_FLAG_PLUG_AUX_DEV, pf->flags);
> >
> > 	return 0;
> > }
Saeed Mahameed Dec. 9, 2022, 7:28 p.m. UTC | #3
On 09 Dec 17:21, Ertman, David M wrote:
>> -----Original Message-----
>> From: Saeed Mahameed <saeed@kernel.org>
>> Sent: Wednesday, December 7, 2022 2:26 PM
>> To: Nguyen, Anthony L <anthony.l.nguyen@intel.com>
>> Cc: davem@davemloft.net; kuba@kernel.org; pabeni@redhat.com;
>> edumazet@google.com; Ertman, David M <david.m.ertman@intel.com>;
>> netdev@vger.kernel.org; Saleem, Shiraz <shiraz.saleem@intel.com>; Ismail,
>> Mustafa <mustafa.ismail@intel.com>; jgg@nvidia.com; leonro@nvidia.com;
>> linux-rdma@vger.kernel.org; G, GurucharanX <gurucharanx.g@intel.com>
>> Subject: Re: [PATCH net 2/4] ice: Correctly handle aux device when num
>> channels change
>>
>> On 07 Dec 13:10, Tony Nguyen wrote:
>> >From: Dave Ertman <david.m.ertman@intel.com>
>> >
>> >When the number of channels/queues changes on an interface, it is
>> necessary
>> >to change how those resources are distributed to the auxiliary device for
>> >maintaining RDMA functionality.  To do this, the best way is to unplug, and
>>
>> Can you please explain how an ethtool can affect RDMA functionality ?
>> don't you have full bifurcation between the two eth and rdma interfaces ..
>>
>This patch is to address a bug where the number of queues for the interface was
>changed and the RDMA lost functionality due to queues being re-assigned.
>

sounds like an rdma or PF bug.

>The PF is managing and setting aside resources for the RDMA aux dev. Then the
>RDMA aux driver will request resources from the PF driver.  Changes in
>the total number of resources make it so that resources previously
>allocated to RDMA aux driver may not be available any more.  A re-allocation
>is necessary to ensure that RDMA has all of the queues that it thinks it does.
>

IMO it's wrong to re-initialize a parallel subsystems due to an ethtool, 
ethtool is meant to control the netdev interface, not rdma. Either
statically allocate these resources on boot or just store them in a free 
pool in PF until next time rdma reloads by an rdma user command outside of
netdev.

>> >then re-plug the auxiliary device.  This will cause all current resource
>> >allocation to be released, and then re-requested under the new state.
>> >
>>
>> I find this really disruptive, changing number of netdev queues to cause
>> full aux devs restart !
>>
>
>Changing the number of queues available to the interface *is* a disruptive action.

yes to the netdev, not to rdma or usb, or the pci bus, you get my point.

>The netdev  and VSI have to be re-configured for queues per TC and the RDMA aux
>driver has to re-allocate qsets to attach queue-pairs to.
>

sure for netdev, but it doesn't make sense to reload rdma, if rdma was
using X queues, it should continue using X queues.. if you can't support
dynamic netdev changes without disturbing rdma, then block ethtool until
user unloads rdma. 

>> >Since the set_channel command from ethtool comes in while holding the
>> RTNL
>> >lock, it is necessary to offset the plugging and unplugging of auxiliary
>> >device to another context.  For this purpose, set the flags for UNPLUG and
>> >PLUG in the PF state, then respond to them in the service task.
>> >
>> >Also, since the auxiliary device will be unplugged/plugged at the end of
>> >the flow, it is better to not send the event for TCs changing in the
>> >middle of the flow.  This will prevent a timing issue between the events
>> >and the probe/release calls conflicting.
>> >
>> >Fixes: 348048e724a0 ("ice: Implement iidc operations")
>> >Signed-off-by: Dave Ertman <david.m.ertman@intel.com>
>> >Tested-by: Gurucharan G <gurucharanx.g@intel.com> (A Contingent
>> worker at Intel)
>> >Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
>> >---
>> > drivers/net/ethernet/intel/ice/ice.h         | 2 ++
>> > drivers/net/ethernet/intel/ice/ice_ethtool.c | 6 ++++++
>> > drivers/net/ethernet/intel/ice/ice_idc.c     | 3 +++
>> > drivers/net/ethernet/intel/ice/ice_main.c    | 3 +++
>> > 4 files changed, 14 insertions(+)
>> >
>> >diff --git a/drivers/net/ethernet/intel/ice/ice.h
>> b/drivers/net/ethernet/intel/ice/ice.h
>> >index 001500afc4a6..092e572768fe 100644
>> >--- a/drivers/net/ethernet/intel/ice/ice.h
>> >+++ b/drivers/net/ethernet/intel/ice/ice.h
>> >@@ -281,6 +281,7 @@ enum ice_pf_state {
>> > 	ICE_FLTR_OVERFLOW_PROMISC,
>> > 	ICE_VF_DIS,
>> > 	ICE_CFG_BUSY,
>> >+	ICE_SET_CHANNELS,
>> > 	ICE_SERVICE_SCHED,
>> > 	ICE_SERVICE_DIS,
>> > 	ICE_FD_FLUSH_REQ,
>> >@@ -485,6 +486,7 @@ enum ice_pf_flags {
>> > 	ICE_FLAG_VF_VLAN_PRUNING,
>> > 	ICE_FLAG_LINK_LENIENT_MODE_ENA,
>> > 	ICE_FLAG_PLUG_AUX_DEV,
>> >+	ICE_FLAG_UNPLUG_AUX_DEV,
>> > 	ICE_FLAG_MTU_CHANGED,
>> > 	ICE_FLAG_GNSS,			/* GNSS successfully
>> initialized */
>> > 	ICE_PF_FLAGS_NBITS		/* must be last */
>> >diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c
>> b/drivers/net/ethernet/intel/ice/ice_ethtool.c
>> >index b7be84bbe72d..37e174a19860 100644
>> >--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
>> >+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
>> >@@ -3536,6 +3536,8 @@ static int ice_set_channels(struct net_device
>> *dev, struct ethtool_channels *ch)
>> > 		return -EINVAL;
>> > 	}
>> >
>> >+	set_bit(ICE_SET_CHANNELS, pf->state);
>> >+
>> > 	ice_vsi_recfg_qs(vsi, new_rx, new_tx);
>> >
>> > 	if (!netif_is_rxfh_configured(dev))
>> >@@ -3543,6 +3545,10 @@ static int ice_set_channels(struct net_device
>> *dev, struct ethtool_channels *ch)
>> >
>> > 	/* Update rss_size due to change in Rx queues */
>> > 	vsi->rss_size = ice_get_valid_rss_size(&pf->hw, new_rx);
>> >+	clear_bit(ICE_SET_CHANNELS, pf->state);
>> >+
>>
>> you just set this new state a few lines ago, clearing the bit in the same
>> function few lines later seems to be an abuse of the pf state machine,
>> couldn't you just pass a parameter to the functions which needed this
>> information ?
>>
>
>How is this abusing the PF state machine?  There is a 3 deep function call that needs
>the information that this is a set_channel context, and each of those functions is called
>from several locations - how is changing all of those functions to include a parameter

this is exactly the abuse i was talking about, setting a flag on a global
state field because the function call is too deep.

>(that will be false for all of them but this instance) be less abusive than setting and
>clearing a bit?

this is not future sustainable and not reviewer friendly, it's a local
parameter and shouldn't be a global flag.

Anyway this is your driver, i am not going to force you to do something you
don't like.

but for the reloading of rdma due to an ethtool is a no for me..
let's find a common place for all vendors to express such limitations,
e.g. devlink .. 

>
>> >+	set_bit(ICE_FLAG_UNPLUG_AUX_DEV, pf->flags);
>> >+	set_bit(ICE_FLAG_PLUG_AUX_DEV, pf->flags);
>> >
>> > 	return 0;
>> > }
>
Jason Gunthorpe Dec. 9, 2022, 7:32 p.m. UTC | #4
On Fri, Dec 09, 2022 at 11:28:28AM -0800, Saeed Mahameed wrote:

> IMO it's wrong to re-initialize a parallel subsystems due to an ethtool,
> ethtool is meant to control the netdev interface, not rdma.

We've gotten into locking trouble doing stuff like this before.

If you are holding any locks do not try to unplug/plug an aux device.

Jason
Ertman, David M Dec. 12, 2022, 5:03 p.m. UTC | #5
> -----Original Message-----
> From: Jason Gunthorpe <jgg@nvidia.com>
> Sent: Friday, December 9, 2022 11:33 AM
> To: Saeed Mahameed <saeed@kernel.org>
> Cc: Ertman, David M <david.m.ertman@intel.com>; Nguyen, Anthony L
> <anthony.l.nguyen@intel.com>; davem@davemloft.net; kuba@kernel.org;
> pabeni@redhat.com; edumazet@google.com; netdev@vger.kernel.org;
> Saleem, Shiraz <shiraz.saleem@intel.com>; Ismail, Mustafa
> <mustafa.ismail@intel.com>; leonro@nvidia.com; linux-
> rdma@vger.kernel.org; G, GurucharanX <gurucharanx.g@intel.com>
> Subject: Re: [PATCH net 2/4] ice: Correctly handle aux device when num
> channels change
> 
> On Fri, Dec 09, 2022 at 11:28:28AM -0800, Saeed Mahameed wrote:
> 
> > IMO it's wrong to re-initialize a parallel subsystems due to an ethtool,
> > ethtool is meant to control the netdev interface, not rdma.
> 
> We've gotten into locking trouble doing stuff like this before.
> 
> If you are holding any locks do not try to unplug/plug an aux device.
> 
> Jason

The unplug/plug is done outside the ethtool context.  No locks are being held.

DaveE
Jason Gunthorpe Dec. 12, 2022, 11:53 p.m. UTC | #6
On Mon, Dec 12, 2022 at 05:03:39PM +0000, Ertman, David M wrote:
> > On Fri, Dec 09, 2022 at 11:28:28AM -0800, Saeed Mahameed wrote:
> > 
> > > IMO it's wrong to re-initialize a parallel subsystems due to an ethtool,
> > > ethtool is meant to control the netdev interface, not rdma.
> > 
> > We've gotten into locking trouble doing stuff like this before.
> > 
> > If you are holding any locks do not try to unplug/plug an aux device.
> > 
> > Jason
> 
> The unplug/plug is done outside the ethtool context.  No locks are being held.

That's a good, trick, so I'm skeptical *no* locks are held.

Jason
Ertman, David M Dec. 16, 2022, 7:08 p.m. UTC | #7
> -----Original Message-----
> From: Saeed Mahameed <saeed@kernel.org>
> Sent: Friday, December 9, 2022 11:28 AM
> To: Ertman, David M <david.m.ertman@intel.com>
> Cc: Nguyen, Anthony L <anthony.l.nguyen@intel.com>;
> davem@davemloft.net; kuba@kernel.org; pabeni@redhat.com;
> edumazet@google.com; netdev@vger.kernel.org; Saleem, Shiraz
> <shiraz.saleem@intel.com>; Ismail, Mustafa <mustafa.ismail@intel.com>;
> jgg@nvidia.com; leonro@nvidia.com; linux-rdma@vger.kernel.org; G,
> GurucharanX <gurucharanx.g@intel.com>
> Subject: Re: [PATCH net 2/4] ice: Correctly handle aux device when num
> channels change
> 
> On 09 Dec 17:21, Ertman, David M wrote:
> >> -----Original Message-----
> >> From: Saeed Mahameed <saeed@kernel.org>
> >> Sent: Wednesday, December 7, 2022 2:26 PM
> >> To: Nguyen, Anthony L <anthony.l.nguyen@intel.com>
> >> Cc: davem@davemloft.net; kuba@kernel.org; pabeni@redhat.com;
> >> edumazet@google.com; Ertman, David M <david.m.ertman@intel.com>;
> >> netdev@vger.kernel.org; Saleem, Shiraz <shiraz.saleem@intel.com>;
> Ismail,
> >> Mustafa <mustafa.ismail@intel.com>; jgg@nvidia.com;
> leonro@nvidia.com;
> >> linux-rdma@vger.kernel.org; G, GurucharanX
> <gurucharanx.g@intel.com>
> >> Subject: Re: [PATCH net 2/4] ice: Correctly handle aux device when num
> >> channels change
> >>
> >> On 07 Dec 13:10, Tony Nguyen wrote:
> >> >From: Dave Ertman <david.m.ertman@intel.com>
> >> >
> >> >When the number of channels/queues changes on an interface, it is
> >> necessary
> >> >to change how those resources are distributed to the auxiliary device for
> >> >maintaining RDMA functionality.  To do this, the best way is to unplug,
> and
> >>
> >> Can you please explain how an ethtool can affect RDMA functionality ?
> >> don't you have full bifurcation between the two eth and rdma interfaces
> ..
> >>
> >This patch is to address a bug where the number of queues for the
> interface was
> >changed and the RDMA lost functionality due to queues being re-assigned.
> >
> 
> sounds like an rdma or PF bug.
> 
> >The PF is managing and setting aside resources for the RDMA aux dev. Then
> the
> >RDMA aux driver will request resources from the PF driver.  Changes in
> >the total number of resources make it so that resources previously
> >allocated to RDMA aux driver may not be available any more.  A re-
> allocation
> >is necessary to ensure that RDMA has all of the queues that it thinks it does.
> >
> 
> IMO it's wrong to re-initialize a parallel subsystems due to an ethtool,
> ethtool is meant to control the netdev interface, not rdma. Either
> statically allocate these resources on boot or just store them in a free
> pool in PF until next time rdma reloads by an rdma user command outside of
> netdev.
> 
> >> >then re-plug the auxiliary device.  This will cause all current resource
> >> >allocation to be released, and then re-requested under the new state.
> >> >
> >>
> >> I find this really disruptive, changing number of netdev queues to cause
> >> full aux devs restart !
> >>
> >
> >Changing the number of queues available to the interface *is* a disruptive
> action.
> 
> yes to the netdev, not to rdma or usb, or the pci bus, you get my point.
> 
> >The netdev  and VSI have to be re-configured for queues per TC and the
> RDMA aux
> >driver has to re-allocate qsets to attach queue-pairs to.
> >
> 
> sure for netdev, but it doesn't make sense to reload rdma, if rdma was
> using X queues, it should continue using X queues.. if you can't support
> dynamic netdev changes without disturbing rdma, then block ethtool until
> user unloads rdma.
> 
> >> >Since the set_channel command from ethtool comes in while holding
> the
> >> RTNL
> >> >lock, it is necessary to offset the plugging and unplugging of auxiliary
> >> >device to another context.  For this purpose, set the flags for UNPLUG
> and
> >> >PLUG in the PF state, then respond to them in the service task.
> >> >
> >> >Also, since the auxiliary device will be unplugged/plugged at the end of
> >> >the flow, it is better to not send the event for TCs changing in the
> >> >middle of the flow.  This will prevent a timing issue between the events
> >> >and the probe/release calls conflicting.
> >> >
> >> >Fixes: 348048e724a0 ("ice: Implement iidc operations")
> >> >Signed-off-by: Dave Ertman <david.m.ertman@intel.com>
> >> >Tested-by: Gurucharan G <gurucharanx.g@intel.com> (A Contingent
> >> worker at Intel)
> >> >Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
> >> >---
> >> > drivers/net/ethernet/intel/ice/ice.h         | 2 ++
> >> > drivers/net/ethernet/intel/ice/ice_ethtool.c | 6 ++++++
> >> > drivers/net/ethernet/intel/ice/ice_idc.c     | 3 +++
> >> > drivers/net/ethernet/intel/ice/ice_main.c    | 3 +++
> >> > 4 files changed, 14 insertions(+)
> >> >
> >> >diff --git a/drivers/net/ethernet/intel/ice/ice.h
> >> b/drivers/net/ethernet/intel/ice/ice.h
> >> >index 001500afc4a6..092e572768fe 100644
> >> >--- a/drivers/net/ethernet/intel/ice/ice.h
> >> >+++ b/drivers/net/ethernet/intel/ice/ice.h
> >> >@@ -281,6 +281,7 @@ enum ice_pf_state {
> >> > 	ICE_FLTR_OVERFLOW_PROMISC,
> >> > 	ICE_VF_DIS,
> >> > 	ICE_CFG_BUSY,
> >> >+	ICE_SET_CHANNELS,
> >> > 	ICE_SERVICE_SCHED,
> >> > 	ICE_SERVICE_DIS,
> >> > 	ICE_FD_FLUSH_REQ,
> >> >@@ -485,6 +486,7 @@ enum ice_pf_flags {
> >> > 	ICE_FLAG_VF_VLAN_PRUNING,
> >> > 	ICE_FLAG_LINK_LENIENT_MODE_ENA,
> >> > 	ICE_FLAG_PLUG_AUX_DEV,
> >> >+	ICE_FLAG_UNPLUG_AUX_DEV,
> >> > 	ICE_FLAG_MTU_CHANGED,
> >> > 	ICE_FLAG_GNSS,			/* GNSS successfully
> >> initialized */
> >> > 	ICE_PF_FLAGS_NBITS		/* must be last */
> >> >diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c
> >> b/drivers/net/ethernet/intel/ice/ice_ethtool.c
> >> >index b7be84bbe72d..37e174a19860 100644
> >> >--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
> >> >+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
> >> >@@ -3536,6 +3536,8 @@ static int ice_set_channels(struct net_device
> >> *dev, struct ethtool_channels *ch)
> >> > 		return -EINVAL;
> >> > 	}
> >> >
> >> >+	set_bit(ICE_SET_CHANNELS, pf->state);
> >> >+
> >> > 	ice_vsi_recfg_qs(vsi, new_rx, new_tx);
> >> >
> >> > 	if (!netif_is_rxfh_configured(dev))
> >> >@@ -3543,6 +3545,10 @@ static int ice_set_channels(struct net_device
> >> *dev, struct ethtool_channels *ch)
> >> >
> >> > 	/* Update rss_size due to change in Rx queues */
> >> > 	vsi->rss_size = ice_get_valid_rss_size(&pf->hw, new_rx);
> >> >+	clear_bit(ICE_SET_CHANNELS, pf->state);
> >> >+
> >>
> >> you just set this new state a few lines ago, clearing the bit in the same
> >> function few lines later seems to be an abuse of the pf state machine,
> >> couldn't you just pass a parameter to the functions which needed this
> >> information ?
> >>
> >
> >How is this abusing the PF state machine?  There is a 3 deep function call
> that needs
> >the information that this is a set_channel context, and each of those
> functions is called
> >from several locations - how is changing all of those functions to include a
> parameter
> 
> this is exactly the abuse i was talking about, setting a flag on a global
> state field because the function call is too deep.
> 
> >(that will be false for all of them but this instance) be less abusive than
> setting and
> >clearing a bit?
> 
> this is not future sustainable and not reviewer friendly, it's a local
> parameter and shouldn't be a global flag.
> 
> Anyway this is your driver, i am not going to force you to do something you
> don't like.
> 
> but for the reloading of rdma due to an ethtool is a no for me..
> let's find a common place for all vendors to express such limitations,
> e.g. devlink ..

The set_channels function will cause the VSI controlling the pool of resources that
RDMA uses to be re-allocated ad reconfigured.  So, based on this, I will submit a new patch
to prevent set_channels when RDMA is probed as you suggested.

Please abandon this patch, new one will be submitted.

DaveE



> 
> >
> >> >+	set_bit(ICE_FLAG_UNPLUG_AUX_DEV, pf->flags);
> >> >+	set_bit(ICE_FLAG_PLUG_AUX_DEV, pf->flags);
> >> >
> >> > 	return 0;
> >> > }
> >
diff mbox series

Patch

diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index 001500afc4a6..092e572768fe 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -281,6 +281,7 @@  enum ice_pf_state {
 	ICE_FLTR_OVERFLOW_PROMISC,
 	ICE_VF_DIS,
 	ICE_CFG_BUSY,
+	ICE_SET_CHANNELS,
 	ICE_SERVICE_SCHED,
 	ICE_SERVICE_DIS,
 	ICE_FD_FLUSH_REQ,
@@ -485,6 +486,7 @@  enum ice_pf_flags {
 	ICE_FLAG_VF_VLAN_PRUNING,
 	ICE_FLAG_LINK_LENIENT_MODE_ENA,
 	ICE_FLAG_PLUG_AUX_DEV,
+	ICE_FLAG_UNPLUG_AUX_DEV,
 	ICE_FLAG_MTU_CHANGED,
 	ICE_FLAG_GNSS,			/* GNSS successfully initialized */
 	ICE_PF_FLAGS_NBITS		/* must be last */
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index b7be84bbe72d..37e174a19860 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -3536,6 +3536,8 @@  static int ice_set_channels(struct net_device *dev, struct ethtool_channels *ch)
 		return -EINVAL;
 	}
 
+	set_bit(ICE_SET_CHANNELS, pf->state);
+
 	ice_vsi_recfg_qs(vsi, new_rx, new_tx);
 
 	if (!netif_is_rxfh_configured(dev))
@@ -3543,6 +3545,10 @@  static int ice_set_channels(struct net_device *dev, struct ethtool_channels *ch)
 
 	/* Update rss_size due to change in Rx queues */
 	vsi->rss_size = ice_get_valid_rss_size(&pf->hw, new_rx);
+	clear_bit(ICE_SET_CHANNELS, pf->state);
+
+	set_bit(ICE_FLAG_UNPLUG_AUX_DEV, pf->flags);
+	set_bit(ICE_FLAG_PLUG_AUX_DEV, pf->flags);
 
 	return 0;
 }
diff --git a/drivers/net/ethernet/intel/ice/ice_idc.c b/drivers/net/ethernet/intel/ice/ice_idc.c
index 895c32bcc8b5..9bf6fa5ed4c8 100644
--- a/drivers/net/ethernet/intel/ice/ice_idc.c
+++ b/drivers/net/ethernet/intel/ice/ice_idc.c
@@ -37,6 +37,9 @@  void ice_send_event_to_aux(struct ice_pf *pf, struct iidc_event *event)
 	if (WARN_ON_ONCE(!in_task()))
 		return;
 
+	if (test_bit(ICE_SET_CHANNELS, pf->state))
+		return;
+
 	mutex_lock(&pf->adev_mutex);
 	if (!pf->adev)
 		goto finish;
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index d0f14e73e8da..d58f55a72ab3 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -2300,6 +2300,9 @@  static void ice_service_task(struct work_struct *work)
 		}
 	}
 
+	if (test_and_clear_bit(ICE_FLAG_UNPLUG_AUX_DEV, pf->flags))
+		ice_unplug_aux_dev(pf);
+
 	if (test_bit(ICE_FLAG_PLUG_AUX_DEV, pf->flags)) {
 		/* Plug aux device per request */
 		ice_plug_aux_dev(pf);