diff mbox series

[V4,22/28] PCI: tegra: Access endpoint config only if PCIe link is up

Message ID 20190516055307.25737-23-mmaddireddy@nvidia.com (mailing list archive)
State Superseded, archived
Headers show
Series Enable Tegra PCIe root port features | expand

Commit Message

Manikanta Maddireddy May 16, 2019, 5:53 a.m. UTC
A few endpoints, such as Wi-Fi devices, support power on/off; to leverage
that, the root port must support hot-plug and hot-unplug. Tegra PCIe
doesn't support hot-plug and hot-unplug; however, it supports an endpoint
power on/off feature as follows:
 - Power off sequence:
   - Transition of PCIe link to L2
   - Power off endpoint
   - Leave root port in power up state with the link in L2
 - Power on sequence:
   - Power on endpoint
   - Apply hot reset to get PCIe link up

The PCIe client driver stops accessing the endpoint's config and BAR
registers after the endpoint is powered off. However, software
applications such as the X11 server or lspci can still access the endpoint
config registers, in which case the host controller raises "response
decoding" errors. To avoid this scenario, add a PCIe link-up check to the
config read and write callback functions before accessing the endpoint
config registers.

Signed-off-by: Manikanta Maddireddy <mmaddireddy@nvidia.com>
---
V4: No change

V3: Update the commit log with explanation for the need of this patch

V2: Change tegra_pcie_link_status() to tegra_pcie_link_up()

 drivers/pci/controller/pci-tegra.c | 38 ++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)

Comments

Thierry Reding June 4, 2019, 1:14 p.m. UTC | #1
On Thu, May 16, 2019 at 11:23:01AM +0530, Manikanta Maddireddy wrote:
> Few endpoints like Wi-Fi supports power on/off and to leverage that
> root port must support hot-plug and hot-unplug. Tegra PCIe doesn't
> support hot-plug and hot-unplug, however it supports endpoint power
> on/off feature as follows,
>  - Power off sequence:
>    - Transition of PCIe link to L2
>    - Power off endpoint
>    - Leave root port in power up state with the link in L2
>  - Power on sequence:
>    - Power on endpoint
>    - Apply hot reset to get PCIe link up
> 
> PCIe client driver stops accessing PCIe endpoint config and BAR registers
> after endpoint is powered off. However, software applications like x11
> server or lspci can access endpoint config registers in which case
> host controller raises "response decoding" errors. To avoid this scenario,
> add PCIe link up check in config read and write callback functions before
> accessing endpoint config registers.
> 
> Signed-off-by: Manikanta Maddireddy <mmaddireddy@nvidia.com>
> ---
> V4: No change
> 
> V3: Update the commit log with explanation for the need of this patch
> 
> V2: Change tegra_pcie_link_status() to tegra_pcie_link_up()
> 
>  drivers/pci/controller/pci-tegra.c | 38 ++++++++++++++++++++++++++++++
>  1 file changed, 38 insertions(+)

This still doesn't look right to me conceptually. If somebody wants to
access the PCI devices after the kernel has powered them off, why can't
we just power the devices back on so that we allow userspace to properly
access the devices?

Or if that's not what we want, shouldn't we add something to the core
PCI infrastructure to let us deal with this? It seems like this is some
general problem that would apply to every PCI device and host bridge
driver. Having each driver implement this logic separately doesn't seem
like a good idea to me.

Thierry

> diff --git a/drivers/pci/controller/pci-tegra.c b/drivers/pci/controller/pci-tegra.c
> index d20c88a79e00..33f4dfab9e35 100644
> --- a/drivers/pci/controller/pci-tegra.c
> +++ b/drivers/pci/controller/pci-tegra.c
> @@ -428,6 +428,14 @@ static inline u32 pads_readl(struct tegra_pcie *pcie, unsigned long offset)
>  	return readl(pcie->pads + offset);
>  }
>  
> +static bool tegra_pcie_link_up(struct tegra_pcie_port *port)
> +{
> +	u32 value;
> +
> +	value = readl(port->base + RP_LINK_CONTROL_STATUS);
> +	return !!(value & RP_LINK_CONTROL_STATUS_DL_LINK_ACTIVE);
> +}
> +
>  /*
>   * The configuration space mapping on Tegra is somewhat similar to the ECAM
>   * defined by PCIe. However it deviates a bit in how the 4 bits for extended
> @@ -493,20 +501,50 @@ static void __iomem *tegra_pcie_map_bus(struct pci_bus *bus,
>  static int tegra_pcie_config_read(struct pci_bus *bus, unsigned int devfn,
>  				  int where, int size, u32 *value)
>  {
> +	struct tegra_pcie *pcie = bus->sysdata;
> +	struct pci_dev *bridge;
> +	struct tegra_pcie_port *port;
> +
>  	if (bus->number == 0)
>  		return pci_generic_config_read32(bus, devfn, where, size,
>  						 value);
>  
> +	bridge = pcie_find_root_port(bus->self);
> +
> +	list_for_each_entry(port, &pcie->ports, list)
> +		if (port->index + 1 == PCI_SLOT(bridge->devfn))
> +			break;
> +
> +	/* If there is no link, then there is no device */
> +	if (!tegra_pcie_link_up(port)) {
> +		*value = 0xffffffff;
> +		return PCIBIOS_DEVICE_NOT_FOUND;
> +	}
> +
>  	return pci_generic_config_read(bus, devfn, where, size, value);
>  }
>  
>  static int tegra_pcie_config_write(struct pci_bus *bus, unsigned int devfn,
>  				   int where, int size, u32 value)
>  {
> +	struct tegra_pcie *pcie = bus->sysdata;
> +	struct tegra_pcie_port *port;
> +	struct pci_dev *bridge;
> +
>  	if (bus->number == 0)
>  		return pci_generic_config_write32(bus, devfn, where, size,
>  						  value);
>  
> +	bridge = pcie_find_root_port(bus->self);
> +
> +	list_for_each_entry(port, &pcie->ports, list)
> +		if (port->index + 1 == PCI_SLOT(bridge->devfn))
> +			break;
> +
> +	/* If there is no link, then there is no device */
> +	if (!tegra_pcie_link_up(port))
> +		return PCIBIOS_DEVICE_NOT_FOUND;
> +
>  	return pci_generic_config_write(bus, devfn, where, size, value);
>  }
>  
> -- 
> 2.17.1
>
Manikanta Maddireddy June 4, 2019, 2:10 p.m. UTC | #2
On 04-Jun-19 6:44 PM, Thierry Reding wrote:
> On Thu, May 16, 2019 at 11:23:01AM +0530, Manikanta Maddireddy wrote:
>> Few endpoints like Wi-Fi supports power on/off and to leverage that
>> root port must support hot-plug and hot-unplug. Tegra PCIe doesn't
>> support hot-plug and hot-unplug, however it supports endpoint power
>> on/off feature as follows,
>>  - Power off sequence:
>>    - Transition of PCIe link to L2
>>    - Power off endpoint
>>    - Leave root port in power up state with the link in L2
>>  - Power on sequence:
>>    - Power on endpoint
>>    - Apply hot reset to get PCIe link up
>>
>> PCIe client driver stops accessing PCIe endpoint config and BAR registers
>> after endpoint is powered off. However, software applications like x11
>> server or lspci can access endpoint config registers in which case
>> host controller raises "response decoding" errors. To avoid this scenario,
>> add PCIe link up check in config read and write callback functions before
>> accessing endpoint config registers.
>>
>> Signed-off-by: Manikanta Maddireddy <mmaddireddy@nvidia.com>
>> ---
>> V4: No change
>>
>> V3: Update the commit log with explanation for the need of this patch
>>
>> V2: Change tegra_pcie_link_status() to tegra_pcie_link_up()
>>
>>  drivers/pci/controller/pci-tegra.c | 38 ++++++++++++++++++++++++++++++
>>  1 file changed, 38 insertions(+)
> This still doesn't look right to me conceptually. If somebody wants to
> access the PCI devices after the kernel has powered them off, why can't
> we just power the devices back on so that we allow userspace to properly
> access the devices?

1. WiFi devices provide a power-off feature for power saving on mobile
devices. When WiFi is turned off, we shouldn't power the HW back on
without the user turning it back on.
2. Whenever a user process tries to access config space, it ends up
in these functions. We cannot have an is_powered_on check in the config
read/write callbacks.
3. WiFi power on/off is a device-specific feature; we shouldn't handle it
in the PCI subsystem or the host controller driver.

>
> Or if that's not what we want, shouldn't we add something to the core
> PCI infrastructure to let us deal with this? It seems like this is some
> general problem that would apply to every PCI device and host bridge
> driver. Having each driver implement this logic separately doesn't seem
> like a good idea to me.
>
> Thierry

This should be handled by the hotplug feature: whenever an endpoint is
powered off or removed from the slot, a hot-unplug event should take care
of it. Unfortunately, Tegra PCIe doesn't support the hotplug feature.

Manikanta

>> diff --git a/drivers/pci/controller/pci-tegra.c b/drivers/pci/controller/pci-tegra.c
>> index d20c88a79e00..33f4dfab9e35 100644
>> --- a/drivers/pci/controller/pci-tegra.c
>> +++ b/drivers/pci/controller/pci-tegra.c
>> @@ -428,6 +428,14 @@ static inline u32 pads_readl(struct tegra_pcie *pcie, unsigned long offset)
>>  	return readl(pcie->pads + offset);
>>  }
>>  
>> +static bool tegra_pcie_link_up(struct tegra_pcie_port *port)
>> +{
>> +	u32 value;
>> +
>> +	value = readl(port->base + RP_LINK_CONTROL_STATUS);
>> +	return !!(value & RP_LINK_CONTROL_STATUS_DL_LINK_ACTIVE);
>> +}
>> +
>>  /*
>>   * The configuration space mapping on Tegra is somewhat similar to the ECAM
>>   * defined by PCIe. However it deviates a bit in how the 4 bits for extended
>> @@ -493,20 +501,50 @@ static void __iomem *tegra_pcie_map_bus(struct pci_bus *bus,
>>  static int tegra_pcie_config_read(struct pci_bus *bus, unsigned int devfn,
>>  				  int where, int size, u32 *value)
>>  {
>> +	struct tegra_pcie *pcie = bus->sysdata;
>> +	struct pci_dev *bridge;
>> +	struct tegra_pcie_port *port;
>> +
>>  	if (bus->number == 0)
>>  		return pci_generic_config_read32(bus, devfn, where, size,
>>  						 value);
>>  
>> +	bridge = pcie_find_root_port(bus->self);
>> +
>> +	list_for_each_entry(port, &pcie->ports, list)
>> +		if (port->index + 1 == PCI_SLOT(bridge->devfn))
>> +			break;
>> +
>> +	/* If there is no link, then there is no device */
>> +	if (!tegra_pcie_link_up(port)) {
>> +		*value = 0xffffffff;
>> +		return PCIBIOS_DEVICE_NOT_FOUND;
>> +	}
>> +
>>  	return pci_generic_config_read(bus, devfn, where, size, value);
>>  }
>>  
>>  static int tegra_pcie_config_write(struct pci_bus *bus, unsigned int devfn,
>>  				   int where, int size, u32 value)
>>  {
>> +	struct tegra_pcie *pcie = bus->sysdata;
>> +	struct tegra_pcie_port *port;
>> +	struct pci_dev *bridge;
>> +
>>  	if (bus->number == 0)
>>  		return pci_generic_config_write32(bus, devfn, where, size,
>>  						  value);
>>  
>> +	bridge = pcie_find_root_port(bus->self);
>> +
>> +	list_for_each_entry(port, &pcie->ports, list)
>> +		if (port->index + 1 == PCI_SLOT(bridge->devfn))
>> +			break;
>> +
>> +	/* If there is no link, then there is no device */
>> +	if (!tegra_pcie_link_up(port))
>> +		return PCIBIOS_DEVICE_NOT_FOUND;
>> +
>>  	return pci_generic_config_write(bus, devfn, where, size, value);
>>  }
>>  
>> -- 
>> 2.17.1
>>
Manikanta Maddireddy June 10, 2019, 4:38 a.m. UTC | #3
On 04-Jun-19 7:40 PM, Manikanta Maddireddy wrote:
>
> On 04-Jun-19 6:44 PM, Thierry Reding wrote:
>> On Thu, May 16, 2019 at 11:23:01AM +0530, Manikanta Maddireddy wrote:
>>> Few endpoints like Wi-Fi supports power on/off and to leverage that
>>> root port must support hot-plug and hot-unplug. Tegra PCIe doesn't
>>> support hot-plug and hot-unplug, however it supports endpoint power
>>> on/off feature as follows,
>>>  - Power off sequence:
>>>    - Transition of PCIe link to L2
>>>    - Power off endpoint
>>>    - Leave root port in power up state with the link in L2
>>>  - Power on sequence:
>>>    - Power on endpoint
>>>    - Apply hot reset to get PCIe link up
>>>
>>> PCIe client driver stops accessing PCIe endpoint config and BAR registers
>>> after endpoint is powered off. However, software applications like x11
>>> server or lspci can access endpoint config registers in which case
>>> host controller raises "response decoding" errors. To avoid this scenario,
>>> add PCIe link up check in config read and write callback functions before
>>> accessing endpoint config registers.
>>>
>>> Signed-off-by: Manikanta Maddireddy <mmaddireddy@nvidia.com>
>>> ---
>>> V4: No change
>>>
>>> V3: Update the commit log with explanation for the need of this patch
>>>
>>> V2: Change tegra_pcie_link_status() to tegra_pcie_link_up()
>>>
>>>  drivers/pci/controller/pci-tegra.c | 38 ++++++++++++++++++++++++++++++
>>>  1 file changed, 38 insertions(+)
>> This still doesn't look right to me conceptually. If somebody wants to
>> access the PCI devices after the kernel has powered them off, why can't
>> we just power the devices back on so that we allow userspace to properly
>> access the devices?
> 1. WiFi devices provides power-off feature for power saving in mobiles.
> When WiFi is turned off we shouldn't power on the HW back without user
> turning it back on.
> 2. When ever user process tries to access config space, it'll end up
> in these functions. We cannot have is_powered_on check in config read/write
> callbacks.
> 3. WiFi power on/off is device specific feature, we shouldn't handle it
> in PCI subsystem or host controller driver.
>
>> Or if that's not what we want, shouldn't we add something to the core
>> PCI infrastructure to let us deal with this? It seems like this is some
>> general problem that would apply to every PCI device and host bridge
>> driver. Having each driver implement this logic separately doesn't seem
>> like a good idea to me.
>>
>> Thierry
> This should be handled by hotplug feature, whenever endpoint is powered-off/
> removed from the slot, hot unplug event should take care of it. Unfortunately
> Tegra PCIe doesn't support hotplug feature.
>
> Manikanta

Hi Bjorn,

I thought about your comment in https://patchwork.ozlabs.org/patch/1084204/ again.
What if I add a link-up check in tegra_pcie_isr() and make the "response
decoding error" message a debug print? EP config accesses will still happen
when the link is down, but the "Response decoding error" message is printed
only if debug logging is enabled. This way we avoid the race issue in the
config accessors and still get the prints when debug logs are enabled.

Thierry,
Please share your inputs as well.

Manikanta
 

>>> diff --git a/drivers/pci/controller/pci-tegra.c b/drivers/pci/controller/pci-tegra.c
>>> index d20c88a79e00..33f4dfab9e35 100644
>>> --- a/drivers/pci/controller/pci-tegra.c
>>> +++ b/drivers/pci/controller/pci-tegra.c
>>> @@ -428,6 +428,14 @@ static inline u32 pads_readl(struct tegra_pcie *pcie, unsigned long offset)
>>>  	return readl(pcie->pads + offset);
>>>  }
>>>  
>>> +static bool tegra_pcie_link_up(struct tegra_pcie_port *port)
>>> +{
>>> +	u32 value;
>>> +
>>> +	value = readl(port->base + RP_LINK_CONTROL_STATUS);
>>> +	return !!(value & RP_LINK_CONTROL_STATUS_DL_LINK_ACTIVE);
>>> +}
>>> +
>>>  /*
>>>   * The configuration space mapping on Tegra is somewhat similar to the ECAM
>>>   * defined by PCIe. However it deviates a bit in how the 4 bits for extended
>>> @@ -493,20 +501,50 @@ static void __iomem *tegra_pcie_map_bus(struct pci_bus *bus,
>>>  static int tegra_pcie_config_read(struct pci_bus *bus, unsigned int devfn,
>>>  				  int where, int size, u32 *value)
>>>  {
>>> +	struct tegra_pcie *pcie = bus->sysdata;
>>> +	struct pci_dev *bridge;
>>> +	struct tegra_pcie_port *port;
>>> +
>>>  	if (bus->number == 0)
>>>  		return pci_generic_config_read32(bus, devfn, where, size,
>>>  						 value);
>>>  
>>> +	bridge = pcie_find_root_port(bus->self);
>>> +
>>> +	list_for_each_entry(port, &pcie->ports, list)
>>> +		if (port->index + 1 == PCI_SLOT(bridge->devfn))
>>> +			break;
>>> +
>>> +	/* If there is no link, then there is no device */
>>> +	if (!tegra_pcie_link_up(port)) {
>>> +		*value = 0xffffffff;
>>> +		return PCIBIOS_DEVICE_NOT_FOUND;
>>> +	}
>>> +
>>>  	return pci_generic_config_read(bus, devfn, where, size, value);
>>>  }
>>>  
>>>  static int tegra_pcie_config_write(struct pci_bus *bus, unsigned int devfn,
>>>  				   int where, int size, u32 value)
>>>  {
>>> +	struct tegra_pcie *pcie = bus->sysdata;
>>> +	struct tegra_pcie_port *port;
>>> +	struct pci_dev *bridge;
>>> +
>>>  	if (bus->number == 0)
>>>  		return pci_generic_config_write32(bus, devfn, where, size,
>>>  						  value);
>>>  
>>> +	bridge = pcie_find_root_port(bus->self);
>>> +
>>> +	list_for_each_entry(port, &pcie->ports, list)
>>> +		if (port->index + 1 == PCI_SLOT(bridge->devfn))
>>> +			break;
>>> +
>>> +	/* If there is no link, then there is no device */
>>> +	if (!tegra_pcie_link_up(port))
>>> +		return PCIBIOS_DEVICE_NOT_FOUND;
>>> +
>>>  	return pci_generic_config_write(bus, devfn, where, size, value);
>>>  }
>>>  
>>> -- 
>>> 2.17.1
>>>
Lorenzo Pieralisi June 13, 2019, 2:39 p.m. UTC | #4
On Mon, Jun 10, 2019 at 10:08:16AM +0530, Manikanta Maddireddy wrote:
> 
> 
> On 04-Jun-19 7:40 PM, Manikanta Maddireddy wrote:
> >
> > On 04-Jun-19 6:44 PM, Thierry Reding wrote:
> >> On Thu, May 16, 2019 at 11:23:01AM +0530, Manikanta Maddireddy wrote:
> >>> Few endpoints like Wi-Fi supports power on/off and to leverage that
> >>> root port must support hot-plug and hot-unplug. Tegra PCIe doesn't
> >>> support hot-plug and hot-unplug, however it supports endpoint power
> >>> on/off feature as follows,
> >>>  - Power off sequence:
> >>>    - Transition of PCIe link to L2
> >>>    - Power off endpoint
> >>>    - Leave root port in power up state with the link in L2
> >>>  - Power on sequence:
> >>>    - Power on endpoint
> >>>    - Apply hot reset to get PCIe link up
> >>>
> >>> PCIe client driver stops accessing PCIe endpoint config and BAR registers
> >>> after endpoint is powered off. However, software applications like x11
> >>> server or lspci can access endpoint config registers in which case
> >>> host controller raises "response decoding" errors. To avoid this scenario,
> >>> add PCIe link up check in config read and write callback functions before
> >>> accessing endpoint config registers.
> >>>
> >>> Signed-off-by: Manikanta Maddireddy <mmaddireddy@nvidia.com>
> >>> ---
> >>> V4: No change
> >>>
> >>> V3: Update the commit log with explanation for the need of this patch
> >>>
> >>> V2: Change tegra_pcie_link_status() to tegra_pcie_link_up()
> >>>
> >>>  drivers/pci/controller/pci-tegra.c | 38 ++++++++++++++++++++++++++++++
> >>>  1 file changed, 38 insertions(+)
> >> This still doesn't look right to me conceptually. If somebody wants to
> >> access the PCI devices after the kernel has powered them off, why can't
> >> we just power the devices back on so that we allow userspace to properly
> >> access the devices?
> > 1. WiFi devices provides power-off feature for power saving in mobiles.
> > When WiFi is turned off we shouldn't power on the HW back without user
> > turning it back on.
> > 2. When ever user process tries to access config space, it'll end up
> > in these functions. We cannot have is_powered_on check in config read/write
> > callbacks.
> > 3. WiFi power on/off is device specific feature, we shouldn't handle it
> > in PCI subsystem or host controller driver.
> >
> >> Or if that's not what we want, shouldn't we add something to the core
> >> PCI infrastructure to let us deal with this? It seems like this is some
> >> general problem that would apply to every PCI device and host bridge
> >> driver. Having each driver implement this logic separately doesn't seem
> >> like a good idea to me.
> >>
> >> Thierry
> > This should be handled by hotplug feature, whenever endpoint is powered-off/
> > removed from the slot, hot unplug event should take care of it. Unfortunately
> > Tegra PCIe doesn't support hotplug feature.
> >
> > Manikanta
> 
> Hi Bjorn,
> 
> I thought about your comment in
> https://patchwork.ozlabs.org/patch/1084204/ again.  What if I add link
> up check in tegra_pcie_isr() and make "response decoding error" as
> debug print? EP Config access will happen when link is down, but
> "Response decoding error" print comes only if debug log is enabled.
> This way we can avoid race issue in config accessors and we get prints
> when debug logs are enabled.

I still do not see what you are actually solving. This patch should
be dropped.

Thanks,
Lorenzo

> Thierry,
> Please share your inputs as well.
> 
> Manikanta
>  
> 
> >>> diff --git a/drivers/pci/controller/pci-tegra.c b/drivers/pci/controller/pci-tegra.c
> >>> index d20c88a79e00..33f4dfab9e35 100644
> >>> --- a/drivers/pci/controller/pci-tegra.c
> >>> +++ b/drivers/pci/controller/pci-tegra.c
> >>> @@ -428,6 +428,14 @@ static inline u32 pads_readl(struct tegra_pcie *pcie, unsigned long offset)
> >>>  	return readl(pcie->pads + offset);
> >>>  }
> >>>  
> >>> +static bool tegra_pcie_link_up(struct tegra_pcie_port *port)
> >>> +{
> >>> +	u32 value;
> >>> +
> >>> +	value = readl(port->base + RP_LINK_CONTROL_STATUS);
> >>> +	return !!(value & RP_LINK_CONTROL_STATUS_DL_LINK_ACTIVE);
> >>> +}
> >>> +
> >>>  /*
> >>>   * The configuration space mapping on Tegra is somewhat similar to the ECAM
> >>>   * defined by PCIe. However it deviates a bit in how the 4 bits for extended
> >>> @@ -493,20 +501,50 @@ static void __iomem *tegra_pcie_map_bus(struct pci_bus *bus,
> >>>  static int tegra_pcie_config_read(struct pci_bus *bus, unsigned int devfn,
> >>>  				  int where, int size, u32 *value)
> >>>  {
> >>> +	struct tegra_pcie *pcie = bus->sysdata;
> >>> +	struct pci_dev *bridge;
> >>> +	struct tegra_pcie_port *port;
> >>> +
> >>>  	if (bus->number == 0)
> >>>  		return pci_generic_config_read32(bus, devfn, where, size,
> >>>  						 value);
> >>>  
> >>> +	bridge = pcie_find_root_port(bus->self);
> >>> +
> >>> +	list_for_each_entry(port, &pcie->ports, list)
> >>> +		if (port->index + 1 == PCI_SLOT(bridge->devfn))
> >>> +			break;
> >>> +
> >>> +	/* If there is no link, then there is no device */
> >>> +	if (!tegra_pcie_link_up(port)) {
> >>> +		*value = 0xffffffff;
> >>> +		return PCIBIOS_DEVICE_NOT_FOUND;
> >>> +	}
> >>> +
> >>>  	return pci_generic_config_read(bus, devfn, where, size, value);
> >>>  }
> >>>  
> >>>  static int tegra_pcie_config_write(struct pci_bus *bus, unsigned int devfn,
> >>>  				   int where, int size, u32 value)
> >>>  {
> >>> +	struct tegra_pcie *pcie = bus->sysdata;
> >>> +	struct tegra_pcie_port *port;
> >>> +	struct pci_dev *bridge;
> >>> +
> >>>  	if (bus->number == 0)
> >>>  		return pci_generic_config_write32(bus, devfn, where, size,
> >>>  						  value);
> >>>  
> >>> +	bridge = pcie_find_root_port(bus->self);
> >>> +
> >>> +	list_for_each_entry(port, &pcie->ports, list)
> >>> +		if (port->index + 1 == PCI_SLOT(bridge->devfn))
> >>> +			break;
> >>> +
> >>> +	/* If there is no link, then there is no device */
> >>> +	if (!tegra_pcie_link_up(port))
> >>> +		return PCIBIOS_DEVICE_NOT_FOUND;
> >>> +
> >>>  	return pci_generic_config_write(bus, devfn, where, size, value);
> >>>  }
> >>>  
> >>> -- 
> >>> 2.17.1
> >>>
>
Thierry Reding June 13, 2019, 3:42 p.m. UTC | #5
On Thu, Jun 13, 2019 at 03:39:46PM +0100, Lorenzo Pieralisi wrote:
> On Mon, Jun 10, 2019 at 10:08:16AM +0530, Manikanta Maddireddy wrote:
> > 
> > 
> > On 04-Jun-19 7:40 PM, Manikanta Maddireddy wrote:
> > >
> > > On 04-Jun-19 6:44 PM, Thierry Reding wrote:
> > >> On Thu, May 16, 2019 at 11:23:01AM +0530, Manikanta Maddireddy wrote:
> > >>> Few endpoints like Wi-Fi supports power on/off and to leverage that
> > >>> root port must support hot-plug and hot-unplug. Tegra PCIe doesn't
> > >>> support hot-plug and hot-unplug, however it supports endpoint power
> > >>> on/off feature as follows,
> > >>>  - Power off sequence:
> > >>>    - Transition of PCIe link to L2
> > >>>    - Power off endpoint
> > >>>    - Leave root port in power up state with the link in L2
> > >>>  - Power on sequence:
> > >>>    - Power on endpoint
> > >>>    - Apply hot reset to get PCIe link up
> > >>>
> > >>> PCIe client driver stops accessing PCIe endpoint config and BAR registers
> > >>> after endpoint is powered off. However, software applications like x11
> > >>> server or lspci can access endpoint config registers in which case
> > >>> host controller raises "response decoding" errors. To avoid this scenario,
> > >>> add PCIe link up check in config read and write callback functions before
> > >>> accessing endpoint config registers.
> > >>>
> > >>> Signed-off-by: Manikanta Maddireddy <mmaddireddy@nvidia.com>
> > >>> ---
> > >>> V4: No change
> > >>>
> > >>> V3: Update the commit log with explanation for the need of this patch
> > >>>
> > >>> V2: Change tegra_pcie_link_status() to tegra_pcie_link_up()
> > >>>
> > >>>  drivers/pci/controller/pci-tegra.c | 38 ++++++++++++++++++++++++++++++
> > >>>  1 file changed, 38 insertions(+)
> > >> This still doesn't look right to me conceptually. If somebody wants to
> > >> access the PCI devices after the kernel has powered them off, why can't
> > >> we just power the devices back on so that we allow userspace to properly
> > >> access the devices?
> > > 1. WiFi devices provides power-off feature for power saving in mobiles.
> > > When WiFi is turned off we shouldn't power on the HW back without user
> > > turning it back on.
> > > 2. When ever user process tries to access config space, it'll end up
> > > in these functions. We cannot have is_powered_on check in config read/write
> > > callbacks.
> > > 3. WiFi power on/off is device specific feature, we shouldn't handle it
> > > in PCI subsystem or host controller driver.
> > >
> > >> Or if that's not what we want, shouldn't we add something to the core
> > >> PCI infrastructure to let us deal with this? It seems like this is some
> > >> general problem that would apply to every PCI device and host bridge
> > >> driver. Having each driver implement this logic separately doesn't seem
> > >> like a good idea to me.
> > >>
> > >> Thierry
> > > This should be handled by hotplug feature, whenever endpoint is powered-off/
> > > removed from the slot, hot unplug event should take care of it. Unfortunately
> > > Tegra PCIe doesn't support hotplug feature.
> > >
> > > Manikanta
> > 
> > Hi Bjorn,
> > 
> > I thought about your comment in
> > https://patchwork.ozlabs.org/patch/1084204/ again.  What if I add link
> > up check in tegra_pcie_isr() and make "response decoding error" as
> > debug print? EP Config access will happen when link is down, but
> > "Response decoding error" print comes only if debug log is enabled.
> > This way we can avoid race issue in config accessors and we get prints
> > when debug logs are enabled.
> 
> I still do not see what you are actually solving. This patch should
> be dropped.

The problem that Manikanta is trying to solve here occurs in this
situation (Manikanta, correct me if I've got this wrong): on some
setups, a WiFi module connected over PCI will toggle a power GPIO as
part of runtime suspend. This effectively causes the module to disappear
from the PCI bus (i.e. it can no longer be accessed until the power GPIO
is toggled again).

This is fine from a kernel point of view because the kernel keeps track
of what devices are suspended. However, userspace will occasionally try
to read the configuration space of all devices, and since it
doesn't have any knowledge about the suspend state of these devices, it
doesn't know which ones to leave alone. I think this happens when the
X.Org server is running.

One thing that Manikanta and I had discussed was that perhaps the device
should be hot-unplugged when it goes into this low-power state. However,
we don't support hotplug on Tegra210 where this is needed, so we'd need
some sort of software-induced hot-unplug. However, this low power state
is entered when the WiFi interface is taken down (i.e. ip link set dev
<interface> down). If we were to remove the PCI device in that case, it
means that the interface goes away completely, which is completely
unexpected from a user's perspective. After all, taking a link down and
up may be something that scripts are doing all the time. They'd fall
over if after taking the interface down, the interface completely
disappears.

It's also not entirely clear to me how we get the device back onto the
bus again after it is in low power. If we hot-unplug the device, then
the driver will be unbound. Presumably the driver is what's controlling
the power GPIO, so there won't be any entity that can be used to bring
the chip back to life. Unless we deal with that power GPIO elsewhere
(rfkill switch perhaps?).

Perhaps one other way to deal with this would be to track the suspend
state of devices and then have the code that implements the PCI access
from userspace refuse accesses to devices that are asleep. I suppose
this is somewhat of an odd use-case because traditionally I guess PCI
devices never power down to a state where their configuration space can
no longer be accessed. At least that's what would explain why this has
never been an issue before. Or perhaps it has?

The last resort would be to just never put the WiFi chip into that low
power mode, though I'm not exactly sure what that means for the power
consumption on the affected systems.

Manikanta, can you fill in some of the blanks above?

Thierry

> > Thierry,
> > Please share your inputs as well.
> > 
> > Manikanta
> >  
> > 
> > >>> diff --git a/drivers/pci/controller/pci-tegra.c b/drivers/pci/controller/pci-tegra.c
> > >>> index d20c88a79e00..33f4dfab9e35 100644
> > >>> --- a/drivers/pci/controller/pci-tegra.c
> > >>> +++ b/drivers/pci/controller/pci-tegra.c
> > >>> @@ -428,6 +428,14 @@ static inline u32 pads_readl(struct tegra_pcie *pcie, unsigned long offset)
> > >>>  	return readl(pcie->pads + offset);
> > >>>  }
> > >>>  
> > >>> +static bool tegra_pcie_link_up(struct tegra_pcie_port *port)
> > >>> +{
> > >>> +	u32 value;
> > >>> +
> > >>> +	value = readl(port->base + RP_LINK_CONTROL_STATUS);
> > >>> +	return !!(value & RP_LINK_CONTROL_STATUS_DL_LINK_ACTIVE);
> > >>> +}
> > >>> +
> > >>>  /*
> > >>>   * The configuration space mapping on Tegra is somewhat similar to the ECAM
> > >>>   * defined by PCIe. However it deviates a bit in how the 4 bits for extended
> > >>> @@ -493,20 +501,50 @@ static void __iomem *tegra_pcie_map_bus(struct pci_bus *bus,
> > >>>  static int tegra_pcie_config_read(struct pci_bus *bus, unsigned int devfn,
> > >>>  				  int where, int size, u32 *value)
> > >>>  {
> > >>> +	struct tegra_pcie *pcie = bus->sysdata;
> > >>> +	struct pci_dev *bridge;
> > >>> +	struct tegra_pcie_port *port;
> > >>> +
> > >>>  	if (bus->number == 0)
> > >>>  		return pci_generic_config_read32(bus, devfn, where, size,
> > >>>  						 value);
> > >>>  
> > >>> +	bridge = pcie_find_root_port(bus->self);
> > >>> +
> > >>> +	list_for_each_entry(port, &pcie->ports, list)
> > >>> +		if (port->index + 1 == PCI_SLOT(bridge->devfn))
> > >>> +			break;
> > >>> +
> > >>> +	/* If there is no link, then there is no device */
> > >>> +	if (!tegra_pcie_link_up(port)) {
> > >>> +		*value = 0xffffffff;
> > >>> +		return PCIBIOS_DEVICE_NOT_FOUND;
> > >>> +	}
> > >>> +
> > >>>  	return pci_generic_config_read(bus, devfn, where, size, value);
> > >>>  }
> > >>>  
> > >>>  static int tegra_pcie_config_write(struct pci_bus *bus, unsigned int devfn,
> > >>>  				   int where, int size, u32 value)
> > >>>  {
> > >>> +	struct tegra_pcie *pcie = bus->sysdata;
> > >>> +	struct tegra_pcie_port *port;
> > >>> +	struct pci_dev *bridge;
> > >>> +
> > >>>  	if (bus->number == 0)
> > >>>  		return pci_generic_config_write32(bus, devfn, where, size,
> > >>>  						  value);
> > >>>  
> > >>> +	bridge = pcie_find_root_port(bus->self);
> > >>> +
> > >>> +	list_for_each_entry(port, &pcie->ports, list)
> > >>> +		if (port->index + 1 == PCI_SLOT(bridge->devfn))
> > >>> +			break;
> > >>> +
> > >>> +	/* If there is no link, then there is no device */
> > >>> +	if (!tegra_pcie_link_up(port))
> > >>> +		return PCIBIOS_DEVICE_NOT_FOUND;
> > >>> +
> > >>>  	return pci_generic_config_write(bus, devfn, where, size, value);
> > >>>  }
> > >>>  
> > >>> -- 
> > >>> 2.17.1
> > >>>
> >
Manikanta Maddireddy June 17, 2019, 10:01 a.m. UTC | #6
On 13-Jun-19 9:12 PM, Thierry Reding wrote:
> On Thu, Jun 13, 2019 at 03:39:46PM +0100, Lorenzo Pieralisi wrote:
>> On Mon, Jun 10, 2019 at 10:08:16AM +0530, Manikanta Maddireddy wrote:
>>>
>>> On 04-Jun-19 7:40 PM, Manikanta Maddireddy wrote:
>>>> On 04-Jun-19 6:44 PM, Thierry Reding wrote:
>>>>> On Thu, May 16, 2019 at 11:23:01AM +0530, Manikanta Maddireddy wrote:
>>>>>> Few endpoints like Wi-Fi supports power on/off and to leverage that
>>>>>> root port must support hot-plug and hot-unplug. Tegra PCIe doesn't
>>>>>> support hot-plug and hot-unplug, however it supports endpoint power
>>>>>> on/off feature as follows,
>>>>>>  - Power off sequence:
>>>>>>    - Transition of PCIe link to L2
>>>>>>    - Power off endpoint
>>>>>>    - Leave root port in power up state with the link in L2
>>>>>>  - Power on sequence:
>>>>>>    - Power on endpoint
>>>>>>    - Apply hot reset to get PCIe link up
>>>>>>
>>>>>> PCIe client driver stops accessing PCIe endpoint config and BAR registers
>>>>>> after endpoint is powered off. However, software applications like x11
>>>>>> server or lspci can access endpoint config registers in which case
>>>>>> host controller raises "response decoding" errors. To avoid this scenario,
>>>>>> add PCIe link up check in config read and write callback functions before
>>>>>> accessing endpoint config registers.
>>>>>>
>>>>>> Signed-off-by: Manikanta Maddireddy <mmaddireddy@nvidia.com>
>>>>>> ---
>>>>>> V4: No change
>>>>>>
>>>>>> V3: Update the commit log with explanation for the need of this patch
>>>>>>
>>>>>> V2: Change tegra_pcie_link_status() to tegra_pcie_link_up()
>>>>>>
>>>>>>  drivers/pci/controller/pci-tegra.c | 38 ++++++++++++++++++++++++++++++
>>>>>>  1 file changed, 38 insertions(+)
>>>>> This still doesn't look right to me conceptually. If somebody wants to
>>>>> access the PCI devices after the kernel has powered them off, why can't
>>>>> we just power the devices back on so that we allow userspace to properly
>>>>> access the devices?
>>>> 1. WiFi devices provides power-off feature for power saving in mobiles.
>>>> When WiFi is turned off we shouldn't power on the HW back without user
>>>> turning it back on.
>>>> 2. When ever user process tries to access config space, it'll end up
>>>> in these functions. We cannot have is_powered_on check in config read/write
>>>> callbacks.
>>>> 3. WiFi power on/off is device specific feature, we shouldn't handle it
>>>> in PCI subsystem or host controller driver.
>>>>
>>>>> Or if that's not what we want, shouldn't we add something to the core
>>>>> PCI infrastructure to let us deal with this? It seems like this is some
>>>>> general problem that would apply to every PCI device and host bridge
>>>>> driver. Having each driver implement this logic separately doesn't seem
>>>>> like a good idea to me.
>>>>>
>>>>> Thierry
>>>> This should be handled by hotplug feature, whenever endpoint is powered-off/
>>>> removed from the slot, hot unplug event should take care of it. Unfortunately
>>>> Tegra PCIe doesn't support hotplug feature.
>>>>
>>>> Manikanta
>>> Hi Bjorn,
>>>
>>> I thought about your comment in
>>> https://patchwork.ozlabs.org/patch/1084204/ again.  What if I add link
>>> up check in tegra_pcie_isr() and make "response decoding error" as
>>> debug print? EP Config access will happen when link is down, but
>>> "Response decoding error" print comes only if debug log is enabled.
>>> This way we can avoid race issue in config accessors and we get prints
>>> when debug logs are enabled.
>> I still do not see what you are actually solving. This patch should
>> be dropped.
> The problem that Manikanta is trying to solve here occurs in this
> situation (Manikanta, correct me if I've got this wrong): on some
> setups, a WiFi module connected over PCI will toggle a power GPIO as
> part of runtime suspend. This effectively causes the module to disappear
> from the PCI bus (i.e. it can no longer be accessed until the power GPIO
> is toggled again).

The GPIO is toggled as part of WiFi on/off, which can be triggered from the network manager UI.

>
> This is fine from a kernel point of view because the kernel keeps track
> of what devices are suspended. However, userspace will occasionally try
> to read the configuration space access of all devices, and since it
> doesn't have any knowledge about the suspend state of these devices, it
> doesn't know which ones to leave alone. I think this happens when the
> X.Org server is running.

This is fine from a kernel point of view because the PCI client driver
doesn't initiate any PCIe transactions until the network interface
is brought up when WiFi is turned on.

>
> One thing that Manikanta and I had discussed was that perhaps the device
> should be hot-unplugged when it goes into this low-power state. However,
> we don't support hotplug on Tegra210 where this is needed, so we'd need
> some sort of software-induced hot-unplug. However, this low power state
> is entered when the WiFi interface is taken down (i.e. ip link set dev
> <interface> down). If we were to remove the PCI device in that case, it
> means that the interface goes away completely, which is completely
> unexpected from a user's perspective. After all, taking a link down and
> up may be something that scripts are doing all the time. They'd fall
> over if after taking the interface down, the interface completely
> disappears.
>
> It's also not entirely clear to me how we get the device back onto the
> bus again after it is in low power. If we hot-unplug the device, then
> the driver will be unbound. Presumably the driver is what's controlling
> the power GPIO, so there won't be any entity that can be used to bring
> the chip back to life. Unless we deal with that power GPIO elsewhere
> (rfkill switch perhaps?).

Correct, rfkill switch should handle the GPIO.
Sequence will be,
 - WiFi ON
   - rfkill switch enables the WiFi GPIO
   - Tegra PCIe receives hot plug event
   - Tegra PCIe hot plug driver rescans PCI bus and enumerates the device
   - PCI client driver is probed, which will create network interface
 - WiFi OFF
   - rfkill switch disables the WiFi GPIO
   - Tegra PCIe receives hot unplug event
   - Tegra PCIe hot plug driver removes PCI devices under the bus
   - PCI client driver remove is executed, which will remove network interface

We don't need the current patch in this case because the PCI device is not
present in the PCI hierarchy, so there cannot be any EP config access with the
link down. However, Tegra doesn't support hot plug and unplug events. I am not
sure if we have any software-based hot plug event trigger.

I will drop the current patch and investigate whether the above sequence
can be implemented for Tegra.

Manikanta

>
> Perhaps one other way to deal with this would be to track the suspend
> state of devices and then have the code that implements the PCI access
> from userspace refuse accesses to devices that are asleep. I suppose
> this is somewhat of an odd use-case because traditionally I guess PCI
> devices never power down to a state where their configuration space can
> no longer be accessed. At least that's what would explain why this has
> never been an issue before. Or perhaps it has?
>
> The last resort would be to just never put the WiFi chip into that low
> power mode, though I'm not exactly sure what that means for the power
> consumption on the affected systems.
>
> Manikanta, can you fill in some of the blanks above?
>
> Thierry
>>> Thierry,
>>> Please share your inputs as well.
>>>
>>> Manikanta
>>>  
>>>
>>>>>> diff --git a/drivers/pci/controller/pci-tegra.c b/drivers/pci/controller/pci-tegra.c
>>>>>> index d20c88a79e00..33f4dfab9e35 100644
>>>>>> --- a/drivers/pci/controller/pci-tegra.c
>>>>>> +++ b/drivers/pci/controller/pci-tegra.c
>>>>>> @@ -428,6 +428,14 @@ static inline u32 pads_readl(struct tegra_pcie *pcie, unsigned long offset)
>>>>>>  	return readl(pcie->pads + offset);
>>>>>>  }
>>>>>>  
>>>>>> +static bool tegra_pcie_link_up(struct tegra_pcie_port *port)
>>>>>> +{
>>>>>> +	u32 value;
>>>>>> +
>>>>>> +	value = readl(port->base + RP_LINK_CONTROL_STATUS);
>>>>>> +	return !!(value & RP_LINK_CONTROL_STATUS_DL_LINK_ACTIVE);
>>>>>> +}
>>>>>> +
>>>>>>  /*
>>>>>>   * The configuration space mapping on Tegra is somewhat similar to the ECAM
>>>>>>   * defined by PCIe. However it deviates a bit in how the 4 bits for extended
>>>>>> @@ -493,20 +501,50 @@ static void __iomem *tegra_pcie_map_bus(struct pci_bus *bus,
>>>>>>  static int tegra_pcie_config_read(struct pci_bus *bus, unsigned int devfn,
>>>>>>  				  int where, int size, u32 *value)
>>>>>>  {
>>>>>> +	struct tegra_pcie *pcie = bus->sysdata;
>>>>>> +	struct pci_dev *bridge;
>>>>>> +	struct tegra_pcie_port *port;
>>>>>> +
>>>>>>  	if (bus->number == 0)
>>>>>>  		return pci_generic_config_read32(bus, devfn, where, size,
>>>>>>  						 value);
>>>>>>  
>>>>>> +	bridge = pcie_find_root_port(bus->self);
>>>>>> +
>>>>>> +	list_for_each_entry(port, &pcie->ports, list)
>>>>>> +		if (port->index + 1 == PCI_SLOT(bridge->devfn))
>>>>>> +			break;
>>>>>> +
>>>>>> +	/* If there is no link, then there is no device */
>>>>>> +	if (!tegra_pcie_link_up(port)) {
>>>>>> +		*value = 0xffffffff;
>>>>>> +		return PCIBIOS_DEVICE_NOT_FOUND;
>>>>>> +	}
>>>>>> +
>>>>>>  	return pci_generic_config_read(bus, devfn, where, size, value);
>>>>>>  }
>>>>>>  
>>>>>>  static int tegra_pcie_config_write(struct pci_bus *bus, unsigned int devfn,
>>>>>>  				   int where, int size, u32 value)
>>>>>>  {
>>>>>> +	struct tegra_pcie *pcie = bus->sysdata;
>>>>>> +	struct tegra_pcie_port *port;
>>>>>> +	struct pci_dev *bridge;
>>>>>> +
>>>>>>  	if (bus->number == 0)
>>>>>>  		return pci_generic_config_write32(bus, devfn, where, size,
>>>>>>  						  value);
>>>>>>  
>>>>>> +	bridge = pcie_find_root_port(bus->self);
>>>>>> +
>>>>>> +	list_for_each_entry(port, &pcie->ports, list)
>>>>>> +		if (port->index + 1 == PCI_SLOT(bridge->devfn))
>>>>>> +			break;
>>>>>> +
>>>>>> +	/* If there is no link, then there is no device */
>>>>>> +	if (!tegra_pcie_link_up(port))
>>>>>> +		return PCIBIOS_DEVICE_NOT_FOUND;
>>>>>> +
>>>>>>  	return pci_generic_config_write(bus, devfn, where, size, value);
>>>>>>  }
>>>>>>  
>>>>>> -- 
>>>>>> 2.17.1
>>>>>>
Thierry Reding June 17, 2019, 11:47 a.m. UTC | #7
On Mon, Jun 17, 2019 at 03:31:38PM +0530, Manikanta Maddireddy wrote:
> 
> 
> On 13-Jun-19 9:12 PM, Thierry Reding wrote:
> > On Thu, Jun 13, 2019 at 03:39:46PM +0100, Lorenzo Pieralisi wrote:
> >> On Mon, Jun 10, 2019 at 10:08:16AM +0530, Manikanta Maddireddy wrote:
> >>>
> >>> On 04-Jun-19 7:40 PM, Manikanta Maddireddy wrote:
> >>>> On 04-Jun-19 6:44 PM, Thierry Reding wrote:
> >>>>> On Thu, May 16, 2019 at 11:23:01AM +0530, Manikanta Maddireddy wrote:
> >>>>>> Few endpoints like Wi-Fi supports power on/off and to leverage that
> >>>>>> root port must support hot-plug and hot-unplug. Tegra PCIe doesn't
> >>>>>> support hot-plug and hot-unplug, however it supports endpoint power
> >>>>>> on/off feature as follows,
> >>>>>>  - Power off sequence:
> >>>>>>    - Transition of PCIe link to L2
> >>>>>>    - Power off endpoint
> >>>>>>    - Leave root port in power up state with the link in L2
> >>>>>>  - Power on sequence:
> >>>>>>    - Power on endpoint
> >>>>>>    - Apply hot reset to get PCIe link up
> >>>>>>
> >>>>>> PCIe client driver stops accessing PCIe endpoint config and BAR registers
> >>>>>> after endpoint is powered off. However, software applications like x11
> >>>>>> server or lspci can access endpoint config registers in which case
> >>>>>> host controller raises "response decoding" errors. To avoid this scenario,
> >>>>>> add PCIe link up check in config read and write callback functions before
> >>>>>> accessing endpoint config registers.
> >>>>>>
> >>>>>> Signed-off-by: Manikanta Maddireddy <mmaddireddy@nvidia.com>
> >>>>>> ---
> >>>>>> V4: No change
> >>>>>>
> >>>>>> V3: Update the commit log with explanation for the need of this patch
> >>>>>>
> >>>>>> V2: Change tegra_pcie_link_status() to tegra_pcie_link_up()
> >>>>>>
> >>>>>>  drivers/pci/controller/pci-tegra.c | 38 ++++++++++++++++++++++++++++++
> >>>>>>  1 file changed, 38 insertions(+)
> >>>>> This still doesn't look right to me conceptually. If somebody wants to
> >>>>> access the PCI devices after the kernel has powered them off, why can't
> >>>>> we just power the devices back on so that we allow userspace to properly
> >>>>> access the devices?
> >>>> 1. WiFi devices provides power-off feature for power saving in mobiles.
> >>>> When WiFi is turned off we shouldn't power on the HW back without user
> >>>> turning it back on.
> >>>> 2. When ever user process tries to access config space, it'll end up
> >>>> in these functions. We cannot have is_powered_on check in config read/write
> >>>> callbacks.
> >>>> 3. WiFi power on/off is device specific feature, we shouldn't handle it
> >>>> in PCI subsystem or host controller driver.
> >>>>
> >>>>> Or if that's not what we want, shouldn't we add something to the core
> >>>>> PCI infrastructure to let us deal with this? It seems like this is some
> >>>>> general problem that would apply to every PCI device and host bridge
> >>>>> driver. Having each driver implement this logic separately doesn't seem
> >>>>> like a good idea to me.
> >>>>>
> >>>>> Thierry
> >>>> This should be handled by hotplug feature, whenever endpoint is powered-off/
> >>>> removed from the slot, hot unplug event should take care of it. Unfortunately
> >>>> Tegra PCIe doesn't support hotplug feature.
> >>>>
> >>>> Manikanta
> >>> Hi Bjorn,
> >>>
> >>> I thought about your comment in
> >>> https://patchwork.ozlabs.org/patch/1084204/ again.  What if I add link
> >>> up check in tegra_pcie_isr() and make "response decoding error" as
> >>> debug print? EP Config access will happen when link is down, but
> >>> "Response decoding error" print comes only if debug log is enabled.
> >>> This way we can avoid race issue in config accessors and we get prints
> >>> when debug logs are enabled.
> >> I still do not see what you are actually solving. This patch should
> >> be dropped.
> > The problem that Manikanta is trying to solve here occurs in this
> > situation (Manikanta, correct me if I've got this wrong): on some
> > setups, a WiFi module connected over PCI will toggle a power GPIO as
> > part of runtime suspend. This effectively causes the module to disappear
> > from the PCI bus (i.e. it can no longer be accessed until the power GPIO
> > is toggled again).
> 
> GPIO is toggled as part of WiFi on/off, can be triggered from network manager UI.
> 
> >
> > This is fine from a kernel point of view because the kernel keeps track
> > of what devices are suspended. However, userspace will occasionally try
> > to read the configuration space access of all devices, and since it
> > doesn't have any knowledge about the suspend state of these devices, it
> > doesn't know which ones to leave alone. I think this happens when the
> > X.Org server is running.
> 
> This is fine from a kernel point of view because PCI client driver
> doesn't initiate any PCIe transaction until network interface
> is up during WiFi on.
> 
> >
> > One thing that Manikanta and I had discussed was that perhaps the device
> > should be hot-unplugged when it goes into this low-power state. However,
> > we don't support hotplug on Tegra210 where this is needed, so we'd need
> > some sort of software-induced hot-unplug. However, this low power state
> > is entered when the WiFi interface is taken down (i.e. ip link set dev
> > <interface> down). If we were to remove the PCI device in that case, it
> > means that the interface goes away completely, which is completely
> > unexpected from a user's perspective. After all, taking a link down and
> > up may be something that scripts are doing all the time. They'd fall
> > over if after taking the interface down, the interface completely
> > disappears.
> >
> > It's also not entirely clear to me how we get the device back onto the
> > bus again after it is in low power. If we hot-unplug the device, then
> > the driver will be unbound. Presumably the driver is what's controlling
> > the power GPIO, so there won't be any entity that can be used to bring
> > the chip back to life. Unless we deal with that power GPIO elsewhere
> > (rfkill switch perhaps?).
> 
> Correct, rfkill switch should handle the GPIO.
> Sequence will be,
>  - WiFi ON
>    - rfkill switch enables the WiFi GPIO
>    - Tegra PCIe receives hot plug event
>    - Tegra PCIe hot plug driver rescans PCI bus and enumerates the device
>    - PCI client driver is probed, which will create network interface
>  - WiFi OFF
>    - rfkill switch disables the WiFi GPIO
>    - Tegra PCIe receives hot unplug event
>    - Tegra PCIe hot plug driver removes PCI devices under the bus
>    - PCI client driver remove is executed, which will remove network interface
> 
> We don't need current patch in this case because PCI device is not present
> in the PCI hierarchy, so there cannot be EP config access with link down.
> However Tegra doesn't support hot plug and unplug events. I am not sure
> if we have any software based hot plug event trigger.
> 
> I will drop current patch and pursue if above sequence can be
> implemented for Tegra.

I just recalled that we have these messages in the kernel log:

	# dmesg | grep tegra-pcie
	[    1.055761] tegra-pcie 1003000.pcie: 4x1, 1x1 configuration
	[    2.745764] tegra-pcie 1003000.pcie: 4x1, 1x1 configuration
	[    2.753073] tegra-pcie 1003000.pcie: probing port 0, using 4 lanes
	[    2.761334] tegra-pcie 1003000.pcie: Slot present pin change, signature: 00000008
	[    3.177607] tegra-pcie 1003000.pcie: link 0 down, retrying
	[    3.585605] tegra-pcie 1003000.pcie: link 0 down, retrying
	[    3.993606] tegra-pcie 1003000.pcie: link 0 down, retrying
	[    4.001214] tegra-pcie 1003000.pcie: link 0 down, ignoring
	[    4.006733] tegra-pcie 1003000.pcie: probing port 1, using 1 lanes
	[    4.015042] tegra-pcie 1003000.pcie: Slot present pin change, signature: 00000000
	[    4.031177] tegra-pcie 1003000.pcie: PCI host bridge to bus 0000:00

These "slot present pin change" messages do look a lot like hotplug
related messages. Could we perhaps use those to our advantage for this
case? Do you see these when you run on the platform where WiFi is
enabled/disabled using rfkill?

Given that rfkill is completely decoupled from PCI, I don't see how we
would trigger any software-based hotplug mechanism. Perhaps one thing
that we could do is the equivalent of this:

	# echo 1 > /sys/bus/pci/rescan

from some script that's perhaps tied to the rfkill somehow. I'm not sure
if that's possible, or generic enough.

Thierry

> > Perhaps one other way to deal with this would be to track the suspend
> > state of devices and then have the code that implements the PCI access
> > from userspace refuse accesses to devices that are asleep. I suppose
> > this is somewhat of an odd use-case because traditionally I guess PCI
> > devices never power down to a state where their configuration space can
> > no longer be accessed. At least that's what would explain why this has
> > never been an issue before. Or perhaps it has?
> >
> > The last resort would be to just never put the WiFi chip into that low
> > power mode, though I'm not exactly sure what that means for the power
> > consumption on the affected systems.
> >
> > Manikanta, can you fill in some of the blanks above?
> >
> > Thierry
> >>> Thierry,
> >>> Please share your inputs as well.
> >>>
> >>> Manikanta
> >>>  
> >>>
> >>>>>> diff --git a/drivers/pci/controller/pci-tegra.c b/drivers/pci/controller/pci-tegra.c
> >>>>>> index d20c88a79e00..33f4dfab9e35 100644
> >>>>>> --- a/drivers/pci/controller/pci-tegra.c
> >>>>>> +++ b/drivers/pci/controller/pci-tegra.c
> >>>>>> @@ -428,6 +428,14 @@ static inline u32 pads_readl(struct tegra_pcie *pcie, unsigned long offset)
> >>>>>>  	return readl(pcie->pads + offset);
> >>>>>>  }
> >>>>>>  
> >>>>>> +static bool tegra_pcie_link_up(struct tegra_pcie_port *port)
> >>>>>> +{
> >>>>>> +	u32 value;
> >>>>>> +
> >>>>>> +	value = readl(port->base + RP_LINK_CONTROL_STATUS);
> >>>>>> +	return !!(value & RP_LINK_CONTROL_STATUS_DL_LINK_ACTIVE);
> >>>>>> +}
> >>>>>> +
> >>>>>>  /*
> >>>>>>   * The configuration space mapping on Tegra is somewhat similar to the ECAM
> >>>>>>   * defined by PCIe. However it deviates a bit in how the 4 bits for extended
> >>>>>> @@ -493,20 +501,50 @@ static void __iomem *tegra_pcie_map_bus(struct pci_bus *bus,
> >>>>>>  static int tegra_pcie_config_read(struct pci_bus *bus, unsigned int devfn,
> >>>>>>  				  int where, int size, u32 *value)
> >>>>>>  {
> >>>>>> +	struct tegra_pcie *pcie = bus->sysdata;
> >>>>>> +	struct pci_dev *bridge;
> >>>>>> +	struct tegra_pcie_port *port;
> >>>>>> +
> >>>>>>  	if (bus->number == 0)
> >>>>>>  		return pci_generic_config_read32(bus, devfn, where, size,
> >>>>>>  						 value);
> >>>>>>  
> >>>>>> +	bridge = pcie_find_root_port(bus->self);
> >>>>>> +
> >>>>>> +	list_for_each_entry(port, &pcie->ports, list)
> >>>>>> +		if (port->index + 1 == PCI_SLOT(bridge->devfn))
> >>>>>> +			break;
> >>>>>> +
> >>>>>> +	/* If there is no link, then there is no device */
> >>>>>> +	if (!tegra_pcie_link_up(port)) {
> >>>>>> +		*value = 0xffffffff;
> >>>>>> +		return PCIBIOS_DEVICE_NOT_FOUND;
> >>>>>> +	}
> >>>>>> +
> >>>>>>  	return pci_generic_config_read(bus, devfn, where, size, value);
> >>>>>>  }
> >>>>>>  
> >>>>>>  static int tegra_pcie_config_write(struct pci_bus *bus, unsigned int devfn,
> >>>>>>  				   int where, int size, u32 value)
> >>>>>>  {
> >>>>>> +	struct tegra_pcie *pcie = bus->sysdata;
> >>>>>> +	struct tegra_pcie_port *port;
> >>>>>> +	struct pci_dev *bridge;
> >>>>>> +
> >>>>>>  	if (bus->number == 0)
> >>>>>>  		return pci_generic_config_write32(bus, devfn, where, size,
> >>>>>>  						  value);
> >>>>>>  
> >>>>>> +	bridge = pcie_find_root_port(bus->self);
> >>>>>> +
> >>>>>> +	list_for_each_entry(port, &pcie->ports, list)
> >>>>>> +		if (port->index + 1 == PCI_SLOT(bridge->devfn))
> >>>>>> +			break;
> >>>>>> +
> >>>>>> +	/* If there is no link, then there is no device */
> >>>>>> +	if (!tegra_pcie_link_up(port))
> >>>>>> +		return PCIBIOS_DEVICE_NOT_FOUND;
> >>>>>> +
> >>>>>>  	return pci_generic_config_write(bus, devfn, where, size, value);
> >>>>>>  }
> >>>>>>  
> >>>>>> -- 
> >>>>>> 2.17.1
> >>>>>>
>
Bjorn Helgaas June 17, 2019, 7:30 p.m. UTC | #8
[+cc Rafael, linux-pm, in case they have insights on how rfkill works]

On Mon, Jun 17, 2019 at 01:47:45PM +0200, Thierry Reding wrote:
> On Mon, Jun 17, 2019 at 03:31:38PM +0530, Manikanta Maddireddy wrote:
> > On 13-Jun-19 9:12 PM, Thierry Reding wrote:
> > > On Thu, Jun 13, 2019 at 03:39:46PM +0100, Lorenzo Pieralisi wrote:
> > >> On Mon, Jun 10, 2019 at 10:08:16AM +0530, Manikanta Maddireddy wrote:
> > >>> On 04-Jun-19 7:40 PM, Manikanta Maddireddy wrote:
> > >>>> On 04-Jun-19 6:44 PM, Thierry Reding wrote:
> > >>>>> On Thu, May 16, 2019 at 11:23:01AM +0530, Manikanta Maddireddy wrote:
> > >>>>>> Few endpoints like Wi-Fi supports power on/off and to leverage that
> > >>>>>> root port must support hot-plug and hot-unplug. Tegra PCIe doesn't
> > >>>>>> support hot-plug and hot-unplug, however it supports endpoint power
> > >>>>>> on/off feature as follows,
> > >>>>>>  - Power off sequence:
> > >>>>>>    - Transition of PCIe link to L2
> > >>>>>>    - Power off endpoint
> > >>>>>>    - Leave root port in power up state with the link in L2
> > >>>>>>  - Power on sequence:
> > >>>>>>    - Power on endpoint
> > >>>>>>    - Apply hot reset to get PCIe link up
> > >>>>>>
> > >>>>>> PCIe client driver stops accessing PCIe endpoint config and
> > >>>>>> BAR registers after endpoint is powered off. However,
> > >>>>>> software applications like x11 server or lspci can access
> > >>>>>> endpoint config registers in which case host controller
> > >>>>>> raises "response decoding" errors. To avoid this scenario,
> > >>>>>> add PCIe link up check in config read and write callback
> > >>>>>> functions before accessing endpoint config registers.

> > >>>>>> Signed-off-by: Manikanta Maddireddy <mmaddireddy@nvidia.com>
> > >>>>>> ---
> > >>>>>> V4: No change
> > >>>>>>
> > >>>>>> V3: Update the commit log with explanation for the need of this patch
> > >>>>>>
> > >>>>>> V2: Change tegra_pcie_link_status() to tegra_pcie_link_up()
> > >>>>>>
> > >>>>>>  drivers/pci/controller/pci-tegra.c | 38 ++++++++++++++++++++++++++++++
> > >>>>>>  1 file changed, 38 insertions(+)

> > >>>>> This still doesn't look right to me conceptually. If
> > >>>>> somebody wants to access the PCI devices after the kernel
> > >>>>> has powered them off, why can't we just power the devices
> > >>>>> back on so that we allow userspace to properly access the
> > >>>>> devices?

> > >>>> 1. WiFi devices provides power-off feature for power saving
> > >>>> in mobiles.  When WiFi is turned off we shouldn't power on
> > >>>> the HW back without user turning it back on.

> > >>>> 2. When ever user process tries to access config space, it'll
> > >>>> end up in these functions. We cannot have is_powered_on check
> > >>>> in config read/write callbacks.

> > >>>> 3. WiFi power on/off is device specific feature, we shouldn't
> > >>>> handle it in PCI subsystem or host controller driver.

> > >>>>
> > >>>>> Or if that's not what we want, shouldn't we add something to
> > >>>>> the core PCI infrastructure to let us deal with this? It
> > >>>>> seems like this is some general problem that would apply to
> > >>>>> every PCI device and host bridge driver. Having each driver
> > >>>>> implement this logic separately doesn't seem like a good
> > >>>>> idea to me.

> > >>>> This should be handled by hotplug feature, whenever endpoint
> > >>>> is powered-off/ removed from the slot, hot unplug event
> > >>>> should take care of it. Unfortunately Tegra PCIe doesn't
> > >>>> support hotplug feature.

> > >>> I thought about your comment in
> > >>> https://patchwork.ozlabs.org/patch/1084204/ again.  What if I
> > >>> add link up check in tegra_pcie_isr() and make "response
> > >>> decoding error" as debug print? EP Config access will happen
> > >>> when link is down, but "Response decoding error" print comes
> > >>> only if debug log is enabled.  This way we can avoid race
> > >>> issue in config accessors and we get prints when debug logs
> > >>> are enabled.

> > > The problem that Manikanta is trying to solve here occurs in
> > > this situation (Manikanta, correct me if I've got this wrong):
> > > on some setups, a WiFi module connected over PCI will toggle a
> > > power GPIO as part of runtime suspend. This effectively causes
> > > the module to disappear from the PCI bus (i.e. it can no longer
> > > be accessed until the power GPIO is toggled again).
> > 
> > GPIO is toggled as part of WiFi on/off, can be triggered from
> > network manager UI.

> > > This is fine from a kernel point of view because the kernel keeps track
> > > of what devices are suspended. However, userspace will occasionally try
> > > to read the configuration space access of all devices, and since it
> > > doesn't have any knowledge about the suspend state of these devices, it
> > > doesn't know which ones to leave alone. I think this happens when the
> > > X.Org server is running.
> > 
> > This is fine from a kernel point of view because PCI client driver
> > doesn't initiate any PCIe transaction until network interface
> > is up during WiFi on.
> > 
> > > One thing that Manikanta and I had discussed was that perhaps
> > > the device should be hot-unplugged when it goes into this
> > > low-power state. However, we don't support hotplug on Tegra210
> > > where this is needed, so we'd need some sort of software-induced
> > > hot-unplug. However, this low power state is entered when the
> > > WiFi interface is taken down (i.e. ip link set dev <interface>
> > > down). If we were to remove the PCI device in that case, it
> > > means that the interface goes away completely, which is
> > > completely unexpected from a user's perspective. After all,
> > > taking a link down and up may be something that scripts are
> > > doing all the time. They'd fall over if after taking the
> > > interface down, the interface completely disappears.

> > > It's also not entirely clear to me how we get the device back
> > > onto the bus again after it is in low power. If we hot-unplug
> > > the device, then the driver will be unbound. Presumably the
> > > driver is what's controlling the power GPIO, so there won't be
> > > any entity that can be used to bring the chip back to life.
> > > Unless we deal with that power GPIO elsewhere (rfkill switch
> > > perhaps?).

> > Correct, rfkill switch should handle the GPIO.
> > Sequence will be,
> >  - WiFi ON
> >    - rfkill switch enables the WiFi GPIO
> >    - Tegra PCIe receives hot plug event
> >    - Tegra PCIe hot plug driver rescans PCI bus and enumerates the device
> >    - PCI client driver is probed, which will create network interface
> >  - WiFi OFF
> >    - rfkill switch disables the WiFi GPIO
> >    - Tegra PCIe receives hot unplug event
> >    - Tegra PCIe hot plug driver removes PCI devices under the bus
> >    - PCI client driver remove is executed, which will remove
> >      network interface

> > We don't need current patch in this case because PCI device is not
> > present in the PCI hierarchy, so there cannot be EP config access
> > with link down.  However Tegra doesn't support hot plug and unplug
> > events. I am not sure if we have any software based hot plug event
> > trigger.

> > I will drop current patch and pursue if above sequence can be
> > implemented for Tegra.
> 
> I just recalled that we have these messages in the kernel log:
> 
> 	# dmesg | grep tegra-pcie
> 	[    1.055761] tegra-pcie 1003000.pcie: 4x1, 1x1 configuration
> 	[    2.745764] tegra-pcie 1003000.pcie: 4x1, 1x1 configuration
> 	[    2.753073] tegra-pcie 1003000.pcie: probing port 0, using 4 lanes
> 	[    2.761334] tegra-pcie 1003000.pcie: Slot present pin change, signature: 00000008
> 	[    3.177607] tegra-pcie 1003000.pcie: link 0 down, retrying
> 	[    3.585605] tegra-pcie 1003000.pcie: link 0 down, retrying
> 	[    3.993606] tegra-pcie 1003000.pcie: link 0 down, retrying
> 	[    4.001214] tegra-pcie 1003000.pcie: link 0 down, ignoring
> 	[    4.006733] tegra-pcie 1003000.pcie: probing port 1, using 1 lanes
> 	[    4.015042] tegra-pcie 1003000.pcie: Slot present pin change, signature: 00000000
> 	[    4.031177] tegra-pcie 1003000.pcie: PCI host bridge to bus 0000:00
> 
> These "slot present pin change" message do look a lot like hotplug
> related messages. Could we perhaps use those to our advantage for this
> case? Do you see these when you run on the platform where WiFi is
> enabled/disabled using rfkill?
> 
> Given that rfkill is completely decoupled from PCI, I don't see how we
> would trigger any software-based hotplug mechanism. Perhaps one thing
> that we could do is the equivalent of this:
> 
> 	# echo 1 > /sys/bus/pci/rescan
> 
> from some script that's perhaps tied to the rfkill somehow. I'm not sure
> if that's possible, or generic enough.

How does rfkill work?  It sounds like it completely removes power from
the wifi device, putting it in D3cold.  Is there any software
notification other than the "Slot present pin change" (which looks
like a Tegra-specific thing)?

If the device is in D3cold, it won't respond to any PCI transactions,
and there's no standard PCI mechanism to wake it up.  Probably the
cleanest way to handle this is to make it a hot-unplug.

If this were an ACPI system, the rfkill might be visible as some sort
of ACPI power management event, and there might be a corresponding way
for software to bring the device back to D0 temporarily.  That would
make lspci and X config reads work.  But I don't think this system has
ACPI.

> > > Perhaps one other way to deal with this would be to track the
> > > suspend state of devices and then have the code that implements
> > > the PCI access from userspace refuse accesses to devices that
> > > are asleep. I suppose this is somewhat of an odd use-case
> > > because traditionally I guess PCI devices never power down to a
> > > state where their configuration space can no longer be accessed.
> > > At least that's what would explain why this has never been an
> > > issue before. Or perhaps it has?
> > >
> > > The last resort would be to just never put the WiFi chip into
> > > that low power mode, though I'm not exactly sure what that means
> > > for the power consumption on the affected systems.

> > >>>>>> diff --git a/drivers/pci/controller/pci-tegra.c b/drivers/pci/controller/pci-tegra.c
> > >>>>>> index d20c88a79e00..33f4dfab9e35 100644
> > >>>>>> --- a/drivers/pci/controller/pci-tegra.c
> > >>>>>> +++ b/drivers/pci/controller/pci-tegra.c
> > >>>>>> @@ -428,6 +428,14 @@ static inline u32 pads_readl(struct tegra_pcie *pcie, unsigned long offset)
> > >>>>>>  	return readl(pcie->pads + offset);
> > >>>>>>  }
> > >>>>>>  
> > >>>>>> +static bool tegra_pcie_link_up(struct tegra_pcie_port *port)
> > >>>>>> +{
> > >>>>>> +	u32 value;
> > >>>>>> +
> > >>>>>> +	value = readl(port->base + RP_LINK_CONTROL_STATUS);
> > >>>>>> +	return !!(value & RP_LINK_CONTROL_STATUS_DL_LINK_ACTIVE);
> > >>>>>> +}
> > >>>>>> +
> > >>>>>>  /*
> > >>>>>>   * The configuration space mapping on Tegra is somewhat similar to the ECAM
> > >>>>>>   * defined by PCIe. However it deviates a bit in how the 4 bits for extended
> > >>>>>> @@ -493,20 +501,50 @@ static void __iomem *tegra_pcie_map_bus(struct pci_bus *bus,
> > >>>>>>  static int tegra_pcie_config_read(struct pci_bus *bus, unsigned int devfn,
> > >>>>>>  				  int where, int size, u32 *value)
> > >>>>>>  {
> > >>>>>> +	struct tegra_pcie *pcie = bus->sysdata;
> > >>>>>> +	struct pci_dev *bridge;
> > >>>>>> +	struct tegra_pcie_port *port;
> > >>>>>> +
> > >>>>>>  	if (bus->number == 0)
> > >>>>>>  		return pci_generic_config_read32(bus, devfn, where, size,
> > >>>>>>  						 value);
> > >>>>>>  
> > >>>>>> +	bridge = pcie_find_root_port(bus->self);
> > >>>>>> +
> > >>>>>> +	list_for_each_entry(port, &pcie->ports, list)
> > >>>>>> +		if (port->index + 1 == PCI_SLOT(bridge->devfn))
> > >>>>>> +			break;
> > >>>>>> +
> > >>>>>> +	/* If there is no link, then there is no device */
> > >>>>>> +	if (!tegra_pcie_link_up(port)) {
> > >>>>>> +		*value = 0xffffffff;
> > >>>>>> +		return PCIBIOS_DEVICE_NOT_FOUND;
> > >>>>>> +	}
> > >>>>>> +
> > >>>>>>  	return pci_generic_config_read(bus, devfn, where, size, value);
> > >>>>>>  }
> > >>>>>>  
> > >>>>>>  static int tegra_pcie_config_write(struct pci_bus *bus, unsigned int devfn,
> > >>>>>>  				   int where, int size, u32 value)
> > >>>>>>  {
> > >>>>>> +	struct tegra_pcie *pcie = bus->sysdata;
> > >>>>>> +	struct tegra_pcie_port *port;
> > >>>>>> +	struct pci_dev *bridge;
> > >>>>>> +
> > >>>>>>  	if (bus->number == 0)
> > >>>>>>  		return pci_generic_config_write32(bus, devfn, where, size,
> > >>>>>>  						  value);
> > >>>>>>  
> > >>>>>> +	bridge = pcie_find_root_port(bus->self);
> > >>>>>> +
> > >>>>>> +	list_for_each_entry(port, &pcie->ports, list)
> > >>>>>> +		if (port->index + 1 == PCI_SLOT(bridge->devfn))
> > >>>>>> +			break;
> > >>>>>> +
> > >>>>>> +	/* If there is no link, then there is no device */
> > >>>>>> +	if (!tegra_pcie_link_up(port))
> > >>>>>> +		return PCIBIOS_DEVICE_NOT_FOUND;
> > >>>>>> +
> > >>>>>>  	return pci_generic_config_write(bus, devfn, where, size, value);
> > >>>>>>  }
> > >>>>>>  
> > >>>>>> -- 
> > >>>>>> 2.17.1
> > >>>>>>
> >
Manikanta Maddireddy June 18, 2019, 5:36 a.m. UTC | #9
On 18-Jun-19 1:00 AM, Bjorn Helgaas wrote:
> [+cc Rafael, linux-pm, in case they have insights on how rfkill works]
>
> On Mon, Jun 17, 2019 at 01:47:45PM +0200, Thierry Reding wrote:
>> On Mon, Jun 17, 2019 at 03:31:38PM +0530, Manikanta Maddireddy wrote:
>>> On 13-Jun-19 9:12 PM, Thierry Reding wrote:
>>>> On Thu, Jun 13, 2019 at 03:39:46PM +0100, Lorenzo Pieralisi wrote:
>>>>> On Mon, Jun 10, 2019 at 10:08:16AM +0530, Manikanta Maddireddy wrote:
>>>>>> On 04-Jun-19 7:40 PM, Manikanta Maddireddy wrote:
>>>>>>> On 04-Jun-19 6:44 PM, Thierry Reding wrote:
>>>>>>>> On Thu, May 16, 2019 at 11:23:01AM +0530, Manikanta Maddireddy wrote:
>>>>>>>>> Few endpoints like Wi-Fi supports power on/off and to leverage that
>>>>>>>>> root port must support hot-plug and hot-unplug. Tegra PCIe doesn't
>>>>>>>>> support hot-plug and hot-unplug, however it supports endpoint power
>>>>>>>>> on/off feature as follows,
>>>>>>>>>  - Power off sequence:
>>>>>>>>>    - Transition of PCIe link to L2
>>>>>>>>>    - Power off endpoint
>>>>>>>>>    - Leave root port in power up state with the link in L2
>>>>>>>>>  - Power on sequence:
>>>>>>>>>    - Power on endpoint
>>>>>>>>>    - Apply hot reset to get PCIe link up
>>>>>>>>>
>>>>>>>>> PCIe client driver stops accessing PCIe endpoint config and
>>>>>>>>> BAR registers after endpoint is powered off. However,
>>>>>>>>> software applications like x11 server or lspci can access
>>>>>>>>> endpoint config registers in which case host controller
>>>>>>>>> raises "response decoding" errors. To avoid this scenario,
>>>>>>>>> add PCIe link up check in config read and write callback
>>>>>>>>> functions before accessing endpoint config registers.
>>>>>>>>> Signed-off-by: Manikanta Maddireddy <mmaddireddy@nvidia.com>
>>>>>>>>> ---
>>>>>>>>> V4: No change
>>>>>>>>>
>>>>>>>>> V3: Update the commit log with explanation for the need of this patch
>>>>>>>>>
>>>>>>>>> V2: Change tegra_pcie_link_status() to tegra_pcie_link_up()
>>>>>>>>>
>>>>>>>>>  drivers/pci/controller/pci-tegra.c | 38 ++++++++++++++++++++++++++++++
>>>>>>>>>  1 file changed, 38 insertions(+)
>>>>>>>> This still doesn't look right to me conceptually. If
>>>>>>>> somebody wants to access the PCI devices after the kernel
>>>>>>>> has powered them off, why can't we just power the devices
>>>>>>>> back on so that we allow userspace to properly access the
>>>>>>>> devices?
>>>>>>> 1. WiFi devices provides power-off feature for power saving
>>>>>>> in mobiles.  When WiFi is turned off we shouldn't power on
>>>>>>> the HW back without user turning it back on.
>>>>>>> 2. When ever user process tries to access config space, it'll
>>>>>>> end up in these functions. We cannot have is_powered_on check
>>>>>>> in config read/write callbacks.
>>>>>>> 3. WiFi power on/off is device specific feature, we shouldn't
>>>>>>> handle it in PCI subsystem or host controller driver.
>>>>>>>> Or if that's not what we want, shouldn't we add something to
>>>>>>>> the core PCI infrastructure to let us deal with this? It
>>>>>>>> seems like this is some general problem that would apply to
>>>>>>>> every PCI device and host bridge driver. Having each driver
>>>>>>>> implement this logic separately doesn't seem like a good
>>>>>>>> idea to me.
>>>>>>> This should be handled by hotplug feature, whenever endpoint
>>>>>>> is powered-off/ removed from the slot, hot unplug event
>>>>>>> should take care of it. Unfortunately Tegra PCIe doesn't
>>>>>>> support hotplug feature.
>>>>>> I thought about your comment in
>>>>>> https://patchwork.ozlabs.org/patch/1084204/ again.  What if I
>>>>>> add link up check in tegra_pcie_isr() and make "response
>>>>>> decoding error" as debug print? EP Config access will happen
>>>>>> when link is down, but "Response decoding error" print comes
>>>>>> only if debug log is enabled.  This way we can avoid race
>>>>>> issue in config accessors and we get prints when debug logs
>>>>>> are enabled.
>>>> The problem that Manikanta is trying to solve here occurs in
>>>> this situation (Manikanta, correct me if I've got this wrong):
>>>> on some setups, a WiFi module connected over PCI will toggle a
>>>> power GPIO as part of runtime suspend. This effectively causes
>>>> the module to disappear from the PCI bus (i.e. it can no longer
>>>> be accessed until the power GPIO is toggled again).
>>> GPIO is toggled as part of WiFi on/off, can be triggered from
>>> network manager UI.
>>>> This is fine from a kernel point of view because the kernel keeps track
>>>> of what devices are suspended. However, userspace will occasionally try
>>>> to read the configuration space access of all devices, and since it
>>>> doesn't have any knowledge about the suspend state of these devices, it
>>>> doesn't know which ones to leave alone. I think this happens when the
>>>> X.Org server is running.
>>> This is fine from a kernel point of view because PCI client driver
>>> doesn't initiate any PCIe transaction until network interface
>>> is up during WiFi on.
>>>
>>>> One thing that Manikanta and I had discussed was that perhaps
>>>> the device should be hot-unplugged when it goes into this
>>>> low-power state. However, we don't support hotplug on Tegra210
>>>> where this is needed, so we'd need some sort of software-induced
>>>> hot-unplug. However, this low power state is entered when the
>>>> WiFi interface is taken down (i.e. ip link set dev <interface>
>>>> down). If we were to remove the PCI device in that case, it
>>>> means that the interface goes away completely, which is
>>>> completely unexpected from a user's perspective. After all,
>>>> taking a link down and up may be something that scripts are
>>>> doing all the time. They'd fall over if after taking the
>>>> interface down, the interface completely disappears.
>>>> It's also not entirely clear to me how we get the device back
>>>> onto the bus again after it is in low power. If we hot-unplug
>>>> the device, then the driver will be unbound. Presumably the
>>>> driver is what's controlling the power GPIO, so there won't be
>>>> any entity that can be used to bring the chip back to life.
>>>> Unless we deal with that power GPIO elsewhere (rfkill switch
>>>> perhaps?).
>>> Correct, rfkill switch should handle the GPIO.
>>> Sequence will be,
>>>  - WiFi ON
>>>    - rfkill switch enables the WiFi GPIO
>>>    - Tegra PCIe receives hot plug event
>>>    - Tegra PCIe hot plug driver rescans PCI bus and enumerates the device
>>>    - PCI client driver is probed, which will create network interface
>>>  - WiFi OFF
>>>    - rfkill switch disables the WiFi GPIO
>>>    - Tegra PCIe receives hot unplug event
>>>    - Tegra PCIe hot plug driver removes PCI devices under the bus
>>>    - PCI client driver remove is executed, which will remove
>>>      network interface
>>> We don't need current patch in this case because PCI device is not
>>> present in the PCI hierarchy, so there cannot be EP config access
>>> with link down.  However Tegra doesn't support hot plug and unplug
>>> events. I am not sure if we have any software based hot plug event
>>> trigger.
>>> I will drop current patch and pursue if above sequence can be
>>> implemented for Tegra.
>> I just recalled that we have these messages in the kernel log:
>>
>> 	# dmesg | grep tegra-pcie
>> 	[    1.055761] tegra-pcie 1003000.pcie: 4x1, 1x1 configuration
>> 	[    2.745764] tegra-pcie 1003000.pcie: 4x1, 1x1 configuration
>> 	[    2.753073] tegra-pcie 1003000.pcie: probing port 0, using 4 lanes
>> 	[    2.761334] tegra-pcie 1003000.pcie: Slot present pin change, signature: 00000008
>> 	[    3.177607] tegra-pcie 1003000.pcie: link 0 down, retrying
>> 	[    3.585605] tegra-pcie 1003000.pcie: link 0 down, retrying
>> 	[    3.993606] tegra-pcie 1003000.pcie: link 0 down, retrying
>> 	[    4.001214] tegra-pcie 1003000.pcie: link 0 down, ignoring
>> 	[    4.006733] tegra-pcie 1003000.pcie: probing port 1, using 1 lanes
>> 	[    4.015042] tegra-pcie 1003000.pcie: Slot present pin change, signature: 00000000
>> 	[    4.031177] tegra-pcie 1003000.pcie: PCI host bridge to bus 0000:00
>>
>> These "slot present pin change" message do look a lot like hotplug
>> related messages. Could we perhaps use those to our advantage for this
>> case? Do you see these when you run on the platform where WiFi is
>> enabled/disabled using rfkill?
>>
>> Given that rfkill is completely decoupled from PCI, I don't see how we
>> would trigger any software-based hotplug mechanism. Perhaps one thing
>> that we could do is the equivalent of this:
>>
>> 	# echo 1 > /sys/bus/pci/rescan
>>
>> from some script that's perhaps tied to the rfkill somehow. I'm not sure
>> if that's possible, or generic enough.
> How does rfkill work?  It sounds like it completely removes power from
> the wifi device, putting it in D3cold.  Is there any software
> notification other than the "Slot present pin change" (which looks
> like a Tegra-specific thing)?
>
> If the device is in D3cold, it won't respond to any PCI transactions,
> and there's no standard PCI mechanism to wake it up.  Probably the
> cleanest way to handle this is to make it a hot-unplug.
>
> If this were an ACPI system, the rfkill might be visible as some sort
> of ACPI power management event, and there might be a corresponding way
> for software to bring the device back to D0 temporarily.  That would
> make lspci and X config reads work.  But I don't think this system has
> ACPI.

"Slot present pin change" interrupt is triggered based on the programming
of the PRSNT_MAP bit field in the PCIE2_RP_PRIV_MISC controller register and
it is not triggered when an EP is hot plugged/unplugged. The Tegra PCIe
controller doesn't have the capability to detect EP hot plug and unplug.
Consider WiFi off to be equivalent to a SW-aware EP hot unplug event.

The rfkill subsystem provides a generic interface for disabling any radio
transmitter in the system. WiFi M.2 form factor cards provide a W_DISABLE
GPIO to control the radio transmitter, and I have seen some cards provide
control to turn off the complete chip through this GPIO. Here we are talking
about the second case, where the device is put in D3cold state. This GPIO can
be registered with the rfkill subsystem, and rfkill commands (like "rfkill
unblock wifi") can be used to turn on the radio transmitter during WiFi on.


Manikanta

>>>> Perhaps one other way to deal with this would be to track the
>>>> suspend state of devices and then have the code that implements
>>>> the PCI access from userspace refuse accesses to devices that
>>>> are asleep. I suppose this is somewhat of an odd use-case
>>>> because traditionally I guess PCI devices never power down to a
>>>> state where their configuration space can no longer be accessed.
>>>> At least that's what would explain why this has never been an
>>>> issue before. Or perhaps it has?
>>>>
>>>> The last resort would be to just never put the WiFi chip into
>>>> that low power mode, though I'm not exactly sure what that means
>>>> for the power consumption on the affected systems.
>>>>>>>>> diff --git a/drivers/pci/controller/pci-tegra.c b/drivers/pci/controller/pci-tegra.c
>>>>>>>>> index d20c88a79e00..33f4dfab9e35 100644
>>>>>>>>> --- a/drivers/pci/controller/pci-tegra.c
>>>>>>>>> +++ b/drivers/pci/controller/pci-tegra.c
>>>>>>>>> @@ -428,6 +428,14 @@ static inline u32 pads_readl(struct tegra_pcie *pcie, unsigned long offset)
>>>>>>>>>  	return readl(pcie->pads + offset);
>>>>>>>>>  }
>>>>>>>>>  
>>>>>>>>> +static bool tegra_pcie_link_up(struct tegra_pcie_port *port)
>>>>>>>>> +{
>>>>>>>>> +	u32 value;
>>>>>>>>> +
>>>>>>>>> +	value = readl(port->base + RP_LINK_CONTROL_STATUS);
>>>>>>>>> +	return !!(value & RP_LINK_CONTROL_STATUS_DL_LINK_ACTIVE);
>>>>>>>>> +}
>>>>>>>>> +
>>>>>>>>>  /*
>>>>>>>>>   * The configuration space mapping on Tegra is somewhat similar to the ECAM
>>>>>>>>>   * defined by PCIe. However it deviates a bit in how the 4 bits for extended
>>>>>>>>> @@ -493,20 +501,50 @@ static void __iomem *tegra_pcie_map_bus(struct pci_bus *bus,
>>>>>>>>>  static int tegra_pcie_config_read(struct pci_bus *bus, unsigned int devfn,
>>>>>>>>>  				  int where, int size, u32 *value)
>>>>>>>>>  {
>>>>>>>>> +	struct tegra_pcie *pcie = bus->sysdata;
>>>>>>>>> +	struct pci_dev *bridge;
>>>>>>>>> +	struct tegra_pcie_port *port;
>>>>>>>>> +
>>>>>>>>>  	if (bus->number == 0)
>>>>>>>>>  		return pci_generic_config_read32(bus, devfn, where, size,
>>>>>>>>>  						 value);
>>>>>>>>>  
>>>>>>>>> +	bridge = pcie_find_root_port(bus->self);
>>>>>>>>> +
>>>>>>>>> +	list_for_each_entry(port, &pcie->ports, list)
>>>>>>>>> +		if (port->index + 1 == PCI_SLOT(bridge->devfn))
>>>>>>>>> +			break;
>>>>>>>>> +
>>>>>>>>> +	/* If there is no link, then there is no device */
>>>>>>>>> +	if (!tegra_pcie_link_up(port)) {
>>>>>>>>> +		*value = 0xffffffff;
>>>>>>>>> +		return PCIBIOS_DEVICE_NOT_FOUND;
>>>>>>>>> +	}
>>>>>>>>> +
>>>>>>>>>  	return pci_generic_config_read(bus, devfn, where, size, value);
>>>>>>>>>  }
>>>>>>>>>  
>>>>>>>>>  static int tegra_pcie_config_write(struct pci_bus *bus, unsigned int devfn,
>>>>>>>>>  				   int where, int size, u32 value)
>>>>>>>>>  {
>>>>>>>>> +	struct tegra_pcie *pcie = bus->sysdata;
>>>>>>>>> +	struct tegra_pcie_port *port;
>>>>>>>>> +	struct pci_dev *bridge;
>>>>>>>>> +
>>>>>>>>>  	if (bus->number == 0)
>>>>>>>>>  		return pci_generic_config_write32(bus, devfn, where, size,
>>>>>>>>>  						  value);
>>>>>>>>>  
>>>>>>>>> +	bridge = pcie_find_root_port(bus->self);
>>>>>>>>> +
>>>>>>>>> +	list_for_each_entry(port, &pcie->ports, list)
>>>>>>>>> +		if (port->index + 1 == PCI_SLOT(bridge->devfn))
>>>>>>>>> +			break;
>>>>>>>>> +
>>>>>>>>> +	/* If there is no link, then there is no device */
>>>>>>>>> +	if (!tegra_pcie_link_up(port))
>>>>>>>>> +		return PCIBIOS_DEVICE_NOT_FOUND;
>>>>>>>>> +
>>>>>>>>>  	return pci_generic_config_write(bus, devfn, where, size, value);
>>>>>>>>>  }
>>>>>>>>>  
>>>>>>>>> -- 
>>>>>>>>> 2.17.1
>>>>>>>>>
>
Thierry Reding June 18, 2019, 10:49 a.m. UTC | #10
On Tue, Jun 18, 2019 at 11:06:44AM +0530, Manikanta Maddireddy wrote:
> 
> On 18-Jun-19 1:00 AM, Bjorn Helgaas wrote:
> > [+cc Rafael, linux-pm, in case they have insights on how rfkill works]
> >
> > On Mon, Jun 17, 2019 at 01:47:45PM +0200, Thierry Reding wrote:
> >> On Mon, Jun 17, 2019 at 03:31:38PM +0530, Manikanta Maddireddy wrote:
> >>> On 13-Jun-19 9:12 PM, Thierry Reding wrote:
> >>>> On Thu, Jun 13, 2019 at 03:39:46PM +0100, Lorenzo Pieralisi wrote:
> >>>>> On Mon, Jun 10, 2019 at 10:08:16AM +0530, Manikanta Maddireddy wrote:
> >>>>>> On 04-Jun-19 7:40 PM, Manikanta Maddireddy wrote:
> >>>>>>> On 04-Jun-19 6:44 PM, Thierry Reding wrote:
> >>>>>>>> On Thu, May 16, 2019 at 11:23:01AM +0530, Manikanta Maddireddy wrote:
> >>>>>>>>> Few endpoints like Wi-Fi supports power on/off and to leverage that
> >>>>>>>>> root port must support hot-plug and hot-unplug. Tegra PCIe doesn't
> >>>>>>>>> support hot-plug and hot-unplug, however it supports endpoint power
> >>>>>>>>> on/off feature as follows,
> >>>>>>>>>  - Power off sequence:
> >>>>>>>>>    - Transition of PCIe link to L2
> >>>>>>>>>    - Power off endpoint
> >>>>>>>>>    - Leave root port in power up state with the link in L2
> >>>>>>>>>  - Power on sequence:
> >>>>>>>>>    - Power on endpoint
> >>>>>>>>>    - Apply hot reset to get PCIe link up
> >>>>>>>>>
> >>>>>>>>> PCIe client driver stops accessing PCIe endpoint config and
> >>>>>>>>> BAR registers after endpoint is powered off. However,
> >>>>>>>>> software applications like x11 server or lspci can access
> >>>>>>>>> endpoint config registers in which case host controller
> >>>>>>>>> raises "response decoding" errors. To avoid this scenario,
> >>>>>>>>> add PCIe link up check in config read and write callback
> >>>>>>>>> functions before accessing endpoint config registers.
> >>>>>>>>> Signed-off-by: Manikanta Maddireddy <mmaddireddy@nvidia.com>
> >>>>>>>>> ---
> >>>>>>>>> V4: No change
> >>>>>>>>>
> >>>>>>>>> V3: Update the commit log with explanation for the need of this patch
> >>>>>>>>>
> >>>>>>>>> V2: Change tegra_pcie_link_status() to tegra_pcie_link_up()
> >>>>>>>>>
> >>>>>>>>>  drivers/pci/controller/pci-tegra.c | 38 ++++++++++++++++++++++++++++++
> >>>>>>>>>  1 file changed, 38 insertions(+)
> >>>>>>>> This still doesn't look right to me conceptually. If
> >>>>>>>> somebody wants to access the PCI devices after the kernel
> >>>>>>>> has powered them off, why can't we just power the devices
> >>>>>>>> back on so that we allow userspace to properly access the
> >>>>>>>> devices?
> >>>>>>> 1. WiFi devices provides power-off feature for power saving
> >>>>>>> in mobiles.  When WiFi is turned off we shouldn't power on
> >>>>>>> the HW back without user turning it back on.
> >>>>>>> 2. When ever user process tries to access config space, it'll
> >>>>>>> end up in these functions. We cannot have is_powered_on check
> >>>>>>> in config read/write callbacks.
> >>>>>>> 3. WiFi power on/off is device specific feature, we shouldn't
> >>>>>>> handle it in PCI subsystem or host controller driver.
> >>>>>>>> Or if that's not what we want, shouldn't we add something to
> >>>>>>>> the core PCI infrastructure to let us deal with this? It
> >>>>>>>> seems like this is some general problem that would apply to
> >>>>>>>> every PCI device and host bridge driver. Having each driver
> >>>>>>>> implement this logic separately doesn't seem like a good
> >>>>>>>> idea to me.
> >>>>>>> This should be handled by hotplug feature, whenever endpoint
> >>>>>>> is powered-off/ removed from the slot, hot unplug event
> >>>>>>> should take care of it. Unfortunately Tegra PCIe doesn't
> >>>>>>> support hotplug feature.
> >>>>>> I thought about your comment in
> >>>>>> https://patchwork.ozlabs.org/patch/1084204/ again.  What if I
> >>>>>> add link up check in tegra_pcie_isr() and make "response
> >>>>>> decoding error" as debug print? EP Config access will happen
> >>>>>> when link is down, but "Response decoding error" print comes
> >>>>>> only if debug log is enabled.  This way we can avoid race
> >>>>>> issue in config accessors and we get prints when debug logs
> >>>>>> are enabled.
> >>>> The problem that Manikanta is trying to solve here occurs in
> >>>> this situation (Manikanta, correct me if I've got this wrong):
> >>>> on some setups, a WiFi module connected over PCI will toggle a
> >>>> power GPIO as part of runtime suspend. This effectively causes
> >>>> the module to disappear from the PCI bus (i.e. it can no longer
> >>>> be accessed until the power GPIO is toggled again).
> >>> GPIO is toggled as part of WiFi on/off, can be triggered from
> >>> network manager UI.
> >>>> This is fine from a kernel point of view because the kernel keeps track
> >>>> of what devices are suspended. However, userspace will occasionally try
> >>>> to read the configuration space access of all devices, and since it
> >>>> doesn't have any knowledge about the suspend state of these devices, it
> >>>> doesn't know which ones to leave alone. I think this happens when the
> >>>> X.Org server is running.
> >>> This is fine from a kernel point of view because PCI client driver
> >>> doesn't initiate any PCIe transaction until network interface
> >>> is up during WiFi on.
> >>>
> >>>> One thing that Manikanta and I had discussed was that perhaps
> >>>> the device should be hot-unplugged when it goes into this
> >>>> low-power state. However, we don't support hotplug on Tegra210
> >>>> where this is needed, so we'd need some sort of software-induced
> >>>> hot-unplug. However, this low power state is entered when the
> >>>> WiFi interface is taken down (i.e. ip link set dev <interface>
> >>>> down). If we were to remove the PCI device in that case, it
> >>>> means that the interface goes away completely, which is
> >>>> completely unexpected from a user's perspective. After all,
> >>>> taking a link down and up may be something that scripts are
> >>>> doing all the time. They'd fall over if after taking the
> >>>> interface down, the interface completely disappears.
> >>>> It's also not entirely clear to me how we get the device back
> >>>> onto the bus again after it is in low power. If we hot-unplug
> >>>> the device, then the driver will be unbound. Presumably the
> >>>> driver is what's controlling the power GPIO, so there won't be
> >>>> any entity that can be used to bring the chip back to life.
> >>>> Unless we deal with that power GPIO elsewhere (rfkill switch
> >>>> perhaps?).
> >>> Correct, rfkill switch should handle the GPIO.
> >>> Sequence will be,
> >>>  - WiFi ON
> >>>    - rfkill switch enables the WiFi GPIO
> >>>    - Tegra PCIe receives hot plug event
> >>>    - Tegra PCIe hot plug driver rescans PCI bus and enumerates the device
> >>>    - PCI client driver is probed, which will create network interface
> >>>  - WiFi OFF
> >>>    - rfkill switch disables the WiFi GPIO
> >>>    - Tegra PCIe receives hot unplug event
> >>>    - Tegra PCIe hot plug driver removes PCI devices under the bus
> >>>    - PCI client driver remove is executed, which will remove
> >>>      network interface
> >>> We don't need current patch in this case because PCI device is not
> >>> present in the PCI hierarchy, so there cannot be EP config access
> >>> with link down.  However Tegra doesn't support hot plug and unplug
> >>> events. I am not sure if we have any software based hot plug event
> >>> trigger.
> >>> I will drop current patch and pursue if above sequence can be
> >>> implemented for Tegra.
> >> I just recalled that we have these messages in the kernel log:
> >>
> >> 	# dmesg | grep tegra-pcie
> >> 	[    1.055761] tegra-pcie 1003000.pcie: 4x1, 1x1 configuration
> >> 	[    2.745764] tegra-pcie 1003000.pcie: 4x1, 1x1 configuration
> >> 	[    2.753073] tegra-pcie 1003000.pcie: probing port 0, using 4 lanes
> >> 	[    2.761334] tegra-pcie 1003000.pcie: Slot present pin change, signature: 00000008
> >> 	[    3.177607] tegra-pcie 1003000.pcie: link 0 down, retrying
> >> 	[    3.585605] tegra-pcie 1003000.pcie: link 0 down, retrying
> >> 	[    3.993606] tegra-pcie 1003000.pcie: link 0 down, retrying
> >> 	[    4.001214] tegra-pcie 1003000.pcie: link 0 down, ignoring
> >> 	[    4.006733] tegra-pcie 1003000.pcie: probing port 1, using 1 lanes
> >> 	[    4.015042] tegra-pcie 1003000.pcie: Slot present pin change, signature: 00000000
> >> 	[    4.031177] tegra-pcie 1003000.pcie: PCI host bridge to bus 0000:00
> >>
> >> These "slot present pin change" message do look a lot like hotplug
> >> related messages. Could we perhaps use those to our advantage for this
> >> case? Do you see these when you run on the platform where WiFi is
> >> enabled/disabled using rfkill?
> >>
> >> Given that rfkill is completely decoupled from PCI, I don't see how we
> >> would trigger any software-based hotplug mechanism. Perhaps one thing
> >> that we could do is the equivalent of this:
> >>
> >> 	# echo 1 > /sys/bus/pci/rescan
> >>
> >> from some script that's perhaps tied to the rfkill somehow. I'm not sure
> >> if that's possible, or generic enough.
> > How does rfkill work?  It sounds like it completely removes power from
> > the wifi device, putting it in D3cold.  Is there any software
> > notification other than the "Slot present pin change" (which looks
> > like a Tegra-specific thing)?
> >
> > If the device is in D3cold, it won't respond to any PCI transactions,
> > and there's no standard PCI mechanism to wake it up.  Probably the
> > cleanest way to handle this is to make it a hot-unplug.
> >
> > If this were an ACPI system, the rfkill might be visible as some sort
> > of ACPI power management event, and there might be a corresponding way
> > for software to bring the device back to D0 temporarily.  That would
> > make lspci and X config reads work.  But I don't think this system has
> > ACPI.
> 
> "Slot present pin change" interrupt is triggered based on the programming
> of PRSNT_MAP bit field in PCIE2_RP_PRIV_MISC controller register and
> they are not triggered when EP is hot plugged/unplugged. Tegra PCIe
> controller doesn't have capability to detect EP hot plug and unplug.
> Consider that WiFi off equivalent to SW aware EP hot unplug event.
> 
> The rfkill subsystem provides a generic interface for disabling any radio
> transmitter in the system. WiFi M.2 form factor cards provide W_DISABLE
> GPIO to control the radio transmitter and I have seen some cards provide
> control to turn off complete chip through this GPIO. Here we are talking
> about second case where device is put in D3cold state. This GPIO can be
> registered to rfkill subsystem and rfkill commands (like "rfkill unblock
> wifi") can be used to turn on radio transmitter during WiFi on.

Perhaps what we need here is some sort of mechanism to make rfkill and
the PCI host controller interoperate? I could imagine for example that
the PCI host controller would get a new "rfkill" property in device
tree that points at the rfkill device via phandle.

The driver could then get a reference to it using something like:

	rfkill = rfkill_get(dev);
	if (IS_ERR(rfkill)) {
		...
	}

and register for notification:

	err = rfkill_subscribe(rfkill, callback);
	if (err < 0) {
		...
	}

rfkill_unsubscribe() and rfkill_put() would then be used upon driver
unload to detach from the rfkill.

I noticed that there's an rfkill-gpio driver (net/rfkill/rfkill-gpio.c)
that already does pretty much everything that we need, except that it
doesn't support DT yet, but I suspect that that's pretty easy to add.

Johannes, any thoughts on this? In a nutshell, what we're trying to solve
here is devices that get removed from/added to PCI based on an rfkill-
type of device. The difference to other implementations is that we have
no way of detecting when the device has gone away (PCI hotplug does not
work). So we'd need some software-triggered mechanism to let the PCI
host controller know when the device is presumably going away or being
added back, so that the PCI bus can be rescanned and the PCI device
removed or added at that point.

Thierry
Johannes Berg June 18, 2019, 12:32 p.m. UTC | #11
I got to this thread really late I guess :-)

On Tue, 2019-06-18 at 12:49 +0200, Thierry Reding wrote:

> > > > > > > > > 1. WiFi devices provides power-off feature for power saving
> > > > > > > > > in mobiles.  When WiFi is turned off we shouldn't power on
> > > > > > > > > the HW back without user turning it back on.

But why would you disconnect the PCIe device just to power it down?!

> > > > > > The problem that Manikanta is trying to solve here occurs in
> > > > > > this situation (Manikanta, correct me if I've got this wrong):
> > > > > > on some setups, a WiFi module connected over PCI will toggle a
> > > > > > power GPIO as part of runtime suspend. This effectively causes
> > > > > > the module to disappear from the PCI bus (i.e. it can no longer
> > > > > > be accessed until the power GPIO is toggled again).
> > > > > 
> > > > > GPIO is toggled as part of WiFi on/off, can be triggered from
> > > > > network manager UI.

That's kinda icky, IMHO.

> > > > > Correct, rfkill switch should handle the GPIO.
> > > > > Sequence will be,
> > > > >  - WiFi ON
> > > > >    - rfkill switch enables the WiFi GPIO
> > > > >    - Tegra PCIe receives hot plug event
> > > > >    - Tegra PCIe hot plug driver rescans PCI bus and enumerates the device
> > > > >    - PCI client driver is probed, which will create network interface
> > > > >  - WiFi OFF
> > > > >    - rfkill switch disables the WiFi GPIO
> > > > >    - Tegra PCIe receives hot unplug event
> > > > >    - Tegra PCIe hot plug driver removes PCI devices under the bus
> > > > >    - PCI client driver remove is executed, which will remove
> > > > >      network interface
> > > > > We don't need current patch in this case because PCI device is not
> > > > > present in the PCI hierarchy, so there cannot be EP config access
> > > > > with link down.  However Tegra doesn't support hot plug and unplug
> > > > > events. I am not sure if we have any software based hot plug event
> > > > > trigger.

Looks reasonable to me.

I guess if you absolutely know in software when the device is present or
not, you don't need "real" PCIe hotplug, just need to tickle the
software right?

> > > How does rfkill work?  It sounds like it completely removes power from
> > > the wifi device, putting it in D3cold.  Is there any software
> > > notification other than the "Slot present pin change" (which looks
> > > like a Tegra-specific thing)?

Well, they said above it's a GPIO that controls it, so the software
already knows and doesn't really need an event?

> > The rfkill subsystem provides a generic interface for disabling any radio
> > transmitter in the system. WiFi M.2 form factor cards provide W_DISABLE
> > GPIO to control the radio transmitter

But it depends on the hardware how this is handled, Intel NICs for
example just trigger an IRQ to the host and don't turn off much, for
them the W_DISABLE pin is just a GPIO in input mode, with edge triggered
interrupt to the driver.

> > and I have seen some cards provide
> > control to turn off complete chip through this GPIO. 

I never heard of this. Which NICs are we talking about?

> Perhaps what we need here is some sort of mechanism to make rfkill and
> the PCI host controller interoperate? I could imagine for example that
> the PCI host controller would get a new "rfkill" property in device
> tree that points at the rfkill device via phandle.

But you don't know which the rfkill device is, do you?

I mean, fundamentally, you just have a GPIO that turns on and off the
W_DISABLE pin. NICs will not generally disappear from the bus when
that's turned on, so you need a NIC driver integration.

I guess you also have an rfkill-gpio driver assigned to this GPIO, which
gets assigned there via DT/platform code?

Ah, but then I guess you could have a phandle in the DT or so that ties
the W_DISABLE-GPIO with the PCIe slot that it controls.

> The driver could then get a reference to it using something like:
> 
> 	rfkill = rfkill_get(dev);
> 	if (IS_ERR(rfkill)) {
> 		...
> 	}
> 
> and register for notification:
> 
> 	err = rfkill_subscribe(rfkill, callback);
> 	if (err < 0) {
> 		...
> 	}
> 
> rfkill_unsubscribe() and rfkill_put() would then be used upon driver
> unload to detach from the rfkill.

This I don't understand.

> I noticed that there's an rfkill-gpio driver (net/rfkill/rfkill-gpio.c)
> that already does pretty much everything that we need, except that it
> doesn't support DT yet, but I suspect that that's pretty easy to add.

Oh, good point, no DT support here - so how *do* you actually
instantiate the rfkill today??

> Johannes, any thoughts on this. In a nutshell what we're trying to solve
> here is devices that get removed from/added to PCI based on an rfkill-
> type of device. The difference to other implementations is that we have
> no way of detecting when the device has gone away (PCI hotplug does not
> work). So we'd need some software-triggered mechanism to let the PCI
> host controller know when the device is presumably going away or being
> added back, so that the PCI bus can be rescanned and the PCI device
> removed or added at that point).

Right.

So, I'm not even sure we need the *driver* to do anything other than say
"I know the device will drop off the bus when rfkill is enabled", right?


But do we actually need rfkill to be involved here?

I mean, let's say first we make rfkill-gpio DT-aware, rather than just
ACPI. This should be simple. Then it drives a GPIO (it can actually
drive two and a clock, not sure I know why).

Now, next we need something that says that the device should be treated
as hotplug/unplug. We could make this in the driver somehow like you
suggested, but that seems like a lot of effort?

Couldn't we put this into the *GPIO* subsystem instead?

I mean - conceivably there could be GPIOs that just power down a device
for example. Not even through something like W_DISABLE, but just having
a GPIO hooked up to a transistor on the voltage pin of the device. That
would have very similar semantics?

So why not just attach the PCIe device/port to the GPIO, and have the
GPIO implementation here call the detach/attach (or detach/rescan?) when
they are toggled?

Not that I'd mind having it in rfkill! But it seems like a special case
to have it there, when you can do so much more with GPIOs.

johannes
Thierry Reding June 18, 2019, 1:40 p.m. UTC | #12
On Tue, Jun 18, 2019 at 02:32:59PM +0200, Johannes Berg wrote:
> I got to this thread really late I guess :-)
> 
> On Tue, 2019-06-18 at 12:49 +0200, Thierry Reding wrote:
> 
> > > > > > > > > > 1. WiFi devices provides power-off feature for power saving
> > > > > > > > > > in mobiles.  When WiFi is turned off we shouldn't power on
> > > > > > > > > > the HW back without user turning it back on.
> 
> But why would you disconnect the PCIe device just to power it down?!

It's a side-effect of asserting that W_DISABLE pin that the bus link
basically goes down. We've had a similar case recently, one that we
haven't quite solved either, where an RTL8169 Ethernet controller is
hooked up to a GPIO that controls the ISOLATEB (I think that was the
name) pin. If that pin is asserted, according to the documentation,
the device stops sampling/driving the PCI signals. So for all intents
and purposes it becomes disconnected.

We could kind of deal with this if the ISOLATEB was deasserted at probe
time, because that would mean that the device is at least enumerated on
PCI. Then when we go into some power down mode (for example when the
interface is taken down), the NIC driver could assert the GPIO and on
resuming from the power down mode deassert it again. Logically the
device would stay around, we just couldn't talk to it over PCI until the
driver has deasserted the ISOLATEB GPIO.

The problem is that it's not exactly defined what the status of the pin
would be at probe time. If it is asserted, the NIC will never show up on
the PCI bus and hence no driver would be registered that could deassert
the ISOLATEB signal. Well, unless we somehow created a "placeholder" PCI
device based on a device tree node (containing a reference to the GPIO)
so that the device would be enumerated (and probed) regardless of the
PCI link. There's no infrastructure to do that currently, but perhaps
worth investigating.

I think the W_DISABLE is somewhat similar. From what Manikanta was
saying, the PCI link also goes down when the pin is asserted, so we
lose any means of communicating with it over PCI.

The issue that Manikanta was trying to solve with this particular patch
was that since the PCI device is part of the PCI device hierarchy, some
userspace tools (X server, for example) will see it and try to discover
whether it's a GPU or not. This in turn causes errors from the PCI host
controller because it's trying to access a device behind a link that's
down. That, I assume, could also happen for the ISOLATEB case that I was
describing above, though it hasn't been brought up, I think.

> > > > > > > The problem that Manikanta is trying to solve here occurs in
> > > > > > > this situation (Manikanta, correct me if I've got this wrong):
> > > > > > > on some setups, a WiFi module connected over PCI will toggle a
> > > > > > > power GPIO as part of runtime suspend. This effectively causes
> > > > > > > the module to disappear from the PCI bus (i.e. it can no longer
> > > > > > > be accessed until the power GPIO is toggled again).
> > > > > > 
> > > > > > GPIO is toggled as part of WiFi on/off, can be triggered from
> > > > > > network manager UI.
> 
> That's kinda icky, IMHO.

Isn't that kind of the point of rfkill? I seem to remember having a
notebook where this was done exactly the same way. There was also a
button/switch that you could push which would result in the WiFi device
either going away completely or at the least losing the WiFi link. It
seems like that's exactly what Manikanta is describing.

> > > > > > Correct, rfkill switch should handle the GPIO.
> > > > > > Sequence will be,
> > > > > >  - WiFi ON
> > > > > >    - rfkill switch enables the WiFi GPIO
> > > > > >    - Tegra PCIe receives hot plug event
> > > > > >    - Tegra PCIe hot plug driver rescans PCI bus and enumerates the device
> > > > > >    - PCI client driver is probed, which will create network interface
> > > > > >  - WiFi OFF
> > > > > >    - rfkill switch disables the WiFi GPIO
> > > > > >    - Tegra PCIe receives hot unplug event
> > > > > >    - Tegra PCIe hot plug driver removes PCI devices under the bus
> > > > > >    - PCI client driver remove is executed, which will remove
> > > > > >      network interface
> > > > > > We don't need current patch in this case because PCI device is not
> > > > > > present in the PCI hierarchy, so there cannot be EP config access
> > > > > > with link down.  However Tegra doesn't support hot plug and unplug
> > > > > > events. I am not sure if we have any software based hot plug event
> > > > > > trigger.
> 
> Looks reasonable to me.
> 
> I guess if you absolutely know in software when the device is present or
> not, you don't need "real" PCIe hotplug, just need to tickle the
> software right?

Right.

> > > > How does rfkill work?  It sounds like it completely removes power from
> > > > the wifi device, putting it in D3cold.  Is there any software
> > > > notification other than the "Slot present pin change" (which looks
> > > > like a Tegra-specific thing)?
> 
> Well, they said above it's a GPIO that controls it, so the software
> already knows and doesn't really need an event?

We still need to communicate from rfkill to the PCI host controller that
something happened, since they are two different entities.

> > > The rfkill subsystem provides a generic interface for disabling any radio
> > > transmitter in the system. WiFi M.2 form factor cards provide W_DISABLE
> > > GPIO to control the radio transmitter
> 
> But it depends on the hardware how this is handled, Intel NICs for
> example just trigger an IRQ to the host and don't turn off much, for
> them the W_DISABLE pin is just a GPIO in input mode, with edge triggered
> interrupt to the driver.

Okay, so does this mean you have some input device connected to the WiFi
device that will be used (without software intervention) to disable the
transmitter and then the WiFi device will signal using the W_DISABLE pin
that the transmitter was indeed disabled?

> > > and I have seen some cards provide
> > > control to turn off complete chip through this GPIO. 
> 
> I never heard of this. Which NICs are we talking about?
> 
> > Perhaps what we need here is some sort of mechanism to make rfkill and
> > the PCI host controller interoperate? I could imagine for example that
> > the PCI host controller would get a new "rfkill" property in device
> > tree that points at the rfkill device via phandle.
> 
> But you don't know which the rfkill device is, do you?
> 
> I mean, fundamentally, you just have a GPIO that turns on and off the
> W_DISABLE pin. NICs will not generally disappear from the bus when
> that's turned on, so you need a NIC driver integration.

I think that's the main problem that we're trying to solve. In our case
it does seem like the device completely disappears from the bus.

> I guess you also have an rfkill-gpio driver assigned to this GPIO, which
> gets assigned there via DT/platform code?

Yes, I think that's correct. Manikanta, please confirm.

> Ah, but then I guess you could have a phandle in the DT or so that ties
> the W_DISABLE-GPIO with the PCIe slot that it controls.

Right, that's what I was thinking.

> > The driver could then get a reference to it using something like:
> > 
> > 	rfkill = rfkill_get(dev);
> > 	if (IS_ERR(rfkill)) {
> > 		...
> > 	}
> > 
> > and register for notification:
> > 
> > 	err = rfkill_subscribe(rfkill, callback);
> > 	if (err < 0) {
> > 		...
> > 	}
> > 
> > rfkill_unsubscribe() and rfkill_put() would then be used upon driver
> > unload to detach from the rfkill.
> 
> This I don't understand.

This was just an example of what I was imagining. The network driver
would get an rfkill (looked up via device tree phandle) and subscribe to
receive events from it, so that it could be notified when the rfkill is
"blocked" and rescan the bus to get the WiFi device unplugged. Once
unblocked it would be notified again and rescan the bus so that the
device would reappear.

> > I noticed that there's an rfkill-gpio driver (net/rfkill/rfkill-gpio.c)
> > that already does pretty much everything that we need, except that it
> > doesn't support DT yet, but I suspect that that's pretty easy to add.
> 
> Oh, good point, no DT support here - so how *do* you actually
> instantiate the rfkill today??

I suspect that we've got downstream patches for that. The patch here is
part of a series to upstream support for this. I haven't seen the patch
for rfkill-gpio, but perhaps that's queued for later.

> > Johannes, any thoughts on this. In a nutshell what we're trying to solve
> > here is devices that get removed from/added to PCI based on an rfkill-
> > type of device. The difference to other implementations is that we have
> > no way of detecting when the device has gone away (PCI hotplug does not
> > work). So we'd need some software-triggered mechanism to let the PCI
> > host controller know when the device is presumably going away or being
> > added back, so that the PCI bus can be rescanned and the PCI device
> > removed or added at that point).
> 
> Right.
> 
> So, I'm not even sure we need the *driver* to do anything other than say
> "I know the device will drop off the bus when rfkill is enabled", right?
> 
> 
> But do we actually need rfkill to be involved here?
> 
> I mean, let's say first we make rfkill-gpio DT-aware, rather than just
> ACPI. This should be simple. Then it drives a GPIO (it can actually
> drive two and a clock, not sure I know why).
> 
> Now, next we need something that says that the device should be treated
> as hotplug/unplug. We could make this in the driver somehow like you
> suggested, but that seems like a lot of effort?
> 
> Couldn't we put this into the *GPIO* subsystem instead?
> 
> I mean - conceivably there could be GPIOs that just power down a device
> for example. Not even through something like W_DISABLE, but just having
> a GPIO hooked up to a transistor on the voltage pin of the device. That
> would have very similar semantics?
> 
> So why not just attach the PCIe device/port to the GPIO, and have the
> GPIO implementation here call the detach/attach (or detach/rescan?) when
> they are toggled?
> 
> Not that I'd mind having it in rfkill! But it seems like a special case
> to have it there, when you can do so much more with GPIOs.

Yeah, that's where things become a little muddy. For the ISOLATEB case
there was initially a similar proposal. The problem is that on one hand
we can have different semantics for these pins. On one platform this
could be a kind of "power" GPIO, on others it could be ISOLATE/DISABLE,
and on yet others it would be more like a reset. In order to make the
PCIe port aware of the differences we'd have to expose multiple GPIOs in
DT for context.

The other problem with this is that, in order to avoid the chicken-and-
egg problem, we need to associate these GPIOs with the root ports,
because those are the only ones that exist at probe time. All downstream
devices may not be available because the power/reset/disable pin is not
asserted/deasserted yet. Now, you could potentially have a switch in the
downstream hierarchy, so it becomes completely unclear what exact device
the GPIO is associated with.

Related to that, a GPIO like this is really only useful if you can make
use of it. For example you want to assert/deassert this GPIO in order to
put the WiFi/Ethernet/whatever device into a low-power mode when it is
not used, right? But in order to do so, the driver for that device needs
to be able to handle the GPIO, because it is the only one that knows the
right point in time to toggle it. Conversely, if this was associated
with the root port, the only point in time where the root port driver
could toggle it is on a suspend/resume of the entire bus, which makes it
rather useless.

But then we're back to square one where we basically have to associate
the GPIO with the specific device. I think that's the right thing to do
because, well, that's what reality is. The GPIO is directly routed to a
pin on the chip. It's not something that goes over the PCI connector or
anything. However, we're also back to the chicken-and-egg problem since
without toggling the GPIO the device might not even get enumerated.

rfkill-gpio has the advantage that it decouples this and gets us out of
the chicken-and-egg situation. It also has fairly well-defined semantics
and fits the use-case, so it's a very appealing option.

Thierry
Johannes Berg June 18, 2019, 2:48 p.m. UTC | #13
On Tue, 2019-06-18 at 15:40 +0200, Thierry Reding wrote:
> On Tue, Jun 18, 2019 at 02:32:59PM +0200, Johannes Berg wrote:
> > I got to this thread really late I guess :-)
> > 
> > On Tue, 2019-06-18 at 12:49 +0200, Thierry Reding wrote:
> > 
> > > > > > > > > > > 1. WiFi devices provides power-off feature for power saving
> > > > > > > > > > > in mobiles.  When WiFi is turned off we shouldn't power on
> > > > > > > > > > > the HW back without user turning it back on.
> > 
> > But why would you disconnect the PCIe device just to power it down?!
> 
> It's a side-effect of asserting that W_DISABLE pin that the bus link
> basically goes down. We've had a similar case recently, one that we
> haven't quite solved either, where an RTL8169 Ethernet controller is
> hooked up to a GPIO that controls the ISOLATEB (I think that was the
> name) pin. If that pin is asserted, according to the documentation,
> the device stops sampling/driving the PCI signals. So for all intents
> and purposes it becomes disconnected.

Right.

> We could kind of deal with this if the ISOLATEB was deasserted at probe
> time, because that would mean that the device is at least enumerated on
> PCI. Then when we go into some power down mode (for example when the
> interface is taken down), the NIC driver could assert the GPIO and on
> resuming from the power down mode deassert it again. Logically the
> device would stay around, we just couldn't talk to it over PCI until the
> driver has deasserted the ISOLATEB GPIO.
> 
> The problem is that it's not exactly defined what the status of the pin
> would be at probe time. If it is asserted, the NIC will never show up on
> the PCI bus and hence no driver would be registered that could deassert
> the ISOLATEB signal. Well, unless we somehow created a "placeholder" PCI
> device based on a device tree node (containing a reference to the GPIO)
> so that the device would be enumerated (and probed) regardless of the
> PCI link. There's no infrastructure to do that currently, but perhaps
> worth investigating.
> 
> I think the W_DISABLE is somewhat similar. From what Manikanta was
> saying, the PCI link also goes down when the pin is asserted, so we
> loose any means of communicating with it over PCI.
> 
> The issue that Manikanta was trying to solve with this particular patch
> was that since the PCI device is part of the PCI device hierarchy, some
> userspace tools (X server, for example) will see it and try to discover
> whether it's a GPU or not. This in turn causes errors from the PCI host
> controller because it's trying to access a device behind a link that's
> down. That, I assume, could also happen for the ISOLATEB case that I was
> describing above, though it hasn't been brought up, I think.

Agree, sounds like it.

> > > > > > > > The problem that Manikanta is trying to solve here occurs in
> > > > > > > > this situation (Manikanta, correct me if I've got this wrong):
> > > > > > > > on some setups, a WiFi module connected over PCI will toggle a
> > > > > > > > power GPIO as part of runtime suspend. This effectively causes
> > > > > > > > the module to disappear from the PCI bus (i.e. it can no longer
> > > > > > > > be accessed until the power GPIO is toggled again).
> > > > > > > 
> > > > > > > GPIO is toggled as part of WiFi on/off, can be triggered from
> > > > > > > network manager UI.
> > 
> > That's kinda icky, IMHO.
> 
> Isn't that kind of the point of rfkill? I seem to remember having a
> notebook where this was done exactly the same way. There was also a
> button/switch that you could push which would result in the WiFi device
> either going away completely or at the least loosing the WiFi link. It
> seems like that's exactly what Manikanta is describing.

Right.

So ... rfkill has some terminology issues sometimes. Let me clarify what
typically happens.

Usually, you have the rfkill instance "wiphy-xyz" which is on the
wireless NIC (through cfg80211). This has two things:

 1) software rfkill *control*
 2) hardware rfkill *reporting*

So a device like iwlwifi has software rfkill control through rfkill
"wiphy-phy0", which is really just implemented as "bring down all the
netdevs etc.". And then you have "HW rfkill reporting", where our NIC
just reports the current status of the W_DISABLE pin. This again causes
the software to turn off all the netdevs etc.

This is what it looks like for the *device* side.

Now, for the *platform* side, which we're really looking at now, we
usually have the rfkill be a bit different:

hardware rfkill *reporting* isn't used at all here.

software rfkill *control*: instead of directly controlling the netdevs
etc. like the instances in cfg80211 do, this just controls the GPIO.


Now, in a typical platform with an Intel NIC, you have something like

platform rfkill device
 --> controls GPIO
  W_DISABLE pin
   --> reported as HW rfkill
    --> rfkill-wiphy0 device

In some other platforms, you literally just have a hardware button:

hardware button
 --> controls GPIO
  ... as before ...

Now, this all works great, but is basically software only, just using
the hardware pins as a communication mechanism.


Now, with some platforms, and particularly with BT USB devices where
I've seen this a lot (but never saw it with PCIe before), you have what
you're describing here, that the device just drops off the bus for an
rfkill.

Again, though, the actual cause of this might be a GPIO control (through
an rfkill instance) or something else like a literal hardware button
(not uncommon for Bluetooth).


> > Well, they said above it's a GPIO that controls it, so the software
> > already knows and doesn't really need an event?
> 
> We still need to communicate from rfkill to the PCI host controller that
> something happened, since they are two different entities.

Yeah, but the question is if we really need it from *rfkill* rather than
the GPIO as I described below?

> > > > The rfkill subsystem provides a generic interface for disabling any radio
> > > > transmitter in the system. WiFi M.2 form factor cards provide W_DISABLE
> > > > GPIO to control the radio transmitter
> > 
> > But it depends on the hardware how this is handled, Intel NICs for
> > example just trigger an IRQ to the host and don't turn off much, for
> > them the W_DISABLE pin is just a GPIO in input mode, with edge triggered
> > interrupt to the driver.
> 
> Okay, so does this mean you have some input device connected to the WiFi
> device that will be used (without software intervention) to disable the
> transmitter and then the WiFi device will signal using the W_DISABLE pin
> that the transmitter was indeed disabled?

See above.

> This was just an example of what I was imagining. The network driver
> would get an rfkill (looked up via device tree phandle) and subscribe to
> receive events from it, so that it could be notified when the rfkill is
> "blocked" and rescan the bus to get the WiFi device unplugged. Once
> unblocked it would be notified again and rescan the bus so that the
> device would reappear.

Ok. Not sure we need much involvement of the driver and/or the rfkill,
though.

> > I mean, let's say first we make rfkill-gpio DT-aware, rather than just
> > ACPI. This should be simple. Then it drives a GPIO (it can actually
> > drive two and a clock, not sure I know why).
> > 
> > Now, next we need something that says that the device should be treated
> > as hotplug/unplug. We could make this in the driver somehow like you
> > suggested, but that seems like a lot of effort?
> > 
> > Couldn't we put this into the *GPIO* subsystem instead?
> > 
> > I mean - conceivably there could be GPIOs that just power down a device
> > for example. Not even through something like W_DISABLE, but just having
> > a GPIO hooked up to a transistor on the voltage pin of the device. That
> > would have very similar semantics?
> > 
> > So why not just attach the PCIe device/port to the GPIO, and have the
> > GPIO implementation here call the detach/attach (or detach/rescan?) when
> > they are toggled?
> > 
> > Not that I'd mind having it in rfkill! But it seems like a special case
> > to have it there, when you can do so much more with GPIOs.
> 
> Yeah, that's where things become a little muddy. For the ISOLATEB case
> there was initially a similar proposal. 

OK.

> The problem is that on one hand
> we can have different semantics for these pins. On one platform this
> could be a kind of "power" GPIO, on others it could be ISOLATE/DISABLE,
> and on yet others it would be more like a reset. In order to make the
> PCIe port aware of the differences we'd have to expose multiple GPIOs in
> DT for context.

Right, but does it matter?

I mean - does this have a different impact on the software? It seems to
me one way or the other all you really need to do is hot-unplug a device
on the right signal one way, and rescan for devices on the other signal?

> The other problem with this is that, in order to avoid the chicken-and-
> egg problem, we need to associate these GPIOs with the root ports,
> because those are the only ones that exist at probe time. All downstream
> devices may not be available because the power/reset/disable pin is not
> asserted/deasserted yet. Now, you could potentially have a switch in the
> downstream hierarchy, so it becomes completely unclear what exact device
> the GPIO is associated with.

Hmm, sort of I guess. I think you need *both* associations really.

On the "disappear" transition, you need it to be linked to a very
specific device. On the "appear" transition, you need the rescan of the
root port right?

> Related to that, a GPIO like this is really only useful if you can make
> use of it. For example you want to assert/deassert this GPIO in order to
> put the WiFi/Ethernet/whatever device into a low-power mode when it is
> not used, right? But in order to do so, the driver for that device needs
> to be able to handle the GPIO, because it is the only one that knows the
> right point in time to toggle it. Conversely, if this was associated
> with the root port, the only point in time where the root port driver
> could toggle it is on a suspend/resume of the entire bus, which makes it
> rather useless.

Depends. If you're talking about rfkill, you have a completely separate
rfkill-gpio device (the "platform rfkill device" I was talking about
earlier), and the driver for the actual wifi NIC isn't actually involved
at all.

> But then we're back to square one where we basically have to associate
> the GPIO with the specific device. I think that's the right thing to do
> because, well, that's what reality is. The GPIO is directly routed to a
> pin on the chip. It's not something that goes over the PCI connector or
> anything. However, we're also back to the chicken-and-egg problem since
> without toggling the GPIO the device might not even get enumerated.
> 
> rfkill-gpio has the advantage that it decouples this and gets us out of
> the chicken-and-egg situation. It also has fairly well-defined semantics
> and fits the use-case, so it's a very appealing option.

Sure, I get that, but I still don't understand why we should link it to
the rfkill rather than the GPIO?

I mean, if we have these things in the platform/DT:

 WIFI-WDISABLE-GPIO
 WIFI-NIC-PCIE
 PCIE-ROOT-PORT
 PLATFORM-WIFI-RFKILL

then we'd describe the

  PLATFORM-WIFI-RFKILL as an rfkill-gpio using the WIFI-WDISABLE-GPIO

and make some sort of link:

 WIFI-WDISABLE-GPIO --enable-rescan-- PCIE-ROOT-PORT
                    --unplug-device-- WIFI-NIC-PCIE

or not?

johannes
Bjorn Helgaas June 19, 2019, 1:38 p.m. UTC | #14
On Tue, Jun 18, 2019 at 02:32:59PM +0200, Johannes Berg wrote:
> On Tue, 2019-06-18 at 12:49 +0200, Thierry Reding wrote:
> 
> > > > > > > > > > 1. WiFi devices provides power-off feature for power saving
> > > > > > > > > > in mobiles.  When WiFi is turned off we shouldn't power on
> > > > > > > > > > the HW back without user turning it back on.
> 
> But why would you disconnect the PCIe device just to power it down?!
> 
> > > > > > > The problem that Manikanta is trying to solve here occurs in
> > > > > > > this situation (Manikanta, correct me if I've got this wrong):
> > > > > > > on some setups, a WiFi module connected over PCI will toggle a
> > > > > > > power GPIO as part of runtime suspend. This effectively causes
> > > > > > > the module to disappear from the PCI bus (i.e. it can no longer
> > > > > > > be accessed until the power GPIO is toggled again).
> > > > > > 
> > > > > > GPIO is toggled as part of WiFi on/off, can be triggered from
> > > > > > network manager UI.
> 
> That's kinda icky, IMHO.
> 
> > > > > > Correct, rfkill switch should handle the GPIO.
> > > > > > Sequence will be,
> > > > > >  - WiFi ON
> > > > > >    - rfkill switch enables the WiFi GPIO
> > > > > >    - Tegra PCIe receives hot plug event
> > > > > >    - Tegra PCIe hot plug driver rescans PCI bus and enumerates the device
> > > > > >    - PCI client driver is probed, which will create network interface
> > > > > >  - WiFi OFF
> > > > > >    - rfkill switch disables the WiFi GPIO
> > > > > >    - Tegra PCIe receives hot unplug event
> > > > > >    - Tegra PCIe hot plug driver removes PCI devices under the bus
> > > > > >    - PCI client driver remove is executed, which will remove
> > > > > >      network interface
> > > > > > We don't need current patch in this case because PCI device is not
> > > > > > present in the PCI hierarchy, so there cannot be EP config access
> > > > > > with link down.  However Tegra doesn't support hot plug and unplug
> > > > > > events. I am not sure if we have any software based hot plug event
> > > > > > trigger.
> 
> Looks reasonable to me.
> 
> I guess if you absolutely know in software when the device is
> present or not, you don't need "real" PCIe hotplug, just need to
> tickle the software right?
> 
> > > > How does rfkill work?  It sounds like it completely removes
> > > > power from the wifi device, putting it in D3cold.  Is there
> > > > any software notification other than the "Slot present pin
> > > > change" (which looks like a Tegra-specific thing)?
> 
> Well, they said above it's a GPIO that controls it, so the software
> already knows and doesn't really need an event?

Forgive my ignorance about rfkill.  At least in this Tegra case, it
sounds like rfkill basically controls a power switch for the entire
device, i.e., it doesn't merely turn off the radio portion of the
device; it puts the entire PCI device in D3cold.

Is rfkill integrated with the power management subsystem?  E.g., when
lspci or X tries to read config space via pci_read_config(), does the
pci_config_pm_runtime_get() in that path wake up the device?

IMO, if the struct pci_dev exists, we should be able to rely on the
device actually being accessible (possibly after bringing it back to
D0).  If rfkill only turns off the radio, leaving the PCI interface
active, that would be fine -- in that case generic PCI things like
lspci would work normally and it would be up to the driver to manage
network-related things.

But if rfkill turns off the PCI interface and the power management
subsystem can't wake it up, I think we should unbind the driver and
remove the pci_dev, so it wouldn't appear in lspci at all.

Bjorn
Johannes Berg June 19, 2019, 1:40 p.m. UTC | #15
On Wed, 2019-06-19 at 08:38 -0500, Bjorn Helgaas wrote:

> > > > > How does rfkill work?  It sounds like it completely removes
> > > > > power from the wifi device, putting it in D3cold.  Is there
> > > > > any software notification other than the "Slot present pin
> > > > > change" (which looks like a Tegra-specific thing)?
> > 
> > Well, they said above it's a GPIO that controls it, so the software
> > already knows and doesn't really need an event?
> 
> Forgive my ignorance about rfkill.  At least in this Tegra case, it
> sounds like rfkill basically controls a power switch for the entire
> device, i.e., it doesn't merely turn off the radio portion of the
> device; it puts the entire PCI device in D3cold.

Sort of. The actual (hardware) implementation seems a bit more
complicated than a "power switch", but yes, that's the effect of it.

> Is rfkill integrated with the power management subsystem?  E.g., when
> lspci or X tries to read config space via pci_read_config(), does the
> pci_config_pm_runtime_get() in that path wake up the device?

No, that's the problem at hand AFAICT.

> IMO, if the struct pci_dev exists, we should be able to rely on the
> device actually being accessible (possibly after bringing it back to
> D0).  If rfkill only turns off the radio, leaving the PCI interface
> active, that would be fine -- in that case generic PCI things like
> lspci would work normally and it would be up to the driver to manage
> network-related things.
> 
> But if rfkill turns off PCI interface and the power management
> subsystem can't wake it up, I think we should unbind the driver and
> remove the pci_dev, so it wouldn't appear in lspci at all.

Right. That's being suggested here, but since the platform has no actual
hardware hotplug, that needs to be implemented in software.

The question at hand is *how* to actually achieve that.

I'm kind of arguing that it's not rfkill that achieves it, but the
underlying GPIO that toggles the device, since that GPIO could also be
bound to something other than an rfkill-gpio instance.

johannes
diff mbox series

Patch

diff --git a/drivers/pci/controller/pci-tegra.c b/drivers/pci/controller/pci-tegra.c
index d20c88a79e00..33f4dfab9e35 100644
--- a/drivers/pci/controller/pci-tegra.c
+++ b/drivers/pci/controller/pci-tegra.c
@@ -428,6 +428,14 @@  static inline u32 pads_readl(struct tegra_pcie *pcie, unsigned long offset)
 	return readl(pcie->pads + offset);
 }
 
+static bool tegra_pcie_link_up(struct tegra_pcie_port *port)
+{
+	u32 value;
+
+	value = readl(port->base + RP_LINK_CONTROL_STATUS);
+	return !!(value & RP_LINK_CONTROL_STATUS_DL_LINK_ACTIVE);
+}
+
 /*
  * The configuration space mapping on Tegra is somewhat similar to the ECAM
  * defined by PCIe. However it deviates a bit in how the 4 bits for extended
@@ -493,20 +501,50 @@  static void __iomem *tegra_pcie_map_bus(struct pci_bus *bus,
 static int tegra_pcie_config_read(struct pci_bus *bus, unsigned int devfn,
 				  int where, int size, u32 *value)
 {
+	struct tegra_pcie *pcie = bus->sysdata;
+	struct pci_dev *bridge;
+	struct tegra_pcie_port *port;
+
 	if (bus->number == 0)
 		return pci_generic_config_read32(bus, devfn, where, size,
 						 value);
 
+	bridge = pcie_find_root_port(bus->self);
+
+	list_for_each_entry(port, &pcie->ports, list)
+		if (port->index + 1 == PCI_SLOT(bridge->devfn))
+			break;
+
+	/* If there is no link, then there is no device */
+	if (!tegra_pcie_link_up(port)) {
+		*value = 0xffffffff;
+		return PCIBIOS_DEVICE_NOT_FOUND;
+	}
+
 	return pci_generic_config_read(bus, devfn, where, size, value);
 }
 
 static int tegra_pcie_config_write(struct pci_bus *bus, unsigned int devfn,
 				   int where, int size, u32 value)
 {
+	struct tegra_pcie *pcie = bus->sysdata;
+	struct tegra_pcie_port *port;
+	struct pci_dev *bridge;
+
 	if (bus->number == 0)
 		return pci_generic_config_write32(bus, devfn, where, size,
 						  value);
 
+	bridge = pcie_find_root_port(bus->self);
+
+	list_for_each_entry(port, &pcie->ports, list)
+		if (port->index + 1 == PCI_SLOT(bridge->devfn))
+			break;
+
+	/* If there is no link, then there is no device */
+	if (!tegra_pcie_link_up(port))
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
 	return pci_generic_config_write(bus, devfn, where, size, value);
 }