diff mbox

[RFC,1/4] x86/platform/intel/iosf_mbi: Add a mutex for punit access

Message ID 20170101201403.12132-2-hdegoede@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Hans de Goede Jan. 1, 2017, 8:14 p.m. UTC
The punit on baytrail / cherrytrail systems is not only accessed through
the iosf_mbi functions, but also by the i915 code. Add a mutex to protect
the punit against simultaneous accesses and 2 functions to lock / unlock
this mutex.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 arch/x86/include/asm/iosf_mbi.h    | 19 +++++++++++++++++++
 arch/x86/platform/intel/iosf_mbi.c | 13 +++++++++++++
 2 files changed, 32 insertions(+)

Comments

Ville Syrjälä Jan. 2, 2017, 2:12 p.m. UTC | #1
On Sun, Jan 01, 2017 at 09:14:00PM +0100, Hans de Goede wrote:
> The punit on baytrail / cherrytrail systems is not only accessed through
> the iosf_mbi functions, but also by the i915 code. Add a mutex to protect
> the punit against simultaneous accesses and 2 functions to lock / unlock
> this mutex.

I'm not sure which part of punit you're actually trying to protect
here. Some specific registers?

> 
> Signed-off-by: Hans de Goede <hdegoede@redhat.com>
> ---
>  arch/x86/include/asm/iosf_mbi.h    | 19 +++++++++++++++++++
>  arch/x86/platform/intel/iosf_mbi.c | 13 +++++++++++++
>  2 files changed, 32 insertions(+)
> 
> diff --git a/arch/x86/include/asm/iosf_mbi.h b/arch/x86/include/asm/iosf_mbi.h
> index b41ee16..02963bd 100644
> --- a/arch/x86/include/asm/iosf_mbi.h
> +++ b/arch/x86/include/asm/iosf_mbi.h
> @@ -88,6 +88,21 @@ int iosf_mbi_write(u8 port, u8 opcode, u32 offset, u32 mdr);
>   */
>  int iosf_mbi_modify(u8 port, u8 opcode, u32 offset, u32 mdr, u32 mask);
>  
> +/**
> + * iosf_mbi_punit_lock() - Lock the punit mutex
> + *
> + * This function must be called before accessing the punit or the pmic, be it
> + * through iosf_mbi_* or through other means.
> + *
> + * This function locks a mutex, as such it may sleep.
> + */
> +void iosf_mbi_punit_lock(void);
> +
> +/**
> + * iosf_mbi_punit_unlock() - Unlock the punit mutex
> + */
> +void iosf_mbi_punit_unlock(void);
> +
>  #else /* CONFIG_IOSF_MBI is not enabled */
>  static inline
>  bool iosf_mbi_available(void)
> @@ -115,6 +130,10 @@ int iosf_mbi_modify(u8 port, u8 opcode, u32 offset, u32 mdr, u32 mask)
>  	WARN(1, "IOSF_MBI driver not available");
>  	return -EPERM;
>  }
> +
> +static inline void iosf_mbi_punit_lock(void) {}
> +static inline void iosf_mbi_punit_unlock(void) {}
> +
>  #endif /* CONFIG_IOSF_MBI */
>  
>  #endif /* IOSF_MBI_SYMS_H */
> diff --git a/arch/x86/platform/intel/iosf_mbi.c b/arch/x86/platform/intel/iosf_mbi.c
> index edf2c54..75d8135 100644
> --- a/arch/x86/platform/intel/iosf_mbi.c
> +++ b/arch/x86/platform/intel/iosf_mbi.c
> @@ -34,6 +34,7 @@
>  
>  static struct pci_dev *mbi_pdev;
>  static DEFINE_SPINLOCK(iosf_mbi_lock);
> +static DEFINE_MUTEX(iosf_mbi_punit_mutex);
>  
>  static inline u32 iosf_mbi_form_mcr(u8 op, u8 port, u8 offset)
>  {
> @@ -190,6 +191,18 @@ bool iosf_mbi_available(void)
>  }
>  EXPORT_SYMBOL(iosf_mbi_available);
>  
> +void iosf_mbi_punit_lock(void)
> +{
> +	mutex_lock(&iosf_mbi_punit_mutex);
> +}
> +EXPORT_SYMBOL(iosf_mbi_punit_lock);
> +
> +void iosf_mbi_punit_unlock(void)
> +{
> +	mutex_unlock(&iosf_mbi_punit_mutex);
> +}
> +EXPORT_SYMBOL(iosf_mbi_punit_unlock);
> +
>  #ifdef CONFIG_IOSF_MBI_DEBUG
>  static u32	dbg_mdr;
>  static u32	dbg_mcr;
> -- 
> 2.9.3
Hans de Goede Jan. 2, 2017, 2:21 p.m. UTC | #2
Hi,

On 02-01-17 15:12, Ville Syrjälä wrote:
> On Sun, Jan 01, 2017 at 09:14:00PM +0100, Hans de Goede wrote:
>> The punit on baytrail / cherrytrail systems is not only accessed through
>> the iosf_mbi functions, but also by the i915 code. Add a mutex to protect
>> the punit against simultaneous accesses and 2 functions to lock / unlock
>> this mutex.
>
> I'm not sure which part of punit you're actually trying to protect
> here. Some specific registers?

The theory I'm going by is that for certain actions / certain requests
we send to the punit, the punit needs to access the (axp288) pmic, to
change (or enable / disable) certain voltages.
So it needs to access the pmic i2c bus, but in some cases the kernel
is accessing this itself (e.g. for battery monitoring) and is holding
the punit i2c bus semaphore. At least with CPU-core C-state transitions,
this seems to be happening, if I do read i2c transfers on the pmic
i2c bus repeatedly without blocking the CPU from entering C6 (*) while
accessing the i2c bus my cherrytrail tablet will freeze in 10 - 30
seconds.

The findings of one of the users commenting in:

https://bugzilla.kernel.org/show_bug.cgi?id=155241

Seem to indicate a similar problem with the i915 driver doing
power-management while the i2c-designware-baytrail code is holding
the punit i2c bus semaphore. One would hope that the punit would be
smart enough to simply wait for the semaphore to get released in that
case, but at least for the C6 CPU core transition it seems that allowing
that to happen while holding the semaphore causes a hard crash of the
SoC. So I guess that for explicit requests the punit code assumes that
the OS is not holding the semaphore.

Regards,

Hans



*) which powers off most of the core, so likely causes interaction with
the pmic


>
>>
>> Signed-off-by: Hans de Goede <hdegoede@redhat.com>
>> ---
>>  arch/x86/include/asm/iosf_mbi.h    | 19 +++++++++++++++++++
>>  arch/x86/platform/intel/iosf_mbi.c | 13 +++++++++++++
>>  2 files changed, 32 insertions(+)
>>
>> diff --git a/arch/x86/include/asm/iosf_mbi.h b/arch/x86/include/asm/iosf_mbi.h
>> index b41ee16..02963bd 100644
>> --- a/arch/x86/include/asm/iosf_mbi.h
>> +++ b/arch/x86/include/asm/iosf_mbi.h
>> @@ -88,6 +88,21 @@ int iosf_mbi_write(u8 port, u8 opcode, u32 offset, u32 mdr);
>>   */
>>  int iosf_mbi_modify(u8 port, u8 opcode, u32 offset, u32 mdr, u32 mask);
>>
>> +/**
>> + * iosf_mbi_punit_lock() - Lock the punit mutex
>> + *
>> + * This function must be called before accessing the punit or the pmic, be it
>> + * through iosf_mbi_* or through other means.
>> + *
>> + * This function locks a mutex, as such it may sleep.
>> + */
>> +void iosf_mbi_punit_lock(void);
>> +
>> +/**
>> + * iosf_mbi_punit_unlock() - Unlock the punit mutex
>> + */
>> +void iosf_mbi_punit_unlock(void);
>> +
>>  #else /* CONFIG_IOSF_MBI is not enabled */
>>  static inline
>>  bool iosf_mbi_available(void)
>> @@ -115,6 +130,10 @@ int iosf_mbi_modify(u8 port, u8 opcode, u32 offset, u32 mdr, u32 mask)
>>  	WARN(1, "IOSF_MBI driver not available");
>>  	return -EPERM;
>>  }
>> +
>> +static inline void iosf_mbi_punit_lock(void) {}
>> +static inline void iosf_mbi_punit_unlock(void) {}
>> +
>>  #endif /* CONFIG_IOSF_MBI */
>>
>>  #endif /* IOSF_MBI_SYMS_H */
>> diff --git a/arch/x86/platform/intel/iosf_mbi.c b/arch/x86/platform/intel/iosf_mbi.c
>> index edf2c54..75d8135 100644
>> --- a/arch/x86/platform/intel/iosf_mbi.c
>> +++ b/arch/x86/platform/intel/iosf_mbi.c
>> @@ -34,6 +34,7 @@
>>
>>  static struct pci_dev *mbi_pdev;
>>  static DEFINE_SPINLOCK(iosf_mbi_lock);
>> +static DEFINE_MUTEX(iosf_mbi_punit_mutex);
>>
>>  static inline u32 iosf_mbi_form_mcr(u8 op, u8 port, u8 offset)
>>  {
>> @@ -190,6 +191,18 @@ bool iosf_mbi_available(void)
>>  }
>>  EXPORT_SYMBOL(iosf_mbi_available);
>>
>> +void iosf_mbi_punit_lock(void)
>> +{
>> +	mutex_lock(&iosf_mbi_punit_mutex);
>> +}
>> +EXPORT_SYMBOL(iosf_mbi_punit_lock);
>> +
>> +void iosf_mbi_punit_unlock(void)
>> +{
>> +	mutex_unlock(&iosf_mbi_punit_mutex);
>> +}
>> +EXPORT_SYMBOL(iosf_mbi_punit_unlock);
>> +
>>  #ifdef CONFIG_IOSF_MBI_DEBUG
>>  static u32	dbg_mdr;
>>  static u32	dbg_mcr;
>> --
>> 2.9.3
>
Ville Syrjälä Jan. 13, 2017, 9:26 a.m. UTC | #3
On Mon, Jan 02, 2017 at 03:21:13PM +0100, Hans de Goede wrote:
> Hi,
> 
> On 02-01-17 15:12, Ville Syrjälä wrote:
> > On Sun, Jan 01, 2017 at 09:14:00PM +0100, Hans de Goede wrote:
> >> The punit on baytrail / cherrytrail systems is not only accessed through
> >> the iosf_mbi functions, but also by the i915 code. Add a mutex to protect
> >> the punit against simultaneous accesses and 2 functions to lock / unlock
> >> this mutex.
> >
> > I'm not sure which part of punit you're actually trying to protect
> > here. Some specific registers?
> 
> The theory I'm going by is that for certain actions / certain requests
> we send to the punit, the punit needs to access the (axp288) pmic, to
> change (or enable / disable) certain voltages.

At least for cpu/display/gt voltages that shouldn't really be the case.
The vcc/vnn/vgg rails are controlled via svid, not i2c.

It also feels quite hand wavy since the punit could do whatever at
any time AFAIK. Eg. if there's some thermal event or something the
punit might kick into action. So trying to protect this from the OS
side might not be able to avoid these problems entirely. It feels like
there really should be some kind of shared hardware/firmware mutex
with the punit to arbitrate access to the i2c bus.

> So it needs to access the pmic i2c bus, but in some cases the kernel
> is accessing this itself (e.g. for battery monitoring) and is holding
> the punit i2c bus semaphore. At least with CPU-core C-state transitions,
> this seems to be happening, if I do read i2c transfers on the pmic
> i2c bus repeatedly without blocking the CPU from entering C6 (*) while
> accessing the i2c bus my cherrytrail tablet will freeze in 10 - 30
> seconds.
> 
> The findings of one of the users commenting in:
> 
> https://bugzilla.kernel.org/show_bug.cgi?id=155241
> 
> Seem to indicate a similar problem with the i915 driver doing
> power-management while the i2c-designware-baytrail code is holding
> the punit i2c bus semaphore. One would hope that the punit would be
> smart enough to simply wait for the semaphore to get released in that
> case, but at least for the C6 CPU core transition it seems that allowing
> that to happen while holding the semaphore causes a hard crash of the
> SoC. So I guess that for explicit requests the punit code assumes that
> the OS is not holding the semaphore.
> 
> Regards,
> 
> Hans
> 
> 
> 
> *) which powers off most of the core, so likely causes interaction with
> the pmic
> 
> 
> >
> >>
> >> Signed-off-by: Hans de Goede <hdegoede@redhat.com>
> >> ---
> >>  arch/x86/include/asm/iosf_mbi.h    | 19 +++++++++++++++++++
> >>  arch/x86/platform/intel/iosf_mbi.c | 13 +++++++++++++
> >>  2 files changed, 32 insertions(+)
> >>
> >> diff --git a/arch/x86/include/asm/iosf_mbi.h b/arch/x86/include/asm/iosf_mbi.h
> >> index b41ee16..02963bd 100644
> >> --- a/arch/x86/include/asm/iosf_mbi.h
> >> +++ b/arch/x86/include/asm/iosf_mbi.h
> >> @@ -88,6 +88,21 @@ int iosf_mbi_write(u8 port, u8 opcode, u32 offset, u32 mdr);
> >>   */
> >>  int iosf_mbi_modify(u8 port, u8 opcode, u32 offset, u32 mdr, u32 mask);
> >>
> >> +/**
> >> + * iosf_mbi_punit_lock() - Lock the punit mutex
> >> + *
> >> + * This function must be called before accessing the punit or the pmic, be it
> >> + * through iosf_mbi_* or through other means.
> >> + *
> >> + * This function locks a mutex, as such it may sleep.
> >> + */
> >> +void iosf_mbi_punit_lock(void);
> >> +
> >> +/**
> >> + * iosf_mbi_punit_unlock() - Unlock the punit mutex
> >> + */
> >> +void iosf_mbi_punit_unlock(void);
> >> +
> >>  #else /* CONFIG_IOSF_MBI is not enabled */
> >>  static inline
> >>  bool iosf_mbi_available(void)
> >> @@ -115,6 +130,10 @@ int iosf_mbi_modify(u8 port, u8 opcode, u32 offset, u32 mdr, u32 mask)
> >>  	WARN(1, "IOSF_MBI driver not available");
> >>  	return -EPERM;
> >>  }
> >> +
> >> +static inline void iosf_mbi_punit_lock(void) {}
> >> +static inline void iosf_mbi_punit_unlock(void) {}
> >> +
> >>  #endif /* CONFIG_IOSF_MBI */
> >>
> >>  #endif /* IOSF_MBI_SYMS_H */
> >> diff --git a/arch/x86/platform/intel/iosf_mbi.c b/arch/x86/platform/intel/iosf_mbi.c
> >> index edf2c54..75d8135 100644
> >> --- a/arch/x86/platform/intel/iosf_mbi.c
> >> +++ b/arch/x86/platform/intel/iosf_mbi.c
> >> @@ -34,6 +34,7 @@
> >>
> >>  static struct pci_dev *mbi_pdev;
> >>  static DEFINE_SPINLOCK(iosf_mbi_lock);
> >> +static DEFINE_MUTEX(iosf_mbi_punit_mutex);
> >>
> >>  static inline u32 iosf_mbi_form_mcr(u8 op, u8 port, u8 offset)
> >>  {
> >> @@ -190,6 +191,18 @@ bool iosf_mbi_available(void)
> >>  }
> >>  EXPORT_SYMBOL(iosf_mbi_available);
> >>
> >> +void iosf_mbi_punit_lock(void)
> >> +{
> >> +	mutex_lock(&iosf_mbi_punit_mutex);
> >> +}
> >> +EXPORT_SYMBOL(iosf_mbi_punit_lock);
> >> +
> >> +void iosf_mbi_punit_unlock(void)
> >> +{
> >> +	mutex_unlock(&iosf_mbi_punit_mutex);
> >> +}
> >> +EXPORT_SYMBOL(iosf_mbi_punit_unlock);
> >> +
> >>  #ifdef CONFIG_IOSF_MBI_DEBUG
> >>  static u32	dbg_mdr;
> >>  static u32	dbg_mcr;
> >> --
> >> 2.9.3
> >
Jarkko Nikula Jan. 13, 2017, 10:34 a.m. UTC | #4
On 01/13/2017 11:26 AM, Ville Syrjälä wrote:
> It also feels quite hand wavy since the punit could do whatever at
> any time AFAIK. Eg. if there's some thermal event or something the
> punit might kick into action. So trying to protect this from the OS
> side might not be able to avoid these problems entirely. It feels like
> there really should be some kind of shared hardware/firmware mutex
> with the punit to arbitrate access to the i2c bus.
>
There is an HW semaphore for I2C access. It is implemented in 
drivers/i2c/busses/i2c-designware-baytrail.c and another set from Hans 
is adding support for Cherrytrail into it.
Ville Syrjälä Jan. 13, 2017, 10:51 a.m. UTC | #5
On Fri, Jan 13, 2017 at 12:34:54PM +0200, Jarkko Nikula wrote:
> On 01/13/2017 11:26 AM, Ville Syrjälä wrote:
> > It also feels quite hand wavy since the punit could do whatever at
> > any time AFAIK. Eg. if there's some thermal event or something the
> > punit might kick into action. So trying to protect this from the OS
> > side might not be able to avoid these problems entirely. It feels like
> > there really should be some kind of shared hardware/firmware mutex
> > with the punit to arbitrate access to the i2c bus.
> >
> There is an HW semaphore for I2C access. It is implemented in 
> drivers/i2c/busses/i2c-designware-baytrail.c and another set from Hans 
> is adding support for Cherrytrail into it.

Then why do we need anything else?
Jarkko Nikula Jan. 13, 2017, 11:12 a.m. UTC | #6
On 01/13/2017 12:51 PM, Ville Syrjälä wrote:
> On Fri, Jan 13, 2017 at 12:34:54PM +0200, Jarkko Nikula wrote:
>> On 01/13/2017 11:26 AM, Ville Syrjälä wrote:
>>> It also feels quite hand wavy since the punit could do whatever at
>>> any time AFAIK. Eg. if there's some thermal event or something the
>>> punit might kick into action. So trying to protect this from the OS
>>> side might not be able to avoid these problems entirely. It feels like
>>> there really should be some kind of shared hardware/firmware mutex
>>> with the punit to arbitrate access to the i2c bus.
>>>
>> There is an HW semaphore for I2C access. It is implemented in
>> drivers/i2c/busses/i2c-designware-baytrail.c and another set from Hans
>> is adding support for Cherrytrail into it.
>
> Then why do we need anything else?
>
 From this patch: "The punit on baytrail / cherrytrail systems is not 
only accessed through the iosf_mbi functions, but also by the i915 code."
Ville Syrjälä Jan. 13, 2017, 12:20 p.m. UTC | #7
On Fri, Jan 13, 2017 at 01:12:15PM +0200, Jarkko Nikula wrote:
> On 01/13/2017 12:51 PM, Ville Syrjälä wrote:
> > On Fri, Jan 13, 2017 at 12:34:54PM +0200, Jarkko Nikula wrote:
> >> On 01/13/2017 11:26 AM, Ville Syrjälä wrote:
> >>> It also feels quite hand wavy since the punit could do whatever at
> >>> any time AFAIK. Eg. if there's some thermal event or something the
> >>> punit might kick into action. So trying to protect this from the OS
> >>> side might not be able to avoid these problems entirely. It feels like
> >>> there really should be some kind of shared hardware/firmware mutex
> >>> with the punit to arbitrate access to the i2c bus.
> >>>
> >> There is an HW semaphore for I2C access. It is implemented in
> >> drivers/i2c/busses/i2c-designware-baytrail.c and another set from Hans
> >> is adding support for Cherrytrail into it.
> >
> > Then why do we need anything else?
> >
>  From this patch: "The punit on baytrail / cherrytrail systems is not 
> only accessed through the iosf_mbi functions, but also by the i915 code."

I don't see how that's relevant at all. Multiple things accessing the
punit concurrently should be perfectly fine as long as they don't frob
the same registers.
Hans de Goede Jan. 13, 2017, 4:06 p.m. UTC | #8
Hi,

On 01/13/2017 10:26 AM, Ville Syrjälä wrote:
> On Mon, Jan 02, 2017 at 03:21:13PM +0100, Hans de Goede wrote:
>> Hi,
>>
>> On 02-01-17 15:12, Ville Syrjälä wrote:
>>> On Sun, Jan 01, 2017 at 09:14:00PM +0100, Hans de Goede wrote:
>>>> The punit on baytrail / cherrytrail systems is not only accessed through
>>>> the iosf_mbi functions, but also by the i915 code. Add a mutex to protect
>>>> the punit against simultaneous accesses and 2 functions to lock / unlock
>>>> this mutex.
>>>
>>> I'm not sure which part of punit you're actually trying to protect
>>> here. Some specific registers?
>>
>> The theory I'm going by is that for certain actions / certain requests
>> we send to the punit, the punit needs to access the (axp288) pmic, to
>> change (or enable / disable) certain voltages.
>
> At least for cpu/display/gt voltages that shouldn't really be the case.
> The vcc/vnn/vgg rails are controlled via svid, not i2c.

Are you sure? The ax288 pmic does not have a svid interface, only
an i2c interface, and AFAICT its buck DCDC converters are used to
feed all of these.

> It also feels quite hand wavy since the punit could do whatever at
> any time AFAIK. Eg. if there's some thermal event or something the
> punit might kick into action. So trying to protect this from the OS
> side might not be able to avoid these problems entirely. It feels like
> there really should be some kind of shared hardware/firmware mutex
> with the punit to arbitrate access to the i2c bus.

Right, and there is such a mutex (which only gets used on systems
with an axp288 pmic...) and we are taking this mutex before starting
an i2c transaction on the pmic i2c bus. But this does not seem to be
enough. It seems the the punit does not check the mutex before
certain OS / host triggered actions. I guess it expects the host to
do this itself.

Please see my new (non RFC) version of this series I've posted.

There are at least 2 problems when relying solely on the punit
pmic i2c bus sempaphore:

1) CPU C1 <-> C6 transations happening while the pmic i2c bus
is being accessed by the host cause the system to hang
2) i915 (runtime) suspend resume fails every other attempt
with timeouts when trying to get a forcewake lock inn i915,
often followed by a system freeze shortly after this.

My non RFC version of this patch-set fixes both.

So summarizing yes you are right that there should be some
hardware mutex (there is, and we are already taking it), but
unfortunately that does not seem to be enough, when explicitly
requesting some power state transation, while another driver
is acccessing the pmic i2c bus bad things happen. Adding
some exclusion mechanism here seems to be necessary.

Note that the i2c acccess vs i915 forcewake issues was
first reported by an user who was trying my patches for
fixing pmic i2c access on cht (fixing the semaphore code on
cht) and then noticed a problem with the i915 driver as soon
as the pmic i2c bus was used. He has also confirmed that the
new non RFC version of my patches fix this.

Regards,

Hans
Ville Syrjälä Jan. 13, 2017, 4:30 p.m. UTC | #9
On Fri, Jan 13, 2017 at 05:06:52PM +0100, Hans de Goede wrote:
> Hi,
> 
> On 01/13/2017 10:26 AM, Ville Syrjälä wrote:
> > On Mon, Jan 02, 2017 at 03:21:13PM +0100, Hans de Goede wrote:
> >> Hi,
> >>
> >> On 02-01-17 15:12, Ville Syrjälä wrote:
> >>> On Sun, Jan 01, 2017 at 09:14:00PM +0100, Hans de Goede wrote:
> >>>> The punit on baytrail / cherrytrail systems is not only accessed through
> >>>> the iosf_mbi functions, but also by the i915 code. Add a mutex to protect
> >>>> the punit against simultaneous accesses and 2 functions to lock / unlock
> >>>> this mutex.
> >>>
> >>> I'm not sure which part of punit you're actually trying to protect
> >>> here. Some specific registers?
> >>
> >> The theory I'm going by is that for certain actions / certain requests
> >> we send to the punit, the punit needs to access the (axp288) pmic, to
> >> change (or enable / disable) certain voltages.
> >
> > At least for cpu/display/gt voltages that shouldn't really be the case.
> > The vcc/vnn/vgg rails are controlled via svid, not i2c.
> 
> Are you sure? The ax288 pmic does not have a svid interface, only
> an i2c interface, and AFAICT its buck DCDC converters are used to
> feed all of these.

Yes, looks like you're right. I guess someone didn't want to spend three
pins for svid.

> 
> > It also feels quite hand wavy since the punit could do whatever at
> > any time AFAIK. Eg. if there's some thermal event or something the
> > punit might kick into action. So trying to protect this from the OS
> > side might not be able to avoid these problems entirely. It feels like
> > there really should be some kind of shared hardware/firmware mutex
> > with the punit to arbitrate access to the i2c bus.
> 
> Right, and there is such a mutex (which only gets used on systems
> with an axp288 pmic...) and we are taking this mutex before starting
> an i2c transaction on the pmic i2c bus. But this does not seem to be
> enough. It seems the the punit does not check the mutex before
> certain OS / host triggered actions. I guess it expects the host to
> do this itself.
> 
> Please see my new (non RFC) version of this series I've posted.
> 
> There are at least 2 problems when relying solely on the punit
> pmic i2c bus sempaphore:
> 
> 1) CPU C1 <-> C6 transations happening while the pmic i2c bus
> is being accessed by the host cause the system to hang
> 2) i915 (runtime) suspend resume fails every other attempt
> with timeouts when trying to get a forcewake lock inn i915,
> often followed by a system freeze shortly after this.

Hmm. But forcewake works at other times? That seems quite strange.
Runtime suspend itself shouldn't really do much, and if we're still
poking at the the hw then we haven't really even suspended anything
yet, so having failing forcewake doesn't sounds at all good.
Hans de Goede Jan. 15, 2017, 11:10 a.m. UTC | #10
Hi,

On 13-01-17 17:30, Ville Syrjälä wrote:
> On Fri, Jan 13, 2017 at 05:06:52PM +0100, Hans de Goede wrote:
>> Hi,
>>
>> On 01/13/2017 10:26 AM, Ville Syrjälä wrote:
>>> On Mon, Jan 02, 2017 at 03:21:13PM +0100, Hans de Goede wrote:
>>>> Hi,
>>>>
>>>> On 02-01-17 15:12, Ville Syrjälä wrote:
>>>>> On Sun, Jan 01, 2017 at 09:14:00PM +0100, Hans de Goede wrote:
>>>>>> The punit on baytrail / cherrytrail systems is not only accessed through
>>>>>> the iosf_mbi functions, but also by the i915 code. Add a mutex to protect
>>>>>> the punit against simultaneous accesses and 2 functions to lock / unlock
>>>>>> this mutex.
>>>>>
>>>>> I'm not sure which part of punit you're actually trying to protect
>>>>> here. Some specific registers?
>>>>
>>>> The theory I'm going by is that for certain actions / certain requests
>>>> we send to the punit, the punit needs to access the (axp288) pmic, to
>>>> change (or enable / disable) certain voltages.
>>>
>>> At least for cpu/display/gt voltages that shouldn't really be the case.
>>> The vcc/vnn/vgg rails are controlled via svid, not i2c.
>>
>> Are you sure? The ax288 pmic does not have a svid interface, only
>> an i2c interface, and AFAICT its buck DCDC converters are used to
>> feed all of these.
>
> Yes, looks like you're right. I guess someone didn't want to spend three
> pins for svid.
>
>>
>>> It also feels quite hand wavy since the punit could do whatever at
>>> any time AFAIK. Eg. if there's some thermal event or something the
>>> punit might kick into action. So trying to protect this from the OS
>>> side might not be able to avoid these problems entirely. It feels like
>>> there really should be some kind of shared hardware/firmware mutex
>>> with the punit to arbitrate access to the i2c bus.
>>
>> Right, and there is such a mutex (which only gets used on systems
>> with an axp288 pmic...) and we are taking this mutex before starting
>> an i2c transaction on the pmic i2c bus. But this does not seem to be
>> enough. It seems the the punit does not check the mutex before
>> certain OS / host triggered actions. I guess it expects the host to
>> do this itself.
>>
>> Please see my new (non RFC) version of this series I've posted.
>>
>> There are at least 2 problems when relying solely on the punit
>> pmic i2c bus sempaphore:
>>
>> 1) CPU C1 <-> C6 transations happening while the pmic i2c bus
>> is being accessed by the host cause the system to hang
>> 2) i915 (runtime) suspend resume fails every other attempt
>> with timeouts when trying to get a forcewake lock inn i915,
>> often followed by a system freeze shortly after this.
>
> Hmm. But forcewake works at other times?

It depends on the workload, I believe the forcewake timeouts are
caused by e.g. the axp288 fuel-gauge driver directly accessing
the pmic i2c bus at the same time as the i915 driver is doing a
forcewake. So in essence this is race and as such not 100%
reproducible. With my workload (Fedora 25 with gnome3) full suspend
+ resume is a good way to reproduce. The bug reporter
(tagorereddy) in:

https://bugzilla.kernel.org/show_bug.cgi?id=155241

Is seeing this during normal use when using a kde / plasma desktop.

Some history, this problem started surfacing when I fixed the
i2c punit semaphore code in i2c-designware-baytrail.c to actually
work on cht, before that systems with an axp288 any attempt to
access the i2c bus by e.g. the axp288_fuel_gauge driver would result
in -ETIMEOUT as the code would fail to acquire the punit i2c bus
semaphore, this i2c-designware-baytrail.c cht bug has so far protected
users against the described race (*).

tagorereddy then tried my patches to get battery monitoring working
on his cht device. Then he reported back in the above bug that he
was getting forcewake timeouts + system hangs. I only noticed I could
reproduce them myself on resume later (which was quite useful in
actually developing the proposed fix).

 > That seems quite strange.
> Runtime suspend itself shouldn't really do much, and if we're still
> poking at the the hw then we haven't really even suspended anything
> yet, so having failing forcewake doesn't sounds at all good.

Sorry, I'm actually seeing these on a (full not runtime) resume,
not suspend, it seems that at resume my setup has the ideal
circumstances to hit the race.

Regards,

Hans


*) Note as described in the cover letter of the non RFC version of
this patch-set:

https://www.spinics.net/lists/dri-devel/msg128896.html

Disabling access to the pmic i2c bus (as the fixed bug does) is
not a workable solution:

"Unfortunately that will cause some major issues on affected devices:
-No battery monitoring
-No "AC" plugged in monitoring
-If booted with a normal USB-A -> micro-USB cable, or no cable, plugged
  in and then the user replaces the cable with an otg USB-host cable /
  adapter, the id-pin shorting will enable a 5v boost convertor, but we
  need to disable the pmic's USB-Vbus path otherwise it will start drawing
  current from the boost convertor, leading to aprox 300mA of extra
  battery drain, this is done by the axp288_charger driver, which needs
  direct i2c access to the pmic bus"
diff mbox

Patch

diff --git a/arch/x86/include/asm/iosf_mbi.h b/arch/x86/include/asm/iosf_mbi.h
index b41ee16..02963bd 100644
--- a/arch/x86/include/asm/iosf_mbi.h
+++ b/arch/x86/include/asm/iosf_mbi.h
@@ -88,6 +88,21 @@  int iosf_mbi_write(u8 port, u8 opcode, u32 offset, u32 mdr);
  */
 int iosf_mbi_modify(u8 port, u8 opcode, u32 offset, u32 mdr, u32 mask);
 
+/**
+ * iosf_mbi_punit_lock() - Lock the punit mutex
+ *
+ * This function must be called before accessing the punit or the pmic, be it
+ * through iosf_mbi_* or through other means.
+ *
+ * This function locks a mutex, as such it may sleep.
+ */
+void iosf_mbi_punit_lock(void);
+
+/**
+ * iosf_mbi_punit_unlock() - Unlock the punit mutex
+ */
+void iosf_mbi_punit_unlock(void);
+
 #else /* CONFIG_IOSF_MBI is not enabled */
 static inline
 bool iosf_mbi_available(void)
@@ -115,6 +130,10 @@  int iosf_mbi_modify(u8 port, u8 opcode, u32 offset, u32 mdr, u32 mask)
 	WARN(1, "IOSF_MBI driver not available");
 	return -EPERM;
 }
+
+static inline void iosf_mbi_punit_lock(void) {}
+static inline void iosf_mbi_punit_unlock(void) {}
+
 #endif /* CONFIG_IOSF_MBI */
 
 #endif /* IOSF_MBI_SYMS_H */
diff --git a/arch/x86/platform/intel/iosf_mbi.c b/arch/x86/platform/intel/iosf_mbi.c
index edf2c54..75d8135 100644
--- a/arch/x86/platform/intel/iosf_mbi.c
+++ b/arch/x86/platform/intel/iosf_mbi.c
@@ -34,6 +34,7 @@ 
 
 static struct pci_dev *mbi_pdev;
 static DEFINE_SPINLOCK(iosf_mbi_lock);
+static DEFINE_MUTEX(iosf_mbi_punit_mutex);
 
 static inline u32 iosf_mbi_form_mcr(u8 op, u8 port, u8 offset)
 {
@@ -190,6 +191,18 @@  bool iosf_mbi_available(void)
 }
 EXPORT_SYMBOL(iosf_mbi_available);
 
+void iosf_mbi_punit_lock(void)
+{
+	mutex_lock(&iosf_mbi_punit_mutex);
+}
+EXPORT_SYMBOL(iosf_mbi_punit_lock);
+
+void iosf_mbi_punit_unlock(void)
+{
+	mutex_unlock(&iosf_mbi_punit_mutex);
+}
+EXPORT_SYMBOL(iosf_mbi_punit_unlock);
+
 #ifdef CONFIG_IOSF_MBI_DEBUG
 static u32	dbg_mdr;
 static u32	dbg_mcr;