diff mbox series

[v2] cx23885: only reset DMA on problematic CPUs

Message ID 1545173976-16992-1-git-send-email-brad@nextdimension.cc (mailing list archive)
State New, archived
Headers show
Series [v2] cx23885: only reset DMA on problematic CPUs | expand

Commit Message

Brad Love Dec. 18, 2018, 10:59 p.m. UTC
It is reported that commit 95f408bbc4e4 ("media: cx23885: Ryzen DMA
related RiSC engine stall fixes") caused regresssions with other CPUs.

Ensure that the quirk will be applied only for the CPUs that
are known to cause problems.

A module option is added for explicit control of the behaviour.

Fixes: 95f408bbc4e4 ("media: cx23885: Ryzen DMA related RiSC engine stall fixes")

Signed-off-by: Brad Love <brad@nextdimension.cc>
---
Changes since v1:
- Added module option for three way control
- Removed '7' from pci id description, Ryzen 3 is the same id

 drivers/media/pci/cx23885/cx23885-core.c | 54 ++++++++++++++++++++++++++++++--
 drivers/media/pci/cx23885/cx23885.h      |  2 ++
 2 files changed, 54 insertions(+), 2 deletions(-)

Comments

Alex Deucher Dec. 18, 2018, 11:49 p.m. UTC | #1
On Tue, Dec 18, 2018 at 5:59 PM Brad Love <brad@nextdimension.cc> wrote:
>
> It is reported that commit 95f408bbc4e4 ("media: cx23885: Ryzen DMA
> related RiSC engine stall fixes") caused regresssions with other CPUs.
>
> Ensure that the quirk will be applied only for the CPUs that
> are known to cause problems.
>
> A module option is added for explicit control of the behaviour.
>
> Fixes: 95f408bbc4e4 ("media: cx23885: Ryzen DMA related RiSC engine stall fixes")
>
> Signed-off-by: Brad Love <brad@nextdimension.cc>
> ---
> Changes since v1:
> - Added module option for three way control
> - Removed '7' from pci id description, Ryzen 3 is the same id
>
>  drivers/media/pci/cx23885/cx23885-core.c | 54 ++++++++++++++++++++++++++++++--
>  drivers/media/pci/cx23885/cx23885.h      |  2 ++
>  2 files changed, 54 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/media/pci/cx23885/cx23885-core.c b/drivers/media/pci/cx23885/cx23885-core.c
> index 39804d8..fb721c7 100644
> --- a/drivers/media/pci/cx23885/cx23885-core.c
> +++ b/drivers/media/pci/cx23885/cx23885-core.c
> @@ -23,6 +23,7 @@
>  #include <linux/moduleparam.h>
>  #include <linux/kmod.h>
>  #include <linux/kernel.h>
> +#include <linux/pci.h>
>  #include <linux/slab.h>
>  #include <linux/interrupt.h>
>  #include <linux/delay.h>
> @@ -41,6 +42,18 @@ MODULE_AUTHOR("Steven Toth <stoth@linuxtv.org>");
>  MODULE_LICENSE("GPL");
>  MODULE_VERSION(CX23885_VERSION);
>
> +/*
> + * Some platforms have been found to require periodic resetting of the DMA
> + * engine. Ryzen and XEON platforms are known to be affected. The symptom
> + * encountered is "mpeg risc op code error". Only Ryzen platforms employ
> + * this workaround if the option equals 1. The workaround can be explicitly
> + * disabled for all platforms by setting to 0, the workaround can be forced
> + * on for any platform by setting to 2.
> + */
> +static unsigned int dma_reset_workaround = 1;
> +module_param(dma_reset_workaround, int, 0644);
> +MODULE_PARM_DESC(dma_reset_workaround, "periodic RiSC dma engine reset; 0-force disable, 1-driver detect (default), 2-force enable");
> +
>  static unsigned int debug;
>  module_param(debug, int, 0644);
>  MODULE_PARM_DESC(debug, "enable debug messages");
> @@ -603,8 +616,13 @@ static void cx23885_risc_disasm(struct cx23885_tsport *port,
>
>  static void cx23885_clear_bridge_error(struct cx23885_dev *dev)
>  {
> -       uint32_t reg1_val = cx_read(TC_REQ); /* read-only */
> -       uint32_t reg2_val = cx_read(TC_REQ_SET);
> +       uint32_t reg1_val, reg2_val;
> +
> +       if (!dev->need_dma_reset)
> +               return;
> +
> +       reg1_val = cx_read(TC_REQ); /* read-only */
> +       reg2_val = cx_read(TC_REQ_SET);
>
>         if (reg1_val && reg2_val) {
>                 cx_write(TC_REQ, reg1_val);
> @@ -2058,6 +2076,36 @@ void cx23885_gpio_enable(struct cx23885_dev *dev, u32 mask, int asoutput)
>         /* TODO: 23-19 */
>  }
>
> +static struct {
> +       int vendor, dev;
> +} const broken_dev_id[] = {
> +       /* According with
> +        * https://openbenchmarking.org/system/1703021-RI-AMDZEN08075/Ryzen%207%201800X/lspci,
> +        * 0x1451 is PCI ID for the IOMMU found on Ryzen
> +        */
> +       { PCI_VENDOR_ID_AMD, 0x1451 },

Does this issue only happen with the IOMMU is enabled?  Is it only for
p2p transfers?  Until recently the DMA and PCI subsystems didn't
actually support p2p properly when the IOMMU was enabled.  that might
explain some of the issues.  Additionally, if you match based on the
IOMMU id, you won't match if the user disables the IOMMU in the sbios.
Is this only an issue with the IOMMU enabled?

Alex

> +};
> +
> +static bool cx23885_does_need_dma_reset(void)
> +{
> +       int i;
> +       struct pci_dev *pdev = NULL;
> +
> +       if (dma_reset_workaround == 0)
> +               return false;
> +       else if (dma_reset_workaround == 2)
> +               return true;
> +
> +       for (i = 0; i < sizeof(broken_dev_id); i++) {
> +               pdev = pci_get_device(broken_dev_id[i].vendor, broken_dev_id[i].dev, NULL);
> +               if (pdev) {
> +                       pci_dev_put(pdev);
> +                       return true;
> +               }
> +       }
> +       return false;
> +}
> +
>  static int cx23885_initdev(struct pci_dev *pci_dev,
>                            const struct pci_device_id *pci_id)
>  {
> @@ -2069,6 +2117,8 @@ static int cx23885_initdev(struct pci_dev *pci_dev,
>         if (NULL == dev)
>                 return -ENOMEM;
>
> +       dev->need_dma_reset = cx23885_does_need_dma_reset();
> +
>         err = v4l2_device_register(&pci_dev->dev, &dev->v4l2_dev);
>         if (err < 0)
>                 goto fail_free;
> diff --git a/drivers/media/pci/cx23885/cx23885.h b/drivers/media/pci/cx23885/cx23885.h
> index d54c7ee..cf965ef 100644
> --- a/drivers/media/pci/cx23885/cx23885.h
> +++ b/drivers/media/pci/cx23885/cx23885.h
> @@ -451,6 +451,8 @@ struct cx23885_dev {
>         /* Analog raw audio */
>         struct cx23885_audio_dev   *audio_dev;
>
> +       /* Does the system require periodic DMA resets? */
> +       unsigned int            need_dma_reset:1;
>  };
>
>  static inline struct cx23885_dev *to_cx23885(struct v4l2_device *v4l2_dev)
> --
> 2.7.4
>
Matthias Schwarzott Dec. 19, 2018, 11:08 a.m. UTC | #2
Am 18.12.18 um 23:59 schrieb Brad Love:
> It is reported that commit 95f408bbc4e4 ("media: cx23885: Ryzen DMA
> related RiSC engine stall fixes") caused regresssions with other CPUs.
> 
> Ensure that the quirk will be applied only for the CPUs that
> are known to cause problems.
> 
> A module option is added for explicit control of the behaviour.
> 
> Fixes: 95f408bbc4e4 ("media: cx23885: Ryzen DMA related RiSC engine stall fixes")
> 
> Signed-off-by: Brad Love <brad@nextdimension.cc>

Hi Brad,
I found one issue. See below.

Regards
Matthias

> ---
> Changes since v1:
> - Added module option for three way control
> - Removed '7' from pci id description, Ryzen 3 is the same id
> 
>  drivers/media/pci/cx23885/cx23885-core.c | 54 ++++++++++++++++++++++++++++++--
>  drivers/media/pci/cx23885/cx23885.h      |  2 ++
>  2 files changed, 54 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/media/pci/cx23885/cx23885-core.c b/drivers/media/pci/cx23885/cx23885-core.c
> index 39804d8..fb721c7 100644
> --- a/drivers/media/pci/cx23885/cx23885-core.c
> +++ b/drivers/media/pci/cx23885/cx23885-core.c
...
> @@ -2058,6 +2076,36 @@ void cx23885_gpio_enable(struct cx23885_dev *dev, u32 mask, int asoutput)
>  	/* TODO: 23-19 */
>  }
>  
> +static struct {
> +	int vendor, dev;
> +} const broken_dev_id[] = {
> +	/* According with
> +	 * https://openbenchmarking.org/system/1703021-RI-AMDZEN08075/Ryzen%207%201800X/lspci,
> +	 * 0x1451 is PCI ID for the IOMMU found on Ryzen
> +	 */
> +	{ PCI_VENDOR_ID_AMD, 0x1451 },
> +};
> +
> +static bool cx23885_does_need_dma_reset(void)
> +{
> +	int i;
> +	struct pci_dev *pdev = NULL;
> +
> +	if (dma_reset_workaround == 0)
> +		return false;
> +	else if (dma_reset_workaround == 2)
> +		return true;
> +
> +	for (i = 0; i < sizeof(broken_dev_id); i++) {
This is broken. sizeof delivers the size in bytes, not in number of
array elements. ARRAY_SIZE is what you want.

> +		pdev = pci_get_device(broken_dev_id[i].vendor, broken_dev_id[i].dev, NULL);
> +		if (pdev) {
> +			pci_dev_put(pdev);
> +			return true;
> +		}
> +	}
> +	return false;
> +}
> +
>  static int cx23885_initdev(struct pci_dev *pci_dev,
>  			   const struct pci_device_id *pci_id)
>  {
> @@ -2069,6 +2117,8 @@ static int cx23885_initdev(struct pci_dev *pci_dev,
>  	if (NULL == dev)
>  		return -ENOMEM;
>  
> +	dev->need_dma_reset = cx23885_does_need_dma_reset();
> +
>  	err = v4l2_device_register(&pci_dev->dev, &dev->v4l2_dev);
>  	if (err < 0)
>  		goto fail_free;
> diff --git a/drivers/media/pci/cx23885/cx23885.h b/drivers/media/pci/cx23885/cx23885.h
> index d54c7ee..cf965ef 100644
> --- a/drivers/media/pci/cx23885/cx23885.h
> +++ b/drivers/media/pci/cx23885/cx23885.h
> @@ -451,6 +451,8 @@ struct cx23885_dev {
>  	/* Analog raw audio */
>  	struct cx23885_audio_dev   *audio_dev;
>  
> +	/* Does the system require periodic DMA resets? */
> +	unsigned int		need_dma_reset:1;
>  };
>  
>  static inline struct cx23885_dev *to_cx23885(struct v4l2_device *v4l2_dev)
>
Brad Love Dec. 19, 2018, 5:09 p.m. UTC | #3
On 19/12/2018 05.08, Matthias Schwarzott wrote:
> Am 18.12.18 um 23:59 schrieb Brad Love:
>> It is reported that commit 95f408bbc4e4 ("media: cx23885: Ryzen DMA
>> related RiSC engine stall fixes") caused regresssions with other CPUs.
>>
>> Ensure that the quirk will be applied only for the CPUs that
>> are known to cause problems.
>>
>> A module option is added for explicit control of the behaviour.
>>
>> Fixes: 95f408bbc4e4 ("media: cx23885: Ryzen DMA related RiSC engine stall fixes")
>>
>> Signed-off-by: Brad Love <brad@nextdimension.cc>
> Hi Brad,
> I found one issue. See below.
>
> Regards
> Matthias


Thanks for the catch Matthias, v3 submitted.

Cheers,

Brad



>
>> ---
>> Changes since v1:
>> - Added module option for three way control
>> - Removed '7' from pci id description, Ryzen 3 is the same id
>>
>>  drivers/media/pci/cx23885/cx23885-core.c | 54 ++++++++++++++++++++++++++++++--
>>  drivers/media/pci/cx23885/cx23885.h      |  2 ++
>>  2 files changed, 54 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/media/pci/cx23885/cx23885-core.c b/drivers/media/pci/cx23885/cx23885-core.c
>> index 39804d8..fb721c7 100644
>> --- a/drivers/media/pci/cx23885/cx23885-core.c
>> +++ b/drivers/media/pci/cx23885/cx23885-core.c
> ...
>> @@ -2058,6 +2076,36 @@ void cx23885_gpio_enable(struct cx23885_dev *dev, u32 mask, int asoutput)
>>  	/* TODO: 23-19 */
>>  }
>>  
>> +static struct {
>> +	int vendor, dev;
>> +} const broken_dev_id[] = {
>> +	/* According with
>> +	 * https://openbenchmarking.org/system/1703021-RI-AMDZEN08075/Ryzen%207%201800X/lspci,
>> +	 * 0x1451 is PCI ID for the IOMMU found on Ryzen
>> +	 */
>> +	{ PCI_VENDOR_ID_AMD, 0x1451 },
>> +};
>> +
>> +static bool cx23885_does_need_dma_reset(void)
>> +{
>> +	int i;
>> +	struct pci_dev *pdev = NULL;
>> +
>> +	if (dma_reset_workaround == 0)
>> +		return false;
>> +	else if (dma_reset_workaround == 2)
>> +		return true;
>> +
>> +	for (i = 0; i < sizeof(broken_dev_id); i++) {
> This is broken. sizeof delivers the size in bytes, not in number of
> array elements. ARRAY_SIZE is what you want.
>
>> +		pdev = pci_get_device(broken_dev_id[i].vendor, broken_dev_id[i].dev, NULL);
>> +		if (pdev) {
>> +			pci_dev_put(pdev);
>> +			return true;
>> +		}
>> +	}
>> +	return false;
>> +}
>> +
>>  static int cx23885_initdev(struct pci_dev *pci_dev,
>>  			   const struct pci_device_id *pci_id)
>>  {
>> @@ -2069,6 +2117,8 @@ static int cx23885_initdev(struct pci_dev *pci_dev,
>>  	if (NULL == dev)
>>  		return -ENOMEM;
>>  
>> +	dev->need_dma_reset = cx23885_does_need_dma_reset();
>> +
>>  	err = v4l2_device_register(&pci_dev->dev, &dev->v4l2_dev);
>>  	if (err < 0)
>>  		goto fail_free;
>> diff --git a/drivers/media/pci/cx23885/cx23885.h b/drivers/media/pci/cx23885/cx23885.h
>> index d54c7ee..cf965ef 100644
>> --- a/drivers/media/pci/cx23885/cx23885.h
>> +++ b/drivers/media/pci/cx23885/cx23885.h
>> @@ -451,6 +451,8 @@ struct cx23885_dev {
>>  	/* Analog raw audio */
>>  	struct cx23885_audio_dev   *audio_dev;
>>  
>> +	/* Does the system require periodic DMA resets? */
>> +	unsigned int		need_dma_reset:1;
>>  };
>>  
>>  static inline struct cx23885_dev *to_cx23885(struct v4l2_device *v4l2_dev)
>>
Brad Love Dec. 19, 2018, 5:26 p.m. UTC | #4
Hi Alex,


On 18/12/2018 17.49, Alex Deucher wrote:
> On Tue, Dec 18, 2018 at 5:59 PM Brad Love <brad@nextdimension.cc> wrote:
>> It is reported that commit 95f408bbc4e4 ("media: cx23885: Ryzen DMA
>> related RiSC engine stall fixes") caused regresssions with other CPUs.
>>
>> Ensure that the quirk will be applied only for the CPUs that
>> are known to cause problems.
>>
>> A module option is added for explicit control of the behaviour.
>>
>> Fixes: 95f408bbc4e4 ("media: cx23885: Ryzen DMA related RiSC engine stall fixes")
>>
>> Signed-off-by: Brad Love <brad@nextdimension.cc>
>> ---
>> Changes since v1:
>> - Added module option for three way control
>> - Removed '7' from pci id description, Ryzen 3 is the same id
>>
>>  drivers/media/pci/cx23885/cx23885-core.c | 54 ++++++++++++++++++++++++++++++--
>>  drivers/media/pci/cx23885/cx23885.h      |  2 ++
>>  2 files changed, 54 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/media/pci/cx23885/cx23885-core.c b/drivers/media/pci/cx23885/cx23885-core.c
>> index 39804d8..fb721c7 100644
>> --- a/drivers/media/pci/cx23885/cx23885-core.c
>> +++ b/drivers/media/pci/cx23885/cx23885-core.c
>> @@ -23,6 +23,7 @@
>>  #include <linux/moduleparam.h>
>>  #include <linux/kmod.h>
>>  #include <linux/kernel.h>
>> +#include <linux/pci.h>
>>  #include <linux/slab.h>
>>  #include <linux/interrupt.h>
>>  #include <linux/delay.h>
>> @@ -41,6 +42,18 @@ MODULE_AUTHOR("Steven Toth <stoth@linuxtv.org>");
>>  MODULE_LICENSE("GPL");
>>  MODULE_VERSION(CX23885_VERSION);
>>
>> +/*
>> + * Some platforms have been found to require periodic resetting of the DMA
>> + * engine. Ryzen and XEON platforms are known to be affected. The symptom
>> + * encountered is "mpeg risc op code error". Only Ryzen platforms employ
>> + * this workaround if the option equals 1. The workaround can be explicitly
>> + * disabled for all platforms by setting to 0, the workaround can be forced
>> + * on for any platform by setting to 2.
>> + */
>> +static unsigned int dma_reset_workaround = 1;
>> +module_param(dma_reset_workaround, int, 0644);
>> +MODULE_PARM_DESC(dma_reset_workaround, "periodic RiSC dma engine reset; 0-force disable, 1-driver detect (default), 2-force enable");
>> +
>>  static unsigned int debug;
>>  module_param(debug, int, 0644);
>>  MODULE_PARM_DESC(debug, "enable debug messages");
>> @@ -603,8 +616,13 @@ static void cx23885_risc_disasm(struct cx23885_tsport *port,
>>
>>  static void cx23885_clear_bridge_error(struct cx23885_dev *dev)
>>  {
>> -       uint32_t reg1_val = cx_read(TC_REQ); /* read-only */
>> -       uint32_t reg2_val = cx_read(TC_REQ_SET);
>> +       uint32_t reg1_val, reg2_val;
>> +
>> +       if (!dev->need_dma_reset)
>> +               return;
>> +
>> +       reg1_val = cx_read(TC_REQ); /* read-only */
>> +       reg2_val = cx_read(TC_REQ_SET);
>>
>>         if (reg1_val && reg2_val) {
>>                 cx_write(TC_REQ, reg1_val);
>> @@ -2058,6 +2076,36 @@ void cx23885_gpio_enable(struct cx23885_dev *dev, u32 mask, int asoutput)
>>         /* TODO: 23-19 */
>>  }
>>
>> +static struct {
>> +       int vendor, dev;
>> +} const broken_dev_id[] = {
>> +       /* According with
>> +        * https://openbenchmarking.org/system/1703021-RI-AMDZEN08075/Ryzen%207%201800X/lspci,
>> +        * 0x1451 is PCI ID for the IOMMU found on Ryzen
>> +        */
>> +       { PCI_VENDOR_ID_AMD, 0x1451 },
> Does this issue only happen with the IOMMU is enabled?  Is it only for
> p2p transfers?  Until recently the DMA and PCI subsystems didn't
> actually support p2p properly when the IOMMU was enabled.  that might
> explain some of the issues.  Additionally, if you match based on the
> IOMMU id, you won't match if the user disables the IOMMU in the sbios.
> Is this only an issue with the IOMMU enabled?
>
> Alex


I'm unsure of the answers to your questions. I do still have my Ryzen3
system around, I'll see if I can disable IOMMU and do some tests.

Regards,

Brad



>> +};
>> +
>> +static bool cx23885_does_need_dma_reset(void)
>> +{
>> +       int i;
>> +       struct pci_dev *pdev = NULL;
>> +
>> +       if (dma_reset_workaround == 0)
>> +               return false;
>> +       else if (dma_reset_workaround == 2)
>> +               return true;
>> +
>> +       for (i = 0; i < sizeof(broken_dev_id); i++) {
>> +               pdev = pci_get_device(broken_dev_id[i].vendor, broken_dev_id[i].dev, NULL);
>> +               if (pdev) {
>> +                       pci_dev_put(pdev);
>> +                       return true;
>> +               }
>> +       }
>> +       return false;
>> +}
>> +
>>  static int cx23885_initdev(struct pci_dev *pci_dev,
>>                            const struct pci_device_id *pci_id)
>>  {
>> @@ -2069,6 +2117,8 @@ static int cx23885_initdev(struct pci_dev *pci_dev,
>>         if (NULL == dev)
>>                 return -ENOMEM;
>>
>> +       dev->need_dma_reset = cx23885_does_need_dma_reset();
>> +
>>         err = v4l2_device_register(&pci_dev->dev, &dev->v4l2_dev);
>>         if (err < 0)
>>                 goto fail_free;
>> diff --git a/drivers/media/pci/cx23885/cx23885.h b/drivers/media/pci/cx23885/cx23885.h
>> index d54c7ee..cf965ef 100644
>> --- a/drivers/media/pci/cx23885/cx23885.h
>> +++ b/drivers/media/pci/cx23885/cx23885.h
>> @@ -451,6 +451,8 @@ struct cx23885_dev {
>>         /* Analog raw audio */
>>         struct cx23885_audio_dev   *audio_dev;
>>
>> +       /* Does the system require periodic DMA resets? */
>> +       unsigned int            need_dma_reset:1;
>>  };
>>
>>  static inline struct cx23885_dev *to_cx23885(struct v4l2_device *v4l2_dev)
>> --
>> 2.7.4
>>
Brad Love Dec. 19, 2018, 5:40 p.m. UTC | #5
Hi Alex,


On 19/12/2018 11.26, Brad Love wrote:
> Hi Alex,
>
>
> On 18/12/2018 17.49, Alex Deucher wrote:
>> On Tue, Dec 18, 2018 at 5:59 PM Brad Love <brad@nextdimension.cc> wrote:
>>> It is reported that commit 95f408bbc4e4 ("media: cx23885: Ryzen DMA
>>> related RiSC engine stall fixes") caused regresssions with other CPUs.
>>>
>>> Ensure that the quirk will be applied only for the CPUs that
>>> are known to cause problems.
>>>
>>> A module option is added for explicit control of the behaviour.
>>>
>>> Fixes: 95f408bbc4e4 ("media: cx23885: Ryzen DMA related RiSC engine stall fixes")
>>>
>>> Signed-off-by: Brad Love <brad@nextdimension.cc>
>>> ---
>>> Changes since v1:
>>> - Added module option for three way control
>>> - Removed '7' from pci id description, Ryzen 3 is the same id
>>>
>>>  drivers/media/pci/cx23885/cx23885-core.c | 54 ++++++++++++++++++++++++++++++--
>>>  drivers/media/pci/cx23885/cx23885.h      |  2 ++
>>>  2 files changed, 54 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/drivers/media/pci/cx23885/cx23885-core.c b/drivers/media/pci/cx23885/cx23885-core.c
>>> index 39804d8..fb721c7 100644
>>> --- a/drivers/media/pci/cx23885/cx23885-core.c
>>> +++ b/drivers/media/pci/cx23885/cx23885-core.c
>>> @@ -23,6 +23,7 @@
>>>  #include <linux/moduleparam.h>
>>>  #include <linux/kmod.h>
>>>  #include <linux/kernel.h>
>>> +#include <linux/pci.h>
>>>  #include <linux/slab.h>
>>>  #include <linux/interrupt.h>
>>>  #include <linux/delay.h>
>>> @@ -41,6 +42,18 @@ MODULE_AUTHOR("Steven Toth <stoth@linuxtv.org>");
>>>  MODULE_LICENSE("GPL");
>>>  MODULE_VERSION(CX23885_VERSION);
>>>
>>> +/*
>>> + * Some platforms have been found to require periodic resetting of the DMA
>>> + * engine. Ryzen and XEON platforms are known to be affected. The symptom
>>> + * encountered is "mpeg risc op code error". Only Ryzen platforms employ
>>> + * this workaround if the option equals 1. The workaround can be explicitly
>>> + * disabled for all platforms by setting to 0, the workaround can be forced
>>> + * on for any platform by setting to 2.
>>> + */
>>> +static unsigned int dma_reset_workaround = 1;
>>> +module_param(dma_reset_workaround, int, 0644);
>>> +MODULE_PARM_DESC(dma_reset_workaround, "periodic RiSC dma engine reset; 0-force disable, 1-driver detect (default), 2-force enable");
>>> +
>>>  static unsigned int debug;
>>>  module_param(debug, int, 0644);
>>>  MODULE_PARM_DESC(debug, "enable debug messages");
>>> @@ -603,8 +616,13 @@ static void cx23885_risc_disasm(struct cx23885_tsport *port,
>>>
>>>  static void cx23885_clear_bridge_error(struct cx23885_dev *dev)
>>>  {
>>> -       uint32_t reg1_val = cx_read(TC_REQ); /* read-only */
>>> -       uint32_t reg2_val = cx_read(TC_REQ_SET);
>>> +       uint32_t reg1_val, reg2_val;
>>> +
>>> +       if (!dev->need_dma_reset)
>>> +               return;
>>> +
>>> +       reg1_val = cx_read(TC_REQ); /* read-only */
>>> +       reg2_val = cx_read(TC_REQ_SET);
>>>
>>>         if (reg1_val && reg2_val) {
>>>                 cx_write(TC_REQ, reg1_val);
>>> @@ -2058,6 +2076,36 @@ void cx23885_gpio_enable(struct cx23885_dev *dev, u32 mask, int asoutput)
>>>         /* TODO: 23-19 */
>>>  }
>>>
>>> +static struct {
>>> +       int vendor, dev;
>>> +} const broken_dev_id[] = {
>>> +       /* According with
>>> +        * https://openbenchmarking.org/system/1703021-RI-AMDZEN08075/Ryzen%207%201800X/lspci,
>>> +        * 0x1451 is PCI ID for the IOMMU found on Ryzen
>>> +        */
>>> +       { PCI_VENDOR_ID_AMD, 0x1451 },
>> Does this issue only happen with the IOMMU is enabled?  Is it only for
>> p2p transfers?  Until recently the DMA and PCI subsystems didn't
>> actually support p2p properly when the IOMMU was enabled.  that might
>> explain some of the issues.  Additionally, if you match based on the
>> IOMMU id, you won't match if the user disables the IOMMU in the sbios.
>> Is this only an issue with the IOMMU enabled?
>>
>> Alex
>
> I'm unsure of the answers to your questions. I do still have my Ryzen3
> system around, I'll see if I can disable IOMMU and do some tests.
>
> Regards,
>
> Brad


The moment I looked this up I recalled something. During testing I had
to pass iommu=pt as a kernel command line option, that option is still
in my grub config. Without that set I would get critical AMD-VI errors
from the onboard ethernet as well as gpu. I have left that setting as is
the entire time (~8mo) I've been testing the system, because I could not
boot initially without it. I'll try other options now.

Regards,

Brad




>
>
>>> +};
>>> +
>>> +static bool cx23885_does_need_dma_reset(void)
>>> +{
>>> +       int i;
>>> +       struct pci_dev *pdev = NULL;
>>> +
>>> +       if (dma_reset_workaround == 0)
>>> +               return false;
>>> +       else if (dma_reset_workaround == 2)
>>> +               return true;
>>> +
>>> +       for (i = 0; i < sizeof(broken_dev_id); i++) {
>>> +               pdev = pci_get_device(broken_dev_id[i].vendor, broken_dev_id[i].dev, NULL);
>>> +               if (pdev) {
>>> +                       pci_dev_put(pdev);
>>> +                       return true;
>>> +               }
>>> +       }
>>> +       return false;
>>> +}
>>> +
>>>  static int cx23885_initdev(struct pci_dev *pci_dev,
>>>                            const struct pci_device_id *pci_id)
>>>  {
>>> @@ -2069,6 +2117,8 @@ static int cx23885_initdev(struct pci_dev *pci_dev,
>>>         if (NULL == dev)
>>>                 return -ENOMEM;
>>>
>>> +       dev->need_dma_reset = cx23885_does_need_dma_reset();
>>> +
>>>         err = v4l2_device_register(&pci_dev->dev, &dev->v4l2_dev);
>>>         if (err < 0)
>>>                 goto fail_free;
>>> diff --git a/drivers/media/pci/cx23885/cx23885.h b/drivers/media/pci/cx23885/cx23885.h
>>> index d54c7ee..cf965ef 100644
>>> --- a/drivers/media/pci/cx23885/cx23885.h
>>> +++ b/drivers/media/pci/cx23885/cx23885.h
>>> @@ -451,6 +451,8 @@ struct cx23885_dev {
>>>         /* Analog raw audio */
>>>         struct cx23885_audio_dev   *audio_dev;
>>>
>>> +       /* Does the system require periodic DMA resets? */
>>> +       unsigned int            need_dma_reset:1;
>>>  };
>>>
>>>  static inline struct cx23885_dev *to_cx23885(struct v4l2_device *v4l2_dev)
>>> --
>>> 2.7.4
>>>
diff mbox series

Patch

diff --git a/drivers/media/pci/cx23885/cx23885-core.c b/drivers/media/pci/cx23885/cx23885-core.c
index 39804d8..fb721c7 100644
--- a/drivers/media/pci/cx23885/cx23885-core.c
+++ b/drivers/media/pci/cx23885/cx23885-core.c
@@ -23,6 +23,7 @@ 
 #include <linux/moduleparam.h>
 #include <linux/kmod.h>
 #include <linux/kernel.h>
+#include <linux/pci.h>
 #include <linux/slab.h>
 #include <linux/interrupt.h>
 #include <linux/delay.h>
@@ -41,6 +42,18 @@  MODULE_AUTHOR("Steven Toth <stoth@linuxtv.org>");
 MODULE_LICENSE("GPL");
 MODULE_VERSION(CX23885_VERSION);
 
+/*
+ * Some platforms have been found to require periodic resetting of the DMA
+ * engine. Ryzen and XEON platforms are known to be affected. The symptom
+ * encountered is "mpeg risc op code error". Only Ryzen platforms employ
+ * this workaround if the option equals 1. The workaround can be explicitly
+ * disabled for all platforms by setting to 0, the workaround can be forced
+ * on for any platform by setting to 2.
+ */
+static unsigned int dma_reset_workaround = 1;
+module_param(dma_reset_workaround, int, 0644);
+MODULE_PARM_DESC(dma_reset_workaround, "periodic RiSC dma engine reset; 0-force disable, 1-driver detect (default), 2-force enable");
+
 static unsigned int debug;
 module_param(debug, int, 0644);
 MODULE_PARM_DESC(debug, "enable debug messages");
@@ -603,8 +616,13 @@  static void cx23885_risc_disasm(struct cx23885_tsport *port,
 
 static void cx23885_clear_bridge_error(struct cx23885_dev *dev)
 {
-	uint32_t reg1_val = cx_read(TC_REQ); /* read-only */
-	uint32_t reg2_val = cx_read(TC_REQ_SET);
+	uint32_t reg1_val, reg2_val;
+
+	if (!dev->need_dma_reset)
+		return;
+
+	reg1_val = cx_read(TC_REQ); /* read-only */
+	reg2_val = cx_read(TC_REQ_SET);
 
 	if (reg1_val && reg2_val) {
 		cx_write(TC_REQ, reg1_val);
@@ -2058,6 +2076,36 @@  void cx23885_gpio_enable(struct cx23885_dev *dev, u32 mask, int asoutput)
 	/* TODO: 23-19 */
 }
 
+static struct {
+	int vendor, dev;
+} const broken_dev_id[] = {
+	/* According with
+	 * https://openbenchmarking.org/system/1703021-RI-AMDZEN08075/Ryzen%207%201800X/lspci,
+	 * 0x1451 is PCI ID for the IOMMU found on Ryzen
+	 */
+	{ PCI_VENDOR_ID_AMD, 0x1451 },
+};
+
+static bool cx23885_does_need_dma_reset(void)
+{
+	int i;
+	struct pci_dev *pdev = NULL;
+
+	if (dma_reset_workaround == 0)
+		return false;
+	else if (dma_reset_workaround == 2)
+		return true;
+
+	for (i = 0; i < sizeof(broken_dev_id); i++) {
+		pdev = pci_get_device(broken_dev_id[i].vendor, broken_dev_id[i].dev, NULL);
+		if (pdev) {
+			pci_dev_put(pdev);
+			return true;
+		}
+	}
+	return false;
+}
+
 static int cx23885_initdev(struct pci_dev *pci_dev,
 			   const struct pci_device_id *pci_id)
 {
@@ -2069,6 +2117,8 @@  static int cx23885_initdev(struct pci_dev *pci_dev,
 	if (NULL == dev)
 		return -ENOMEM;
 
+	dev->need_dma_reset = cx23885_does_need_dma_reset();
+
 	err = v4l2_device_register(&pci_dev->dev, &dev->v4l2_dev);
 	if (err < 0)
 		goto fail_free;
diff --git a/drivers/media/pci/cx23885/cx23885.h b/drivers/media/pci/cx23885/cx23885.h
index d54c7ee..cf965ef 100644
--- a/drivers/media/pci/cx23885/cx23885.h
+++ b/drivers/media/pci/cx23885/cx23885.h
@@ -451,6 +451,8 @@  struct cx23885_dev {
 	/* Analog raw audio */
 	struct cx23885_audio_dev   *audio_dev;
 
+	/* Does the system require periodic DMA resets? */
+	unsigned int		need_dma_reset:1;
 };
 
 static inline struct cx23885_dev *to_cx23885(struct v4l2_device *v4l2_dev)