diff mbox series

pci: Add option to disable device level INTx masking

Message ID 20240307184645.104349-1-alex.williamson@redhat.com (mailing list archive)
State New, archived
Headers show
Series pci: Add option to disable device level INTx masking | expand

Commit Message

Alex Williamson March 7, 2024, 6:46 p.m. UTC
The PCI 2.3 spec added definitions of the INTx disable and status bits,
in the command and status registers respectively.  The command register
bit, commonly known as DisINTx in lspci, controls whether the device
can assert the INTx signal.

Operating systems will often write to this bit to test whether a device
supports this style of legacy interrupt masking.  When using device
assignment, such as with vfio-pci, the result of this test dictates
whether the device can use a shared or exclusive interrupt (i.e. generic
INTx masking at the device via DisINTx or IRQ controller level INTx
masking).

Add an experimental option to the base set of properties for PCI
devices which allows the DisINTx bit to be excluded from wmask, making
it read-only to the guest for testing purposes related to INTx masking.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 hw/pci/pci.c         | 14 ++++++++++----
 include/hw/pci/pci.h |  2 ++
 2 files changed, 12 insertions(+), 4 deletions(-)

Comments

Cédric Le Goater March 8, 2024, 4:47 p.m. UTC | #1
On 3/7/24 19:46, Alex Williamson wrote:
> The PCI 2.3 spec added definitions of the INTx disable and status bits,
> in the command and status registers respectively.  The command register
> bit, commonly known as DisINTx in lspci, controls whether the device
> can assert the INTx signal.
> 
> Operating systems will often write to this bit to test whether a device
> supports this style of legacy interrupt masking.  When using device
> assignment, such as with vfio-pci, the result of this test dictates
> whether the device can use a shared or exclusive interrupt (ie. generic
> INTx masking at the device via DisINTx or IRQ controller level INTx
> masking).
> 
> Add an experimental option to the base set of properties for PCI
> devices which allows the DisINTx bit to be excluded from wmask, making
> it read-only to the guest for testing purposes related to INTx masking.
> 
> Signed-off-by: Alex Williamson <alex.williamson@redhat.com>


LGTM,

Reviewed-by: Cédric Le Goater <clg@redhat.com>

Thanks,

C.



> ---
>   hw/pci/pci.c         | 14 ++++++++++----
>   include/hw/pci/pci.h |  2 ++
>   2 files changed, 12 insertions(+), 4 deletions(-)
> 
> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> index 6496d027ca61..8c78326ad67f 100644
> --- a/hw/pci/pci.c
> +++ b/hw/pci/pci.c
> @@ -85,6 +85,8 @@ static Property pci_props[] = {
>                       QEMU_PCIE_ERR_UNC_MASK_BITNR, true),
>       DEFINE_PROP_BIT("x-pcie-ari-nextfn-1", PCIDevice, cap_present,
>                       QEMU_PCIE_ARI_NEXTFN_1_BITNR, false),
> +    DEFINE_PROP_BIT("x-pci-disintx", PCIDevice, cap_present,
> +                    QEMU_PCI_DISINTX_BITNR, true),
>       DEFINE_PROP_END_OF_LIST()
>   };
>   
> @@ -861,13 +863,17 @@ static void pci_init_cmask(PCIDevice *dev)
>   static void pci_init_wmask(PCIDevice *dev)
>   {
>       int config_size = pci_config_size(dev);
> +    uint16_t cmd_wmask = PCI_COMMAND_IO | PCI_COMMAND_MEMORY |
> +                         PCI_COMMAND_MASTER | PCI_COMMAND_SERR;
>   
>       dev->wmask[PCI_CACHE_LINE_SIZE] = 0xff;
>       dev->wmask[PCI_INTERRUPT_LINE] = 0xff;
> -    pci_set_word(dev->wmask + PCI_COMMAND,
> -                 PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER |
> -                 PCI_COMMAND_INTX_DISABLE);
> -    pci_word_test_and_set_mask(dev->wmask + PCI_COMMAND, PCI_COMMAND_SERR);
> +
> +    if (dev->cap_present & QEMU_PCI_DISINTX) {
> +        cmd_wmask |= PCI_COMMAND_INTX_DISABLE;
> +    }
> +
> +    pci_set_word(dev->wmask + PCI_COMMAND, cmd_wmask);
>   
>       memset(dev->wmask + PCI_CONFIG_HEADER_SIZE, 0xff,
>              config_size - PCI_CONFIG_HEADER_SIZE);
> diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
> index eaa3fc99d884..45f0fac435cc 100644
> --- a/include/hw/pci/pci.h
> +++ b/include/hw/pci/pci.h
> @@ -212,6 +212,8 @@ enum {
>       QEMU_PCIE_ERR_UNC_MASK = (1 << QEMU_PCIE_ERR_UNC_MASK_BITNR),
>   #define QEMU_PCIE_ARI_NEXTFN_1_BITNR 12
>       QEMU_PCIE_ARI_NEXTFN_1 = (1 << QEMU_PCIE_ARI_NEXTFN_1_BITNR),
> +#define QEMU_PCI_DISINTX_BITNR 13
> +    QEMU_PCI_DISINTX = (1 << QEMU_PCI_DISINTX_BITNR),
>   };
>   
>   typedef struct PCIINTxRoute {
Michael S. Tsirkin March 8, 2024, 4:57 p.m. UTC | #2
On Thu, Mar 07, 2024 at 11:46:42AM -0700, Alex Williamson wrote:
> The PCI 2.3 spec added definitions of the INTx disable and status bits,
> in the command and status registers respectively.  The command register
> bit, commonly known as DisINTx in lspci, controls whether the device
> can assert the INTx signal.
> 
> Operating systems will often write to this bit to test whether a device
> supports this style of legacy interrupt masking.  When using device
> assignment, such as with vfio-pci, the result of this test dictates
> whether the device can use a shared or exclusive interrupt (ie. generic
> INTx masking at the device via DisINTx or IRQ controller level INTx
> masking).
> 
> Add an experimental option to the base set of properties for PCI
> devices which allows the DisINTx bit to be excluded from wmask, making
> it read-only to the guest for testing purposes related to INTx masking.
> 

Could you clarify the use a bit more? It's unstable - do you
expect to experiment with it and then make it permanent down
the road?

> Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
> ---
>  hw/pci/pci.c         | 14 ++++++++++----
>  include/hw/pci/pci.h |  2 ++
>  2 files changed, 12 insertions(+), 4 deletions(-)
> 
> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> index 6496d027ca61..8c78326ad67f 100644
> --- a/hw/pci/pci.c
> +++ b/hw/pci/pci.c
> @@ -85,6 +85,8 @@ static Property pci_props[] = {
>                      QEMU_PCIE_ERR_UNC_MASK_BITNR, true),
>      DEFINE_PROP_BIT("x-pcie-ari-nextfn-1", PCIDevice, cap_present,
>                      QEMU_PCIE_ARI_NEXTFN_1_BITNR, false),
> +    DEFINE_PROP_BIT("x-pci-disintx", PCIDevice, cap_present,
> +                    QEMU_PCI_DISINTX_BITNR, true),
>      DEFINE_PROP_END_OF_LIST()
>  };
>  
> @@ -861,13 +863,17 @@ static void pci_init_cmask(PCIDevice *dev)
>  static void pci_init_wmask(PCIDevice *dev)
>  {
>      int config_size = pci_config_size(dev);
> +    uint16_t cmd_wmask = PCI_COMMAND_IO | PCI_COMMAND_MEMORY |
> +                         PCI_COMMAND_MASTER | PCI_COMMAND_SERR;
>  
>      dev->wmask[PCI_CACHE_LINE_SIZE] = 0xff;
>      dev->wmask[PCI_INTERRUPT_LINE] = 0xff;
> -    pci_set_word(dev->wmask + PCI_COMMAND,
> -                 PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER |
> -                 PCI_COMMAND_INTX_DISABLE);
> -    pci_word_test_and_set_mask(dev->wmask + PCI_COMMAND, PCI_COMMAND_SERR);
> +
> +    if (dev->cap_present & QEMU_PCI_DISINTX) {
> +        cmd_wmask |= PCI_COMMAND_INTX_DISABLE;
> +    }
> +
> +    pci_set_word(dev->wmask + PCI_COMMAND, cmd_wmask);
>  
>      memset(dev->wmask + PCI_CONFIG_HEADER_SIZE, 0xff,
>             config_size - PCI_CONFIG_HEADER_SIZE);
> diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
> index eaa3fc99d884..45f0fac435cc 100644
> --- a/include/hw/pci/pci.h
> +++ b/include/hw/pci/pci.h
> @@ -212,6 +212,8 @@ enum {
>      QEMU_PCIE_ERR_UNC_MASK = (1 << QEMU_PCIE_ERR_UNC_MASK_BITNR),
>  #define QEMU_PCIE_ARI_NEXTFN_1_BITNR 12
>      QEMU_PCIE_ARI_NEXTFN_1 = (1 << QEMU_PCIE_ARI_NEXTFN_1_BITNR),
> +#define QEMU_PCI_DISINTX_BITNR 13
> +    QEMU_PCI_DISINTX = (1 << QEMU_PCI_DISINTX_BITNR),
>  };
>  
>  typedef struct PCIINTxRoute {
> -- 
> 2.44.0
Alex Williamson March 8, 2024, 5:24 p.m. UTC | #3
On Fri, 8 Mar 2024 11:57:38 -0500
"Michael S. Tsirkin" <mst@redhat.com> wrote:

> On Thu, Mar 07, 2024 at 11:46:42AM -0700, Alex Williamson wrote:
> > The PCI 2.3 spec added definitions of the INTx disable and status bits,
> > in the command and status registers respectively.  The command register
> > bit, commonly known as DisINTx in lspci, controls whether the device
> > can assert the INTx signal.
> > 
> > Operating systems will often write to this bit to test whether a device
> > supports this style of legacy interrupt masking.  When using device
> > assignment, such as with vfio-pci, the result of this test dictates
> > whether the device can use a shared or exclusive interrupt (ie. generic
> > INTx masking at the device via DisINTx or IRQ controller level INTx
> > masking).
> > 
> > Add an experimental option to the base set of properties for PCI
> > devices which allows the DisINTx bit to be excluded from wmask, making
> > it read-only to the guest for testing purposes related to INTx masking.
> >   
> 
> Could you clarify the use a bit more? It's unstable - do you
> expect to experiment with it and then make it permanent down
> the road?

No, my aspirations end at providing an experimental option.
Technically all devices should support and honor this bit, so I don't
think we should provide a supported method of providing broken behavior,
but there do exist physical devices where this feature is broken or
unsupported.  Rather than implementing emulation of one of these broken
devices, with bug for bug compatibility, it's much easier to be able to
trigger broken DisINTx behavior on an arbitrary device, in an
unsupported fashion.  Thanks,

Alex

> > Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
> > ---
> >  hw/pci/pci.c         | 14 ++++++++++----
> >  include/hw/pci/pci.h |  2 ++
> >  2 files changed, 12 insertions(+), 4 deletions(-)
> > 
> > diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> > index 6496d027ca61..8c78326ad67f 100644
> > --- a/hw/pci/pci.c
> > +++ b/hw/pci/pci.c
> > @@ -85,6 +85,8 @@ static Property pci_props[] = {
> >                      QEMU_PCIE_ERR_UNC_MASK_BITNR, true),
> >      DEFINE_PROP_BIT("x-pcie-ari-nextfn-1", PCIDevice, cap_present,
> >                      QEMU_PCIE_ARI_NEXTFN_1_BITNR, false),
> > +    DEFINE_PROP_BIT("x-pci-disintx", PCIDevice, cap_present,
> > +                    QEMU_PCI_DISINTX_BITNR, true),
> >      DEFINE_PROP_END_OF_LIST()
> >  };
> >  
> > @@ -861,13 +863,17 @@ static void pci_init_cmask(PCIDevice *dev)
> >  static void pci_init_wmask(PCIDevice *dev)
> >  {
> >      int config_size = pci_config_size(dev);
> > +    uint16_t cmd_wmask = PCI_COMMAND_IO | PCI_COMMAND_MEMORY |
> > +                         PCI_COMMAND_MASTER | PCI_COMMAND_SERR;
> >  
> >      dev->wmask[PCI_CACHE_LINE_SIZE] = 0xff;
> >      dev->wmask[PCI_INTERRUPT_LINE] = 0xff;
> > -    pci_set_word(dev->wmask + PCI_COMMAND,
> > -                 PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER |
> > -                 PCI_COMMAND_INTX_DISABLE);
> > -    pci_word_test_and_set_mask(dev->wmask + PCI_COMMAND, PCI_COMMAND_SERR);
> > +
> > +    if (dev->cap_present & QEMU_PCI_DISINTX) {
> > +        cmd_wmask |= PCI_COMMAND_INTX_DISABLE;
> > +    }
> > +
> > +    pci_set_word(dev->wmask + PCI_COMMAND, cmd_wmask);
> >  
> >      memset(dev->wmask + PCI_CONFIG_HEADER_SIZE, 0xff,
> >             config_size - PCI_CONFIG_HEADER_SIZE);
> > diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
> > index eaa3fc99d884..45f0fac435cc 100644
> > --- a/include/hw/pci/pci.h
> > +++ b/include/hw/pci/pci.h
> > @@ -212,6 +212,8 @@ enum {
> >      QEMU_PCIE_ERR_UNC_MASK = (1 << QEMU_PCIE_ERR_UNC_MASK_BITNR),
> >  #define QEMU_PCIE_ARI_NEXTFN_1_BITNR 12
> >      QEMU_PCIE_ARI_NEXTFN_1 = (1 << QEMU_PCIE_ARI_NEXTFN_1_BITNR),
> > +#define QEMU_PCI_DISINTX_BITNR 13
> > +    QEMU_PCI_DISINTX = (1 << QEMU_PCI_DISINTX_BITNR),
> >  };
> >  
> >  typedef struct PCIINTxRoute {
> > -- 
> > 2.44.0  
>
Michael S. Tsirkin March 8, 2024, 7:37 p.m. UTC | #4
On Fri, Mar 08, 2024 at 10:24:14AM -0700, Alex Williamson wrote:
> On Fri, 8 Mar 2024 11:57:38 -0500
> "Michael S. Tsirkin" <mst@redhat.com> wrote:
> 
> > On Thu, Mar 07, 2024 at 11:46:42AM -0700, Alex Williamson wrote:
> > > The PCI 2.3 spec added definitions of the INTx disable and status bits,
> > > in the command and status registers respectively.  The command register
> > > bit, commonly known as DisINTx in lspci, controls whether the device
> > > can assert the INTx signal.
> > > 
> > > Operating systems will often write to this bit to test whether a device
> > > supports this style of legacy interrupt masking.  When using device
> > > assignment, such as with vfio-pci, the result of this test dictates
> > > whether the device can use a shared or exclusive interrupt (ie. generic
> > > INTx masking at the device via DisINTx or IRQ controller level INTx
> > > masking).
> > > 
> > > Add an experimental option to the base set of properties for PCI
> > > devices which allows the DisINTx bit to be excluded from wmask, making
> > > it read-only to the guest for testing purposes related to INTx masking.
> > >   
> > 
> > Could you clarify the use a bit more? It's unstable - do you
> > expect to experiment with it and then make it permanent down
> > the road?
> 
> No, my aspirations end at providing an experimental option.
> Technically all devices should support and honor this bit, so I don't
> think we should provide a supported method of providing broken behavior,
> but there do exist physical devices where this feature is broken or
> unsupported.  Rather than implementing emulation of one of these broken
> devices, with bug for bug compatibility, it's much easier to be able to
> trigger broken DisINTx behavior on an arbitrary device, in an
> unsupported fashion.  Thanks,
> 
> Alex

Well, we tend not to merge patches for playing with random
bits in config space just so people can experiment with
whether this breaks guests, but given this is coming from
a long term contributor and a maintainer, it's a different
matter. So ok, to make another maintainer's life easier
I'm prepared to take this. I'd like to figure out though -
does your need extend to experimenting with all devices
or just with vfio ones? If the latter, maybe keep it there
where you understand what the actual need is... If the former,
as I said, I'll merge it.


> > > Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
> > > ---
> > >  hw/pci/pci.c         | 14 ++++++++++----
> > >  include/hw/pci/pci.h |  2 ++
> > >  2 files changed, 12 insertions(+), 4 deletions(-)
> > > 
> > > diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> > > index 6496d027ca61..8c78326ad67f 100644
> > > --- a/hw/pci/pci.c
> > > +++ b/hw/pci/pci.c
> > > @@ -85,6 +85,8 @@ static Property pci_props[] = {
> > >                      QEMU_PCIE_ERR_UNC_MASK_BITNR, true),
> > >      DEFINE_PROP_BIT("x-pcie-ari-nextfn-1", PCIDevice, cap_present,
> > >                      QEMU_PCIE_ARI_NEXTFN_1_BITNR, false),
> > > +    DEFINE_PROP_BIT("x-pci-disintx", PCIDevice, cap_present,
> > > +                    QEMU_PCI_DISINTX_BITNR, true),
> > >      DEFINE_PROP_END_OF_LIST()
> > >  };
> > >  
> > > @@ -861,13 +863,17 @@ static void pci_init_cmask(PCIDevice *dev)
> > >  static void pci_init_wmask(PCIDevice *dev)
> > >  {
> > >      int config_size = pci_config_size(dev);
> > > +    uint16_t cmd_wmask = PCI_COMMAND_IO | PCI_COMMAND_MEMORY |
> > > +                         PCI_COMMAND_MASTER | PCI_COMMAND_SERR;
> > >  
> > >      dev->wmask[PCI_CACHE_LINE_SIZE] = 0xff;
> > >      dev->wmask[PCI_INTERRUPT_LINE] = 0xff;
> > > -    pci_set_word(dev->wmask + PCI_COMMAND,
> > > -                 PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER |
> > > -                 PCI_COMMAND_INTX_DISABLE);
> > > -    pci_word_test_and_set_mask(dev->wmask + PCI_COMMAND, PCI_COMMAND_SERR);
> > > +
> > > +    if (dev->cap_present & QEMU_PCI_DISINTX) {
> > > +        cmd_wmask |= PCI_COMMAND_INTX_DISABLE;
> > > +    }
> > > +
> > > +    pci_set_word(dev->wmask + PCI_COMMAND, cmd_wmask);
> > >  
> > >      memset(dev->wmask + PCI_CONFIG_HEADER_SIZE, 0xff,
> > >             config_size - PCI_CONFIG_HEADER_SIZE);
> > > diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
> > > index eaa3fc99d884..45f0fac435cc 100644
> > > --- a/include/hw/pci/pci.h
> > > +++ b/include/hw/pci/pci.h
> > > @@ -212,6 +212,8 @@ enum {
> > >      QEMU_PCIE_ERR_UNC_MASK = (1 << QEMU_PCIE_ERR_UNC_MASK_BITNR),
> > >  #define QEMU_PCIE_ARI_NEXTFN_1_BITNR 12
> > >      QEMU_PCIE_ARI_NEXTFN_1 = (1 << QEMU_PCIE_ARI_NEXTFN_1_BITNR),
> > > +#define QEMU_PCI_DISINTX_BITNR 13
> > > +    QEMU_PCI_DISINTX = (1 << QEMU_PCI_DISINTX_BITNR),
> > >  };
> > >  
> > >  typedef struct PCIINTxRoute {
> > > -- 
> > > 2.44.0  
> >
Alex Williamson March 8, 2024, 8:02 p.m. UTC | #5
On Fri, 8 Mar 2024 14:37:06 -0500
"Michael S. Tsirkin" <mst@redhat.com> wrote:

> On Fri, Mar 08, 2024 at 10:24:14AM -0700, Alex Williamson wrote:
> > On Fri, 8 Mar 2024 11:57:38 -0500
> > "Michael S. Tsirkin" <mst@redhat.com> wrote:
> >   
> > > On Thu, Mar 07, 2024 at 11:46:42AM -0700, Alex Williamson wrote:  
> > > > The PCI 2.3 spec added definitions of the INTx disable and status bits,
> > > > in the command and status registers respectively.  The command register
> > > > bit, commonly known as DisINTx in lspci, controls whether the device
> > > > can assert the INTx signal.
> > > > 
> > > > Operating systems will often write to this bit to test whether a device
> > > > supports this style of legacy interrupt masking.  When using device
> > > > assignment, such as with vfio-pci, the result of this test dictates
> > > > whether the device can use a shared or exclusive interrupt (ie. generic
> > > > INTx masking at the device via DisINTx or IRQ controller level INTx
> > > > masking).
> > > > 
> > > > Add an experimental option to the base set of properties for PCI
> > > > devices which allows the DisINTx bit to be excluded from wmask, making
> > > > it read-only to the guest for testing purposes related to INTx masking.
> > > >     
> > > 
> > > Could you clarify the use a bit more? It's unstable - do you
> > > expect to experiment with it and then make it permanent down
> > > the road?  
> > 
> > No, my aspirations end at providing an experimental option.
> > Technically all devices should support and honor this bit, so I don't
> > think we should provide a supported method of providing broken behavior,
> > but there do exist physical devices where this feature is broken or
> > unsupported.  Rather than implementing emulation of one of these broken
> > devices, with bug for bug compatibility, it's much easier to be able to
> > trigger broken DisINTx behavior on an arbitrary device, in an
> > unsupported fashion.  Thanks,
> > 
> > Alex  
> 
> Well, we tend not to merge patches for playing with random
> bits in config space just so people can experiment with
> whether this breaks guests, but given this is coming from
> a long term contributor and a maintainer, it's a different
> matter. So ok, to make another maintainer's life easier
> I'm prepared to take this. I'd like to figure out though -
> does your need extend to experimenting with all devices
> or just with vfio ones? If the latter, maybe keep it there
> where you understand what the actual need is... If the former,
> as I said, I'll merge it.

I'm actually looking at using it with non-vfio devices, for example I
have a dummy nvme driver that can configure either INTx, MSI, or MSI-X
interrupts.  The driver just stuffs nop commands into the admin queue to
trigger an interrupt.  This tests DMA mapping and interrupt paths.  I
intend to port this to a userspace vfio-pci driver that I can run in a
guest on an emulated nvme device, thereby enabling targeted testing
without any host hardware or device dependencies.  If I were to expose
two emulated nvme devices to the guest, one with DisINTx disabled, then
all variations could be tested.

For full disclosure, the vfio-pci kernel driver does have a nointxmask
module option, so while I think it would be useful and provides a
little more flexibility that devices in QEMU can be specified with this
behavior, there are means to do it otherwise. The QEMU vfio-pci driver
certainly has experimental options that don't necessarily have a path
to become supported, I hadn't realized your intention/preference to
make it a staging ground for to-be-supported options for PCIDevice.

If you have concerns about cluttering options or maintaining dead-end
experimental options, let's hold off on this until there's a case that
can't be met with the kernel module option.  Thanks,

Alex

> > > > Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
> > > > ---
> > > >  hw/pci/pci.c         | 14 ++++++++++----
> > > >  include/hw/pci/pci.h |  2 ++
> > > >  2 files changed, 12 insertions(+), 4 deletions(-)
> > > > 
> > > > diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> > > > index 6496d027ca61..8c78326ad67f 100644
> > > > --- a/hw/pci/pci.c
> > > > +++ b/hw/pci/pci.c
> > > > @@ -85,6 +85,8 @@ static Property pci_props[] = {
> > > >                      QEMU_PCIE_ERR_UNC_MASK_BITNR, true),
> > > >      DEFINE_PROP_BIT("x-pcie-ari-nextfn-1", PCIDevice, cap_present,
> > > >                      QEMU_PCIE_ARI_NEXTFN_1_BITNR, false),
> > > > +    DEFINE_PROP_BIT("x-pci-disintx", PCIDevice, cap_present,
> > > > +                    QEMU_PCI_DISINTX_BITNR, true),
> > > >      DEFINE_PROP_END_OF_LIST()
> > > >  };
> > > >  
> > > > @@ -861,13 +863,17 @@ static void pci_init_cmask(PCIDevice *dev)
> > > >  static void pci_init_wmask(PCIDevice *dev)
> > > >  {
> > > >      int config_size = pci_config_size(dev);
> > > > +    uint16_t cmd_wmask = PCI_COMMAND_IO | PCI_COMMAND_MEMORY |
> > > > +                         PCI_COMMAND_MASTER | PCI_COMMAND_SERR;
> > > >  
> > > >      dev->wmask[PCI_CACHE_LINE_SIZE] = 0xff;
> > > >      dev->wmask[PCI_INTERRUPT_LINE] = 0xff;
> > > > -    pci_set_word(dev->wmask + PCI_COMMAND,
> > > > -                 PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER |
> > > > -                 PCI_COMMAND_INTX_DISABLE);
> > > > -    pci_word_test_and_set_mask(dev->wmask + PCI_COMMAND, PCI_COMMAND_SERR);
> > > > +
> > > > +    if (dev->cap_present & QEMU_PCI_DISINTX) {
> > > > +        cmd_wmask |= PCI_COMMAND_INTX_DISABLE;
> > > > +    }
> > > > +
> > > > +    pci_set_word(dev->wmask + PCI_COMMAND, cmd_wmask);
> > > >  
> > > >      memset(dev->wmask + PCI_CONFIG_HEADER_SIZE, 0xff,
> > > >             config_size - PCI_CONFIG_HEADER_SIZE);
> > > > diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
> > > > index eaa3fc99d884..45f0fac435cc 100644
> > > > --- a/include/hw/pci/pci.h
> > > > +++ b/include/hw/pci/pci.h
> > > > @@ -212,6 +212,8 @@ enum {
> > > >      QEMU_PCIE_ERR_UNC_MASK = (1 << QEMU_PCIE_ERR_UNC_MASK_BITNR),
> > > >  #define QEMU_PCIE_ARI_NEXTFN_1_BITNR 12
> > > >      QEMU_PCIE_ARI_NEXTFN_1 = (1 << QEMU_PCIE_ARI_NEXTFN_1_BITNR),
> > > > +#define QEMU_PCI_DISINTX_BITNR 13
> > > > +    QEMU_PCI_DISINTX = (1 << QEMU_PCI_DISINTX_BITNR),
> > > >  };
> > > >  
> > > >  typedef struct PCIINTxRoute {
> > > > -- 
> > > > 2.44.0    
> > >   
>
Michael S. Tsirkin March 8, 2024, 8:05 p.m. UTC | #6
On Fri, Mar 08, 2024 at 01:02:01PM -0700, Alex Williamson wrote:
> On Fri, 8 Mar 2024 14:37:06 -0500
> "Michael S. Tsirkin" <mst@redhat.com> wrote:
> 
> > On Fri, Mar 08, 2024 at 10:24:14AM -0700, Alex Williamson wrote:
> > > On Fri, 8 Mar 2024 11:57:38 -0500
> > > "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > >   
> > > > On Thu, Mar 07, 2024 at 11:46:42AM -0700, Alex Williamson wrote:  
> > > > > The PCI 2.3 spec added definitions of the INTx disable and status bits,
> > > > > in the command and status registers respectively.  The command register
> > > > > bit, commonly known as DisINTx in lspci, controls whether the device
> > > > > can assert the INTx signal.
> > > > > 
> > > > > Operating systems will often write to this bit to test whether a device
> > > > > supports this style of legacy interrupt masking.  When using device
> > > > > assignment, such as with vfio-pci, the result of this test dictates
> > > > > whether the device can use a shared or exclusive interrupt (ie. generic
> > > > > INTx masking at the device via DisINTx or IRQ controller level INTx
> > > > > masking).
> > > > > 
> > > > > Add an experimental option to the base set of properties for PCI
> > > > > devices which allows the DisINTx bit to be excluded from wmask, making
> > > > > it read-only to the guest for testing purposes related to INTx masking.
> > > > >     
> > > > 
> > > > Could you clarify the use a bit more? It's unstable - do you
> > > > expect to experiment with it and then make it permanent down
> > > > the road?  
> > > 
> > > No, my aspirations end at providing an experimental option.
> > > Technically all devices should support and honor this bit, so I don't
> > > think we should provide a supported method of providing broken behavior,
> > > but there do exist physical devices where this feature is broken or
> > > unsupported.  Rather than implementing emulation of one of these broken
> > > devices, with bug for bug compatibility, it's much easier to be able to
> > > trigger broken DisINTx behavior on an arbitrary device, in an
> > > unsupported fashion.  Thanks,
> > > 
> > > Alex  
> > 
> > Well, we tend not to merge patches for playing with random
> > bits in config space just so people can experiment with
> > whether this breaks guests, but given this is coming from
> > a long term contributor and a maintainer, it's a different
> > matter. So ok, to make another maintainer's life easier
> > I'm prepared to take this. I'd like to figure out though -
> > does your need extend to experimenting with all devices
> > or just with vfio ones? If the latter, maybe keep it there
> > where you understand what the actual need is... If the former,
> > as I said, I'll merge it.
> 
> I'm actually looking at using it with non-vfio devices, for example I
> have a dummy nvme driver that can configure either INTx, MSI, or MSI-X
> interrupts.  The driver just stuffs nop commands into the admin queue to
> trigger an interrupt.  This tests DMA mapping and interrupt paths.  I
> intend to port this to a userspace vfio-pci driver that I can run in a
> guest on an emulated nvme device, thereby enabling targeted testing
> without any host hardware or device dependencies.  If I were to expose
> two emulated nvme devices to the guest, one with DisINTx disabled, then
> all variations could be tested.
> 
> For full disclosure, the vfio-pci kernel driver does have a nointxmask
> module option, so while I think it would be useful and provides a
> little more flexibility that devices in QEMU can be specified with this
> behavior, there are means to do it otherwise. The QEMU vfio-pci driver
> certainly has experimental options that don't necessarily have a path
> to become supported, I hadn't realized your intention/preference to
> make it a staging ground for to-be-supported options for PCIDevice.
> 
> If you have concerns about cluttering options or maintaining dead-end
> experimental options, let's hold off on this until there's a case that
> can't be met with the kernel module option.  Thanks,
> 
> Alex

That's the concern. But you decide. One maintainer's time is not more
important than another's. If it helps you - just merge it.

Acked-by: Michael S. Tsirkin <mst@redhat.com>


> > > > > Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
> > > > > ---
> > > > >  hw/pci/pci.c         | 14 ++++++++++----
> > > > >  include/hw/pci/pci.h |  2 ++
> > > > >  2 files changed, 12 insertions(+), 4 deletions(-)
> > > > > 
> > > > > diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> > > > > index 6496d027ca61..8c78326ad67f 100644
> > > > > --- a/hw/pci/pci.c
> > > > > +++ b/hw/pci/pci.c
> > > > > @@ -85,6 +85,8 @@ static Property pci_props[] = {
> > > > >                      QEMU_PCIE_ERR_UNC_MASK_BITNR, true),
> > > > >      DEFINE_PROP_BIT("x-pcie-ari-nextfn-1", PCIDevice, cap_present,
> > > > >                      QEMU_PCIE_ARI_NEXTFN_1_BITNR, false),
> > > > > +    DEFINE_PROP_BIT("x-pci-disintx", PCIDevice, cap_present,
> > > > > +                    QEMU_PCI_DISINTX_BITNR, true),
> > > > >      DEFINE_PROP_END_OF_LIST()
> > > > >  };
> > > > >  
> > > > > @@ -861,13 +863,17 @@ static void pci_init_cmask(PCIDevice *dev)
> > > > >  static void pci_init_wmask(PCIDevice *dev)
> > > > >  {
> > > > >      int config_size = pci_config_size(dev);
> > > > > +    uint16_t cmd_wmask = PCI_COMMAND_IO | PCI_COMMAND_MEMORY |
> > > > > +                         PCI_COMMAND_MASTER | PCI_COMMAND_SERR;
> > > > >  
> > > > >      dev->wmask[PCI_CACHE_LINE_SIZE] = 0xff;
> > > > >      dev->wmask[PCI_INTERRUPT_LINE] = 0xff;
> > > > > -    pci_set_word(dev->wmask + PCI_COMMAND,
> > > > > -                 PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER |
> > > > > -                 PCI_COMMAND_INTX_DISABLE);
> > > > > -    pci_word_test_and_set_mask(dev->wmask + PCI_COMMAND, PCI_COMMAND_SERR);
> > > > > +
> > > > > +    if (dev->cap_present & QEMU_PCI_DISINTX) {
> > > > > +        cmd_wmask |= PCI_COMMAND_INTX_DISABLE;
> > > > > +    }
> > > > > +
> > > > > +    pci_set_word(dev->wmask + PCI_COMMAND, cmd_wmask);
> > > > >  
> > > > >      memset(dev->wmask + PCI_CONFIG_HEADER_SIZE, 0xff,
> > > > >             config_size - PCI_CONFIG_HEADER_SIZE);
> > > > > diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
> > > > > index eaa3fc99d884..45f0fac435cc 100644
> > > > > --- a/include/hw/pci/pci.h
> > > > > +++ b/include/hw/pci/pci.h
> > > > > @@ -212,6 +212,8 @@ enum {
> > > > >      QEMU_PCIE_ERR_UNC_MASK = (1 << QEMU_PCIE_ERR_UNC_MASK_BITNR),
> > > > >  #define QEMU_PCIE_ARI_NEXTFN_1_BITNR 12
> > > > >      QEMU_PCIE_ARI_NEXTFN_1 = (1 << QEMU_PCIE_ARI_NEXTFN_1_BITNR),
> > > > > +#define QEMU_PCI_DISINTX_BITNR 13
> > > > > +    QEMU_PCI_DISINTX = (1 << QEMU_PCI_DISINTX_BITNR),
> > > > >  };
> > > > >  
> > > > >  typedef struct PCIINTxRoute {
> > > > > -- 
> > > > > 2.44.0    
> > > >   
> >
diff mbox series

Patch

diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 6496d027ca61..8c78326ad67f 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -85,6 +85,8 @@  static Property pci_props[] = {
                     QEMU_PCIE_ERR_UNC_MASK_BITNR, true),
     DEFINE_PROP_BIT("x-pcie-ari-nextfn-1", PCIDevice, cap_present,
                     QEMU_PCIE_ARI_NEXTFN_1_BITNR, false),
+    DEFINE_PROP_BIT("x-pci-disintx", PCIDevice, cap_present,
+                    QEMU_PCI_DISINTX_BITNR, true),
     DEFINE_PROP_END_OF_LIST()
 };
 
@@ -861,13 +863,17 @@  static void pci_init_cmask(PCIDevice *dev)
 static void pci_init_wmask(PCIDevice *dev)
 {
     int config_size = pci_config_size(dev);
+    uint16_t cmd_wmask = PCI_COMMAND_IO | PCI_COMMAND_MEMORY |
+                         PCI_COMMAND_MASTER | PCI_COMMAND_SERR;
 
     dev->wmask[PCI_CACHE_LINE_SIZE] = 0xff;
     dev->wmask[PCI_INTERRUPT_LINE] = 0xff;
-    pci_set_word(dev->wmask + PCI_COMMAND,
-                 PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER |
-                 PCI_COMMAND_INTX_DISABLE);
-    pci_word_test_and_set_mask(dev->wmask + PCI_COMMAND, PCI_COMMAND_SERR);
+
+    if (dev->cap_present & QEMU_PCI_DISINTX) {
+        cmd_wmask |= PCI_COMMAND_INTX_DISABLE;
+    }
+
+    pci_set_word(dev->wmask + PCI_COMMAND, cmd_wmask);
 
     memset(dev->wmask + PCI_CONFIG_HEADER_SIZE, 0xff,
            config_size - PCI_CONFIG_HEADER_SIZE);
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index eaa3fc99d884..45f0fac435cc 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -212,6 +212,8 @@  enum {
     QEMU_PCIE_ERR_UNC_MASK = (1 << QEMU_PCIE_ERR_UNC_MASK_BITNR),
 #define QEMU_PCIE_ARI_NEXTFN_1_BITNR 12
     QEMU_PCIE_ARI_NEXTFN_1 = (1 << QEMU_PCIE_ARI_NEXTFN_1_BITNR),
+#define QEMU_PCI_DISINTX_BITNR 13
+    QEMU_PCI_DISINTX = (1 << QEMU_PCI_DISINTX_BITNR),
 };
 
 typedef struct PCIINTxRoute {