diff mbox series

[v2,05/12] IOMMU: rename and re-type ats_enabled

Message ID 7f11ca06-9bed-443b-9c79-0e62b71a1f96@suse.com (mailing list archive)
State New, archived
Headers show
Series VT-d: SATC handling; ATS: tidying | expand

Commit Message

Jan Beulich Feb. 15, 2024, 10:15 a.m. UTC
Make the variable a tristate, with (as done elsewhere) a negative value
meaning "default". Since all use sites need looking at, also rename it
to match our usual "opt_*" pattern. While touching it, also move it to
.data.ro_after_init.

The only place it retains boolean nature is pci_ats_device(), for now.

In AMD code re-order conditionals to have the config space accesses
after (cheaper) flag checks.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
In domain_context_mapping_one() I'm a little puzzled that translation
type is selected based on only IOMMU and global properties, i.e. not
taking the device itself into account.
---
v2: Re-base over new earlier patches.

Comments

Jan Beulich Feb. 15, 2024, 10:21 a.m. UTC | #1
On 15.02.2024 11:15, Jan Beulich wrote:
> Make the variable a tristate, with (as done elsewhere) a negative value
> meaning "default". Since all use sites need looking at, also rename it
> to match our usual "opt_*" pattern. While touching it, also move it to
> .data.ro_after_init.
> 
> The only place it retains boolean nature is pci_ats_device(), for now.
> 
> In AMD code re-order conditionals to have the config space accesses
> after (cheaper) flag checks.
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

I'm sorry, forgot to Cc Andrew for the AMD IOMMU part of the changes.

Jan

> ---
> In domain_context_mapping_one() I'm a little puzzled that translation
> type is selected based on only IOMMU and global properties, i.e. not
> taking the device itself into account.
> ---
> v2: Re-base over new earlier patches.
> 
> --- a/xen/drivers/passthrough/amd/iommu_cmd.c
> +++ b/xen/drivers/passthrough/amd/iommu_cmd.c
> @@ -282,7 +282,7 @@ void amd_iommu_flush_iotlb(u8 devfn, con
>      struct amd_iommu *iommu;
>      unsigned int req_id, queueid, maxpend;
>  
> -    if ( !ats_enabled )
> +    if ( opt_ats <= 0 )
>          return;
>  
>      if ( !pci_ats_enabled(pdev->seg, pdev->bus, pdev->devfn) )
> @@ -340,7 +340,7 @@ static void _amd_iommu_flush_pages(struc
>          flush_command_buffer(iommu, 0);
>      }
>  
> -    if ( ats_enabled )
> +    if ( opt_ats > 0 )
>      {
>          amd_iommu_flush_all_iotlbs(d, daddr, order);
>  
> --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
> +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
> @@ -119,7 +119,7 @@ static bool use_ats(
>      const struct amd_iommu *iommu,
>      const struct ivrs_mappings *ivrs_dev)
>  {
> -    return !ivrs_dev->block_ats &&
> +    return opt_ats > 0 && !ivrs_dev->block_ats &&
>             iommu_has_cap(iommu, PCI_CAP_IOTLB_SHIFT) &&
>             pci_ats_device(iommu->seg, pdev->bus, pdev->devfn);
>  }
> @@ -196,7 +196,7 @@ static int __must_check amd_iommu_setup_
>          dte->sys_mgt = MASK_EXTR(ivrs_dev->device_flags, ACPI_IVHD_SYSTEM_MGMT);
>  
>          if ( use_ats(pdev, iommu, ivrs_dev) )
> -            dte->i = ats_enabled;
> +            dte->i = true;
>  
>          spin_unlock_irqrestore(&iommu->lock, flags);
>  
> @@ -257,7 +257,7 @@ static int __must_check amd_iommu_setup_
>                                           ACPI_IVHD_SYSTEM_MGMT));
>  
>          if ( use_ats(pdev, iommu, ivrs_dev) )
> -            ASSERT(dte->i == ats_enabled);
> +            ASSERT(dte->i);
>  
>          spin_unlock_irqrestore(&iommu->lock, flags);
>  
> --- a/xen/drivers/passthrough/ats.c
> +++ b/xen/drivers/passthrough/ats.c
> @@ -18,8 +18,8 @@
>  #include <xen/pci_regs.h>
>  #include "ats.h"
>  
> -bool __read_mostly ats_enabled;
> -boolean_param("ats", ats_enabled);
> +int8_t __ro_after_init opt_ats = -1;
> +boolean_param("ats", opt_ats);
>  
>  int enable_ats_device(struct pci_dev *pdev, struct list_head *ats_list)
>  {
> --- a/xen/drivers/passthrough/ats.h
> +++ b/xen/drivers/passthrough/ats.h
> @@ -22,7 +22,7 @@
>  #define ATS_QUEUE_DEPTH_MASK     0x1f
>  #define ATS_ENABLE               (1<<15)
>  
> -extern bool ats_enabled;
> +extern int8_t opt_ats;
>  
>  int enable_ats_device(struct pci_dev *pdev, struct list_head *ats_list);
>  void disable_ats_device(struct pci_dev *pdev);
> @@ -43,7 +43,7 @@ static inline int pci_ats_enabled(int se
>  
>  static inline int pci_ats_device(int seg, int bus, int devfn)
>  {
> -    if ( !ats_enabled )
> +    if ( !opt_ats )
>          return 0;
>  
>      return pci_find_ext_capability(PCI_SBDF(seg, bus, devfn),
> --- a/xen/drivers/passthrough/vtd/iommu.c
> +++ b/xen/drivers/passthrough/vtd/iommu.c
> @@ -1543,7 +1543,7 @@ int domain_context_mapping_one(
>          }
>  
>          context_set_address_root(lctxt, root);
> -        if ( ats_enabled && ecap_dev_iotlb(iommu->ecap) )
> +        if ( opt_ats > 0 && ecap_dev_iotlb(iommu->ecap) )
>              context_set_translation_type(lctxt, CONTEXT_TT_DEV_IOTLB);
>          else
>              context_set_translation_type(lctxt, CONTEXT_TT_MULTI_LEVEL);
> --- a/xen/drivers/passthrough/vtd/x86/ats.c
> +++ b/xen/drivers/passthrough/vtd/x86/ats.c
> @@ -46,7 +46,7 @@ int ats_device(const struct pci_dev *pde
>      struct acpi_drhd_unit *ats_drhd;
>      unsigned int pos, expfl = 0;
>  
> -    if ( !ats_enabled || !iommu_qinval )
> +    if ( opt_ats <= 0 || !iommu_qinval )
>          return 0;
>  
>      if ( !ecap_queued_inval(drhd->iommu->ecap) ||
>
Roger Pau Monné May 6, 2024, 12:42 p.m. UTC | #2
On Thu, Feb 15, 2024 at 11:15:39AM +0100, Jan Beulich wrote:
> Make the variable a tristate, with (as done elsewhere) a negative value
> meaning "default". Since all use sites need looking at, also rename it
> to match our usual "opt_*" pattern. While touching it, also move it to
> .data.ro_after_init.

I guess I need to look at further patches, as given the feedback on
the past version I think we agreed we want to set ATS unconditionally
disabled by default, and hence I'm not sure I see the point of the
tri-state if enabling ATS will require an explicit opt-in on the
command line (ats=1).

> The only place it retains boolean nature is pci_ats_device(), for now.
> 
> In AMD code re-order conditionals to have the config space accesses
> after (cheaper) flag checks.
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> ---
> In domain_context_mapping_one() I'm a little puzzled that translation
> type is selected based on only IOMMU and global properties, i.e. not
> taking the device itself into account.

Is it maybe fine do set DEV_IOTLB unconditionally as long as the IOMMU
supports it, and then let the device decide whether it wants to issue
translated or non-translated requests depending on whether it supports
(and enables) ATS?

I think it would be best to limit this strictly to devices that have
ATS enabled.

> ---
> v2: Re-base over new earlier patches.
> 
> --- a/xen/drivers/passthrough/amd/iommu_cmd.c
> +++ b/xen/drivers/passthrough/amd/iommu_cmd.c
> @@ -282,7 +282,7 @@ void amd_iommu_flush_iotlb(u8 devfn, con
>      struct amd_iommu *iommu;
>      unsigned int req_id, queueid, maxpend;
>  
> -    if ( !ats_enabled )
> +    if ( opt_ats <= 0 )

If having a tri-state is required, won't it be best to decide on a
binary value at init time, so that runtime functions can handle
opt_ats as a boolean?

Otherwise we are open coding the default expectation of what -1
implies (disabled) in all use sites.

>          return;
>  
>      if ( !pci_ats_enabled(pdev->seg, pdev->bus, pdev->devfn) )
> @@ -340,7 +340,7 @@ static void _amd_iommu_flush_pages(struc
>          flush_command_buffer(iommu, 0);
>      }
>  
> -    if ( ats_enabled )
> +    if ( opt_ats > 0 )
>      {
>          amd_iommu_flush_all_iotlbs(d, daddr, order);
>  
> --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
> +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
> @@ -119,7 +119,7 @@ static bool use_ats(
>      const struct amd_iommu *iommu,
>      const struct ivrs_mappings *ivrs_dev)
>  {
> -    return !ivrs_dev->block_ats &&
> +    return opt_ats > 0 && !ivrs_dev->block_ats &&
>             iommu_has_cap(iommu, PCI_CAP_IOTLB_SHIFT) &&
>             pci_ats_device(iommu->seg, pdev->bus, pdev->devfn);
>  }
> @@ -196,7 +196,7 @@ static int __must_check amd_iommu_setup_
>          dte->sys_mgt = MASK_EXTR(ivrs_dev->device_flags, ACPI_IVHD_SYSTEM_MGMT);
>  
>          if ( use_ats(pdev, iommu, ivrs_dev) )
> -            dte->i = ats_enabled;
> +            dte->i = true;

Might be easier to just use:

dte->i = use_ats(pdev, iommu, ivrs_dev);

>  
>          spin_unlock_irqrestore(&iommu->lock, flags);
>  
> @@ -257,7 +257,7 @@ static int __must_check amd_iommu_setup_
>                                           ACPI_IVHD_SYSTEM_MGMT));
>  
>          if ( use_ats(pdev, iommu, ivrs_dev) )
> -            ASSERT(dte->i == ats_enabled);
> +            ASSERT(dte->i);

ASSERT(dte->i == use_ats(pdev, iommu, ivrs_dev));

>  
>          spin_unlock_irqrestore(&iommu->lock, flags);
>  
> --- a/xen/drivers/passthrough/ats.c
> +++ b/xen/drivers/passthrough/ats.c
> @@ -18,8 +18,8 @@
>  #include <xen/pci_regs.h>
>  #include "ats.h"
>  
> -bool __read_mostly ats_enabled;
> -boolean_param("ats", ats_enabled);
> +int8_t __ro_after_init opt_ats = -1;
> +boolean_param("ats", opt_ats);
>  
>  int enable_ats_device(struct pci_dev *pdev, struct list_head *ats_list)
>  {
> --- a/xen/drivers/passthrough/ats.h
> +++ b/xen/drivers/passthrough/ats.h
> @@ -22,7 +22,7 @@
>  #define ATS_QUEUE_DEPTH_MASK     0x1f
>  #define ATS_ENABLE               (1<<15)
>  
> -extern bool ats_enabled;
> +extern int8_t opt_ats;
>  
>  int enable_ats_device(struct pci_dev *pdev, struct list_head *ats_list);
>  void disable_ats_device(struct pci_dev *pdev);
> @@ -43,7 +43,7 @@ static inline int pci_ats_enabled(int se
>  
>  static inline int pci_ats_device(int seg, int bus, int devfn)
>  {
> -    if ( !ats_enabled )
> +    if ( !opt_ats )
>          return 0;

Can't you remove that check altogether now, since you are adding an
opt_ats check to use_ats()?

Thanks, Roger.
Jan Beulich May 6, 2024, 1:20 p.m. UTC | #3
On 06.05.2024 14:42, Roger Pau Monné wrote:
> On Thu, Feb 15, 2024 at 11:15:39AM +0100, Jan Beulich wrote:
>> Make the variable a tristate, with (as done elsewhere) a negative value
>> meaning "default". Since all use sites need looking at, also rename it
>> to match our usual "opt_*" pattern. While touching it, also move it to
>> .data.ro_after_init.
> 
> I guess I need to look at further patches, as given the feedback on
> the past version I think we agreed we want to set ATS unconditionally
> disabled by default, and hence I'm not sure I see the point of the
> tri-state if enabling ATS will require an explicit opt-in on the
> command line (ats=1).

With the present wording in the VT-d spec (which we've now had vague
indication that it may not be meant that way) there needs to be
tristate behavior:
- With "ats=0" ATS won't be used.
- With "ats=1" ATS will be used for all ATS-capable devices.
- Without either option ATS will be used for devices where firmware
  mandates its use.
If the alternative reading was confirmed (and preferably the text also
adjusted), we should be able to get away with a simple boolean again.
At which point this patch may end up being purely renaming, and hence
could then as well be left out.

>> The only place it retains boolean nature is pci_ats_device(), for now.
>>
>> In AMD code re-order conditionals to have the config space accesses
>> after (cheaper) flag checks.
>>
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>> ---
>> In domain_context_mapping_one() I'm a little puzzled that translation
>> type is selected based on only IOMMU and global properties, i.e. not
>> taking the device itself into account.
> 
> Is it maybe fine do set DEV_IOTLB unconditionally as long as the IOMMU
> supports it, and then let the device decide whether it wants to issue
> translated or non-translated requests depending on whether it supports
> (and enables) ATS?

This might be the reason why it is what it is now.

> I think it would be best to limit this strictly to devices that have
> ATS enabled.

And this was the reason for me putting the remark here.

>> --- a/xen/drivers/passthrough/amd/iommu_cmd.c
>> +++ b/xen/drivers/passthrough/amd/iommu_cmd.c
>> @@ -282,7 +282,7 @@ void amd_iommu_flush_iotlb(u8 devfn, con
>>      struct amd_iommu *iommu;
>>      unsigned int req_id, queueid, maxpend;
>>  
>> -    if ( !ats_enabled )
>> +    if ( opt_ats <= 0 )
> 
> If having a tri-state is required, won't it be best to decide on a
> binary value at init time, so that runtime functions can handle
> opt_ats as a boolean?

As per above, unlike for other options we can't consolidate into a
boolean, as runtime behavior wants to be different with all three
possible settings.

> Otherwise we are open coding the default expectation of what -1
> implies (disabled) in all use sites.

That's pretty much unavoidable with the different meaning of 1 and -1.

>> @@ -196,7 +196,7 @@ static int __must_check amd_iommu_setup_
>>          dte->sys_mgt = MASK_EXTR(ivrs_dev->device_flags, ACPI_IVHD_SYSTEM_MGMT);
>>  
>>          if ( use_ats(pdev, iommu, ivrs_dev) )
>> -            dte->i = ats_enabled;
>> +            dte->i = true;
> 
> Might be easier to just use:
> 
> dte->i = use_ats(pdev, iommu, ivrs_dev);

I'm hesitant here, as in principle we might be overwriting a "true" by
"false" then.

>> @@ -257,7 +257,7 @@ static int __must_check amd_iommu_setup_
>>                                           ACPI_IVHD_SYSTEM_MGMT));
>>  
>>          if ( use_ats(pdev, iommu, ivrs_dev) )
>> -            ASSERT(dte->i == ats_enabled);
>> +            ASSERT(dte->i);
> 
> ASSERT(dte->i == use_ats(pdev, iommu, ivrs_dev));

I'm okay switching here, but better to the precise logical equivalent of
the earlier code:

ASSERT(dte->i || !use_ats(pdev, iommu, ivrs_dev));

>> @@ -43,7 +43,7 @@ static inline int pci_ats_enabled(int se
>>  
>>  static inline int pci_ats_device(int seg, int bus, int devfn)
>>  {
>> -    if ( !ats_enabled )
>> +    if ( !opt_ats )
>>          return 0;
> 
> Can't you remove that check altogether now, since you are adding an
> opt_ats check to use_ats()?

Two reasons why not: For one this isn't AMD-specific code, and hence
shouldn't be tied to the AMD-specific use_ats(). In principle VT-d
code should be okay to call here, too. And then
amd_iommu_disable_domain_device() doesn't use use_ats(), but does call
here.

Jan
Roger Pau Monné May 6, 2024, 1:53 p.m. UTC | #4
On Mon, May 06, 2024 at 03:20:38PM +0200, Jan Beulich wrote:
> On 06.05.2024 14:42, Roger Pau Monné wrote:
> > On Thu, Feb 15, 2024 at 11:15:39AM +0100, Jan Beulich wrote:
> >> Make the variable a tristate, with (as done elsewhere) a negative value
> >> meaning "default". Since all use sites need looking at, also rename it
> >> to match our usual "opt_*" pattern. While touching it, also move it to
> >> .data.ro_after_init.
> > 
> > I guess I need to look at further patches, as given the feedback on
> > the past version I think we agreed we want to set ATS unconditionally
> > disabled by default, and hence I'm not sure I see the point of the
> > tri-state if enabling ATS will require an explicit opt-in on the
> > command line (ats=1).
> 
> With the present wording in the VT-d spec (which we've now had vague
> indication that it may not be meant that way) there needs to be
> tristate behavior:
> - With "ats=0" ATS won't be used.
> - With "ats=1" ATS will be used for all ATS-capable devices.
> - Without either option ATS will be used for devices where firmware
>   mandates its use.

I'm afraid I don't agree to this behavior.  Regardless of what the
firmware requests ATS must only be enabled on user-request (iow: when
the ats=1 command line option is passed).  Otherwise ATS must remain
disabled for all devices.  It's not fine for firmware to trigger the
enabling of a feature that's not supported on Xen.

> >> @@ -196,7 +196,7 @@ static int __must_check amd_iommu_setup_
> >>          dte->sys_mgt = MASK_EXTR(ivrs_dev->device_flags, ACPI_IVHD_SYSTEM_MGMT);
> >>  
> >>          if ( use_ats(pdev, iommu, ivrs_dev) )
> >> -            dte->i = ats_enabled;
> >> +            dte->i = true;
> > 
> > Might be easier to just use:
> > 
> > dte->i = use_ats(pdev, iommu, ivrs_dev);
> 
> I'm hesitant here, as in principle we might be overwriting a "true" by
> "false" then.

Hm, but that would be fine, what's the point in enabling the IOMMU to
reply to ATS requests if ATS is not enabled on the device?

IOW: overwriting a "true" with a "false" seem like the correct
behavior if it's based on the output of use_ats().

> >> @@ -257,7 +257,7 @@ static int __must_check amd_iommu_setup_
> >>                                           ACPI_IVHD_SYSTEM_MGMT));
> >>  
> >>          if ( use_ats(pdev, iommu, ivrs_dev) )
> >> -            ASSERT(dte->i == ats_enabled);
> >> +            ASSERT(dte->i);
> > 
> > ASSERT(dte->i == use_ats(pdev, iommu, ivrs_dev));
> 
> I'm okay switching here, but better to the precise logical equivalent of
> the earlier code:
> 
> ASSERT(dte->i || !use_ats(pdev, iommu, ivrs_dev));

Hm, I see.  I think we should be more strict with this (see my
previous comment), but we could defer to a later change.

> 
> >> @@ -43,7 +43,7 @@ static inline int pci_ats_enabled(int se
> >>  
> >>  static inline int pci_ats_device(int seg, int bus, int devfn)
> >>  {
> >> -    if ( !ats_enabled )
> >> +    if ( !opt_ats )
> >>          return 0;
> > 
> > Can't you remove that check altogether now, since you are adding an
> > opt_ats check to use_ats()?
> 
> Two reasons why not: For one this isn't AMD-specific code, and hence
> shouldn't be tied to the AMD-specific use_ats(). In principle VT-d
> code should be okay to call here, too. And then
> amd_iommu_disable_domain_device() doesn't use use_ats(), but does call
> here.

Oh, that's confusing, I didn't realize use_ats was AMD specific code.
It should have some kind of prefix to avoid this kind of confusion.

Thanks, Roger.
Jan Beulich May 15, 2024, 10:07 a.m. UTC | #5
On 06.05.2024 15:53, Roger Pau Monné wrote:
> On Mon, May 06, 2024 at 03:20:38PM +0200, Jan Beulich wrote:
>> On 06.05.2024 14:42, Roger Pau Monné wrote:
>>> On Thu, Feb 15, 2024 at 11:15:39AM +0100, Jan Beulich wrote:
>>>> Make the variable a tristate, with (as done elsewhere) a negative value
>>>> meaning "default". Since all use sites need looking at, also rename it
>>>> to match our usual "opt_*" pattern. While touching it, also move it to
>>>> .data.ro_after_init.
>>>
>>> I guess I need to look at further patches, as given the feedback on
>>> the past version I think we agreed we want to set ATS unconditionally
>>> disabled by default, and hence I'm not sure I see the point of the
>>> tri-state if enabling ATS will require an explicit opt-in on the
>>> command line (ats=1).
>>
>> With the present wording in the VT-d spec (which we've now had vague
>> indication that it may not be meant that way) there needs to be
>> tristate behavior:
>> - With "ats=0" ATS won't be used.
>> - With "ats=1" ATS will be used for all ATS-capable devices.
>> - Without either option ATS will be used for devices where firmware
>>   mandates its use.
> 
> I'm afraid I don't agree to this behavior.  Regardless of what the
> firmware requests ATS must only be enabled on user-request (iow: when
> the ats=1 command line option is passed).  Otherwise ATS must remain
> disabled for all devices.  It's not fine for firmware to trigger the
> enabling of a feature that's not supported on Xen.

Well. On one hand I can see your point. Otoh with the spec still being the
way it is, on systems mandating ATS use for at least one device we'd then
simply need to deem Xen unsupported there altogether. The goal of the
series, though, is to make things work as mandated by the spec on such
systems, which to me implies we need to consider use of ATS supported in
such cases (and only for those specific devices, i.e. still without
considering use of "ats" on the command line supported).

If and when the spec was changed to clarify the flag is a performance hint,
not a functional requirement, then we could do as you suggest. At which
point, as mentioned before, opt_ats may be possible to become a plain
boolean variable.

>>>> @@ -196,7 +196,7 @@ static int __must_check amd_iommu_setup_
>>>>          dte->sys_mgt = MASK_EXTR(ivrs_dev->device_flags, ACPI_IVHD_SYSTEM_MGMT);
>>>>  
>>>>          if ( use_ats(pdev, iommu, ivrs_dev) )
>>>> -            dte->i = ats_enabled;
>>>> +            dte->i = true;
>>>
>>> Might be easier to just use:
>>>
>>> dte->i = use_ats(pdev, iommu, ivrs_dev);
>>
>> I'm hesitant here, as in principle we might be overwriting a "true" by
>> "false" then.
> 
> Hm, but that would be fine, what's the point in enabling the IOMMU to
> reply to ATS requests if ATS is not enabled on the device?
> 
> IOW: overwriting a "true" with a "false" seem like the correct
> behavior if it's based on the output of use_ats().

I don't think so, unless there were flow guarantees excluding the possibility
of taking this path twice without intermediately disabling the device again.
Down from here the enabling of ATS is gated on use_ats(). Hence if, in an
earlier invocation, we enabled ATS (and set dte->i), we wouldn't turn off ATS
below (there's only code to turn it on), yet with what you suggest we'd clear
dte->i.

Thinking about it: Maybe your comment roots in you meaning to leverage here
that use_ats() is not supposed to return different values for the same device,
when invoked multiple times. If so, I'm afraid I'm hesitant to make use of
such a property when I can easily avoid it.

>>>> @@ -43,7 +43,7 @@ static inline int pci_ats_enabled(int se
>>>>  
>>>>  static inline int pci_ats_device(int seg, int bus, int devfn)
>>>>  {
>>>> -    if ( !ats_enabled )
>>>> +    if ( !opt_ats )
>>>>          return 0;
>>>
>>> Can't you remove that check altogether now, since you are adding an
>>> opt_ats check to use_ats()?
>>
>> Two reasons why not: For one this isn't AMD-specific code, and hence
>> shouldn't be tied to the AMD-specific use_ats(). In principle VT-d
>> code should be okay to call here, too. And then
>> amd_iommu_disable_domain_device() doesn't use use_ats(), but does call
>> here.
> 
> Oh, that's confusing, I didn't realize use_ats was AMD specific code.
> It should have some kind of prefix to avoid this kind of confusion.

Hmm, the function being static in an AMD-only file, I would have thought that
makes it clear enough that it's AMD-specific.

Jan
Roger Pau Monné May 20, 2024, 10:29 a.m. UTC | #6
On Wed, May 15, 2024 at 12:07:50PM +0200, Jan Beulich wrote:
> On 06.05.2024 15:53, Roger Pau Monné wrote:
> > On Mon, May 06, 2024 at 03:20:38PM +0200, Jan Beulich wrote:
> >> On 06.05.2024 14:42, Roger Pau Monné wrote:
> >>> On Thu, Feb 15, 2024 at 11:15:39AM +0100, Jan Beulich wrote:
> >>>> Make the variable a tristate, with (as done elsewhere) a negative value
> >>>> meaning "default". Since all use sites need looking at, also rename it
> >>>> to match our usual "opt_*" pattern. While touching it, also move it to
> >>>> .data.ro_after_init.
> >>>
> >>> I guess I need to look at further patches, as given the feedback on
> >>> the past version I think we agreed we want to set ATS unconditionally
> >>> disabled by default, and hence I'm not sure I see the point of the
> >>> tri-state if enabling ATS will require an explicit opt-in on the
> >>> command line (ats=1).
> >>
> >> With the present wording in the VT-d spec (which we've now had vague
> >> indication that it may not be meant that way) there needs to be
> >> tristate behavior:
> >> - With "ats=0" ATS won't be used.
> >> - With "ats=1" ATS will be used for all ATS-capable devices.
> >> - Without either option ATS will be used for devices where firmware
> >>   mandates its use.
> > 
> > I'm afraid I don't agree to this behavior.  Regardless of what the
> > firmware requests ATS must only be enabled on user-request (iow: when
> > the ats=1 command line option is passed).  Otherwise ATS must remain
> > disabled for all devices.  It's not fine for firmware to trigger the
> > enabling of a feature that's not supported on Xen.
> 
> Well. On one hand I can see your point. Otoh with the spec still being the
> way it is, on systems mandating ATS use for at least one device we'd then
> simply need to deem Xen unsupported there altogether. The goal of the
> series, though, is to make things work as mandated by the spec on such
> systems, which to me implies we need to consider use of ATS supported in
> such cases (and only for those specific devices, i.e. still without
> considering use of "ats" on the command line supported).

I'm in general hesitant of ATS because I think it undermines the
security of PCI passthrough.  However this would still be acceptable
for dom0 because it's (usually?) part of the trusted base of a Xen
host.

If we want to make use of ATS for devices assigned to dom0 we should
clarify the warning in xen-command-line.pandoc.

We should also consider that dom0 usually does a lot of p2m
manipulations (by having to map grants and foreign pages).  Those will
result in p2m flushes that will lead to IOMMU flushes, and when using
ATS that will require device TLB flushes.  I wonder how much of an
overhead this will add to normal dom0 operations (plus the added risk
of those device TLB flushes stalling the IOMMU queue).

I would be much more comfortable with making the ats= command line
option a tri-state:

ats={0,1,mandatory}

Where the 'mandatory' option or equivalent enables ATS only for
devices that mandate it.  However I still think the default option
should be disabled for all devices.  If devices that require ATS are
found on the system I would use `warning_add()` to notify the user
of the need to consider adding ats=mandatory to the command line.

> If and when the spec was changed to clarify the flag is a performance hint,
> not a functional requirement, then we could do as you suggest. At which
> point, as mentioned before, opt_ats may be possible to become a plain
> boolean variable.

It's a complex situation, and I'm kind of surprised by the
introduction of this mandatory ATS requirement by Intel in a
non-backwards compatible way (as the specification claims the device
won't be functional without ATS enabled if required).

> >>>> @@ -196,7 +196,7 @@ static int __must_check amd_iommu_setup_
> >>>>          dte->sys_mgt = MASK_EXTR(ivrs_dev->device_flags, ACPI_IVHD_SYSTEM_MGMT);
> >>>>  
> >>>>          if ( use_ats(pdev, iommu, ivrs_dev) )
> >>>> -            dte->i = ats_enabled;
> >>>> +            dte->i = true;
> >>>
> >>> Might be easier to just use:
> >>>
> >>> dte->i = use_ats(pdev, iommu, ivrs_dev);
> >>
> >> I'm hesitant here, as in principle we might be overwriting a "true" by
> >> "false" then.
> > 
> > Hm, but that would be fine, what's the point in enabling the IOMMU to
> > reply to ATS requests if ATS is not enabled on the device?
> > 
> > IOW: overwriting a "true" with a "false" seem like the correct
> > behavior if it's based on the output of use_ats().
> 
> I don't think so, unless there were flow guarantees excluding the possibility
> of taking this path twice without intermediately disabling the device again.
> Down from here the enabling of ATS is gated on use_ats(). Hence if, in an
> earlier invocation, we enabled ATS (and set dte->i), we wouldn't turn off ATS
> below (there's only code to turn it on), yet with what you suggest we'd clear
> dte->i.

Please bear with me, I think I'm confused, why would use_ats(), and if
that's the case, don't we want to update dte->i so that it matches the
ATS state?

Otherwise we would fail to disable IOMMU device address translation
support if ATS was disabled?

> Thinking about it: Maybe your comment roots in you meaning to leverage here
> that use_ats() is not supposed to return different values for the same device,
> when invoked multiple times. If so, I'm afraid I'm hesitant to make use of
> such a property when I can easily avoid it.
> 
> >>>> @@ -43,7 +43,7 @@ static inline int pci_ats_enabled(int se
> >>>>  
> >>>>  static inline int pci_ats_device(int seg, int bus, int devfn)
> >>>>  {
> >>>> -    if ( !ats_enabled )
> >>>> +    if ( !opt_ats )
> >>>>          return 0;
> >>>
> >>> Can't you remove that check altogether now, since you are adding an
> >>> opt_ats check to use_ats()?
> >>
> >> Two reasons why not: For one this isn't AMD-specific code, and hence
> >> shouldn't be tied to the AMD-specific use_ats(). In principle VT-d
> >> code should be okay to call here, too. And then
> >> amd_iommu_disable_domain_device() doesn't use use_ats(), but does call
> >> here.
> > 
> > Oh, that's confusing, I didn't realize use_ats was AMD specific code.
> > It should have some kind of prefix to avoid this kind of confusion.
> 
> Hmm, the function being static in an AMD-only file, I would have thought that
> makes it clear enough that it's AMD-specific.

Yes, sure, I guess the name looked generic enough to be something that
could be shared across vendor implementations.

Thanks, Roger.
Jan Beulich May 21, 2024, 6:21 a.m. UTC | #7
On 20.05.2024 12:29, Roger Pau Monné wrote:
> On Wed, May 15, 2024 at 12:07:50PM +0200, Jan Beulich wrote:
>> On 06.05.2024 15:53, Roger Pau Monné wrote:
>>> On Mon, May 06, 2024 at 03:20:38PM +0200, Jan Beulich wrote:
>>>> On 06.05.2024 14:42, Roger Pau Monné wrote:
>>>>> On Thu, Feb 15, 2024 at 11:15:39AM +0100, Jan Beulich wrote:
>>>>>> @@ -196,7 +196,7 @@ static int __must_check amd_iommu_setup_
>>>>>>          dte->sys_mgt = MASK_EXTR(ivrs_dev->device_flags, ACPI_IVHD_SYSTEM_MGMT);
>>>>>>  
>>>>>>          if ( use_ats(pdev, iommu, ivrs_dev) )
>>>>>> -            dte->i = ats_enabled;
>>>>>> +            dte->i = true;
>>>>>
>>>>> Might be easier to just use:
>>>>>
>>>>> dte->i = use_ats(pdev, iommu, ivrs_dev);
>>>>
>>>> I'm hesitant here, as in principle we might be overwriting a "true" by
>>>> "false" then.
>>>
>>> Hm, but that would be fine, what's the point in enabling the IOMMU to
>>> reply to ATS requests if ATS is not enabled on the device?
>>>
>>> IOW: overwriting a "true" with a "false" seem like the correct
>>> behavior if it's based on the output of use_ats().
>>
>> I don't think so, unless there were flow guarantees excluding the possibility
>> of taking this path twice without intermediately disabling the device again.
>> Down from here the enabling of ATS is gated on use_ats(). Hence if, in an
>> earlier invocation, we enabled ATS (and set dte->i), we wouldn't turn off ATS
>> below (there's only code to turn it on), yet with what you suggest we'd clear
>> dte->i.
> 
> Please bear with me, I think I'm confused, why would use_ats(), and if
> that's the case, don't we want to update dte->i so that it matches the
> ATS state?

I'm afraid I can't parse this. Maybe a result of incomplete editing? The
topic is complex enough that I don't want to even try to guess what you
may have meant to ask ...

> Otherwise we would fail to disable IOMMU device address translation
> support if ATS was disabled?

I think the answer here is "no", but with the above I'm not really sure
here, either.

Jan
Roger Pau Monné May 21, 2024, 10:03 a.m. UTC | #8
On Tue, May 21, 2024 at 08:21:35AM +0200, Jan Beulich wrote:
> On 20.05.2024 12:29, Roger Pau Monné wrote:
> > On Wed, May 15, 2024 at 12:07:50PM +0200, Jan Beulich wrote:
> >> On 06.05.2024 15:53, Roger Pau Monné wrote:
> >>> On Mon, May 06, 2024 at 03:20:38PM +0200, Jan Beulich wrote:
> >>>> On 06.05.2024 14:42, Roger Pau Monné wrote:
> >>>>> On Thu, Feb 15, 2024 at 11:15:39AM +0100, Jan Beulich wrote:
> >>>>>> @@ -196,7 +196,7 @@ static int __must_check amd_iommu_setup_
> >>>>>>          dte->sys_mgt = MASK_EXTR(ivrs_dev->device_flags, ACPI_IVHD_SYSTEM_MGMT);
> >>>>>>  
> >>>>>>          if ( use_ats(pdev, iommu, ivrs_dev) )
> >>>>>> -            dte->i = ats_enabled;
> >>>>>> +            dte->i = true;
> >>>>>
> >>>>> Might be easier to just use:
> >>>>>
> >>>>> dte->i = use_ats(pdev, iommu, ivrs_dev);
> >>>>
> >>>> I'm hesitant here, as in principle we might be overwriting a "true" by
> >>>> "false" then.
> >>>
> >>> Hm, but that would be fine, what's the point in enabling the IOMMU to
> >>> reply to ATS requests if ATS is not enabled on the device?
> >>>
> >>> IOW: overwriting a "true" with a "false" seem like the correct
> >>> behavior if it's based on the output of use_ats().
> >>
> >> I don't think so, unless there were flow guarantees excluding the possibility
> >> of taking this path twice without intermediately disabling the device again.
> >> Down from here the enabling of ATS is gated on use_ats(). Hence if, in an
> >> earlier invocation, we enabled ATS (and set dte->i), we wouldn't turn off ATS
> >> below (there's only code to turn it on), yet with what you suggest we'd clear
> >> dte->i.
> > 
> > Please bear with me, I think I'm confused, why would use_ats(), and if
> > that's the case, don't we want to update dte->i so that it matches the
> > ATS state?
> 
> I'm afraid I can't parse this. Maybe a result of incomplete editing? The
> topic is complex enough that I don't want to even try to guess what you
> may have meant to ask ...

Oh, indeed, sorry, the full sentences should have been:

Please bear with me, I think I'm confused, why would use_ats() return
different values for the same device?

And if that's the case, don't we want to update dte->i so that it
matches the ATS state signaled by use_ats()?

> > Otherwise we would fail to disable IOMMU device address translation
> > support if ATS was disabled?
> 
> I think the answer here is "no", but with the above I'm not really sure
> here, either.

Given the current logic in use_ats() AFAICT the return value of that
function should not change for a given device?

Thanks, Roger.
Jan Beulich May 21, 2024, 10:23 a.m. UTC | #9
On 21.05.2024 12:03, Roger Pau Monné wrote:
> On Tue, May 21, 2024 at 08:21:35AM +0200, Jan Beulich wrote:
>> On 20.05.2024 12:29, Roger Pau Monné wrote:
>>> On Wed, May 15, 2024 at 12:07:50PM +0200, Jan Beulich wrote:
>>>> On 06.05.2024 15:53, Roger Pau Monné wrote:
>>>>> On Mon, May 06, 2024 at 03:20:38PM +0200, Jan Beulich wrote:
>>>>>> On 06.05.2024 14:42, Roger Pau Monné wrote:
>>>>>>> On Thu, Feb 15, 2024 at 11:15:39AM +0100, Jan Beulich wrote:
>>>>>>>> @@ -196,7 +196,7 @@ static int __must_check amd_iommu_setup_
>>>>>>>>          dte->sys_mgt = MASK_EXTR(ivrs_dev->device_flags, ACPI_IVHD_SYSTEM_MGMT);
>>>>>>>>  
>>>>>>>>          if ( use_ats(pdev, iommu, ivrs_dev) )
>>>>>>>> -            dte->i = ats_enabled;
>>>>>>>> +            dte->i = true;
>>>>>>>
>>>>>>> Might be easier to just use:
>>>>>>>
>>>>>>> dte->i = use_ats(pdev, iommu, ivrs_dev);
>>>>>>
>>>>>> I'm hesitant here, as in principle we might be overwriting a "true" by
>>>>>> "false" then.
>>>>>
>>>>> Hm, but that would be fine, what's the point in enabling the IOMMU to
>>>>> reply to ATS requests if ATS is not enabled on the device?
>>>>>
>>>>> IOW: overwriting a "true" with a "false" seem like the correct
>>>>> behavior if it's based on the output of use_ats().
>>>>
>>>> I don't think so, unless there were flow guarantees excluding the possibility
>>>> of taking this path twice without intermediately disabling the device again.
>>>> Down from here the enabling of ATS is gated on use_ats(). Hence if, in an
>>>> earlier invocation, we enabled ATS (and set dte->i), we wouldn't turn off ATS
>>>> below (there's only code to turn it on), yet with what you suggest we'd clear
>>>> dte->i.
>>>
>>> Please bear with me, I think I'm confused, why would use_ats(), and if
>>> that's the case, don't we want to update dte->i so that it matches the
>>> ATS state?
>>
>> I'm afraid I can't parse this. Maybe a result of incomplete editing? The
>> topic is complex enough that I don't want to even try to guess what you
>> may have meant to ask ...
> 
> Oh, indeed, sorry, the full sentences should have been:
> 
> Please bear with me, I think I'm confused, why would use_ats() return
> different values for the same device?

Right now it can't, yet in principle opt_ats could change value when
wired up accordingly via hypfs.

> And if that's the case, don't we want to update dte->i so that it
> matches the ATS state signaled by use_ats()?

That depends on what adjustment would be done besides changing the
variable value, if that was to become runtime changeable.

>>> Otherwise we would fail to disable IOMMU device address translation
>>> support if ATS was disabled?
>>
>> I think the answer here is "no", but with the above I'm not really sure
>> here, either.
> 
> Given the current logic in use_ats() AFAICT the return value of that
> function should not change for a given device?

Right now it shouldn't, yes. I'm still a little hesitant to have callers
make implications from that.

Jan
diff mbox series

Patch

--- a/xen/drivers/passthrough/amd/iommu_cmd.c
+++ b/xen/drivers/passthrough/amd/iommu_cmd.c
@@ -282,7 +282,7 @@  void amd_iommu_flush_iotlb(u8 devfn, con
     struct amd_iommu *iommu;
     unsigned int req_id, queueid, maxpend;
 
-    if ( !ats_enabled )
+    if ( opt_ats <= 0 )
         return;
 
     if ( !pci_ats_enabled(pdev->seg, pdev->bus, pdev->devfn) )
@@ -340,7 +340,7 @@  static void _amd_iommu_flush_pages(struc
         flush_command_buffer(iommu, 0);
     }
 
-    if ( ats_enabled )
+    if ( opt_ats > 0 )
     {
         amd_iommu_flush_all_iotlbs(d, daddr, order);
 
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
@@ -119,7 +119,7 @@  static bool use_ats(
     const struct amd_iommu *iommu,
     const struct ivrs_mappings *ivrs_dev)
 {
-    return !ivrs_dev->block_ats &&
+    return opt_ats > 0 && !ivrs_dev->block_ats &&
            iommu_has_cap(iommu, PCI_CAP_IOTLB_SHIFT) &&
            pci_ats_device(iommu->seg, pdev->bus, pdev->devfn);
 }
@@ -196,7 +196,7 @@  static int __must_check amd_iommu_setup_
         dte->sys_mgt = MASK_EXTR(ivrs_dev->device_flags, ACPI_IVHD_SYSTEM_MGMT);
 
         if ( use_ats(pdev, iommu, ivrs_dev) )
-            dte->i = ats_enabled;
+            dte->i = true;
 
         spin_unlock_irqrestore(&iommu->lock, flags);
 
@@ -257,7 +257,7 @@  static int __must_check amd_iommu_setup_
                                          ACPI_IVHD_SYSTEM_MGMT));
 
         if ( use_ats(pdev, iommu, ivrs_dev) )
-            ASSERT(dte->i == ats_enabled);
+            ASSERT(dte->i);
 
         spin_unlock_irqrestore(&iommu->lock, flags);
 
--- a/xen/drivers/passthrough/ats.c
+++ b/xen/drivers/passthrough/ats.c
@@ -18,8 +18,8 @@ 
 #include <xen/pci_regs.h>
 #include "ats.h"
 
-bool __read_mostly ats_enabled;
-boolean_param("ats", ats_enabled);
+int8_t __ro_after_init opt_ats = -1;
+boolean_param("ats", opt_ats);
 
 int enable_ats_device(struct pci_dev *pdev, struct list_head *ats_list)
 {
--- a/xen/drivers/passthrough/ats.h
+++ b/xen/drivers/passthrough/ats.h
@@ -22,7 +22,7 @@ 
 #define ATS_QUEUE_DEPTH_MASK     0x1f
 #define ATS_ENABLE               (1<<15)
 
-extern bool ats_enabled;
+extern int8_t opt_ats;
 
 int enable_ats_device(struct pci_dev *pdev, struct list_head *ats_list);
 void disable_ats_device(struct pci_dev *pdev);
@@ -43,7 +43,7 @@  static inline int pci_ats_enabled(int se
 
 static inline int pci_ats_device(int seg, int bus, int devfn)
 {
-    if ( !ats_enabled )
+    if ( !opt_ats )
         return 0;
 
     return pci_find_ext_capability(PCI_SBDF(seg, bus, devfn),
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -1543,7 +1543,7 @@  int domain_context_mapping_one(
         }
 
         context_set_address_root(lctxt, root);
-        if ( ats_enabled && ecap_dev_iotlb(iommu->ecap) )
+        if ( opt_ats > 0 && ecap_dev_iotlb(iommu->ecap) )
             context_set_translation_type(lctxt, CONTEXT_TT_DEV_IOTLB);
         else
             context_set_translation_type(lctxt, CONTEXT_TT_MULTI_LEVEL);
--- a/xen/drivers/passthrough/vtd/x86/ats.c
+++ b/xen/drivers/passthrough/vtd/x86/ats.c
@@ -46,7 +46,7 @@  int ats_device(const struct pci_dev *pde
     struct acpi_drhd_unit *ats_drhd;
     unsigned int pos, expfl = 0;
 
-    if ( !ats_enabled || !iommu_qinval )
+    if ( opt_ats <= 0 || !iommu_qinval )
         return 0;
 
     if ( !ecap_queued_inval(drhd->iommu->ecap) ||