diff mbox series

[4/6] iommu: Combine device strictness requests with the global default

Message ID 20210621165230.4.Id84a954e705fcad3fdb35beb2bc372e4bf2108c7@changeid (mailing list archive)
State Superseded
Delegated to: Bjorn Helgaas
Headers show
Series iommu: Enable devices to request non-strict DMA, starting with QCom SD/MMC | expand

Commit Message

Douglas Anderson June 21, 2021, 11:52 p.m. UTC
In the patch ("drivers: base: Add bits to struct device to control
iommu strictness") we add the ability for devices to tell us about
their IOMMU strictness requirements. Let's now take that into account
in the IOMMU layer.

A few notes here:
* Presumably this is always how iommu_get_dma_strict() was intended to
  behave. Had this not been the intention then it never would have
  taken a domain as a parameter.
* The iommu_set_dma_strict() feels awfully non-symmetric now. That
  function sets the _default_ strictness globally in the system
  whereas iommu_get_dma_strict() returns the value for a given domain
  (falling back to the default). Presumably, at least, the fact that
  iommu_set_dma_strict() doesn't take a domain makes this obvious.

The function iommu_get_dma_strict() should now make it super obvious
where strictness comes from and who overrides who. Though the function
changed a bunch to make the logic clearer, the only two new rules
should be:
* Devices can force strictness for themselves, overriding the cmdline
  "iommu.strict=0" or a call to iommu_set_dma_strict(false).
* Devices can request non-strictness for themselves, assuming there
  was no cmdline "iommu.strict=1" or a call to
  iommu_set_dma_strict(true).

Signed-off-by: Douglas Anderson <dianders@chromium.org>
---

 drivers/iommu/iommu.c | 56 +++++++++++++++++++++++++++++++++----------
 include/linux/iommu.h |  2 ++
 2 files changed, 45 insertions(+), 13 deletions(-)

Comments

Baolu Lu June 22, 2021, 2:03 a.m. UTC | #1
On 6/22/21 7:52 AM, Douglas Anderson wrote:
> @@ -1519,7 +1542,8 @@ static int iommu_get_def_domain_type(struct device *dev)
>   
>   static int iommu_group_alloc_default_domain(struct bus_type *bus,
>   					    struct iommu_group *group,
> -					    unsigned int type)
> +					    unsigned int type,
> +					    struct device *dev)
>   {
>   	struct iommu_domain *dom;
>   
> @@ -1534,6 +1558,12 @@ static int iommu_group_alloc_default_domain(struct bus_type *bus,
>   	if (!dom)
>   		return -ENOMEM;
>   
> +	/* Save the strictness requests from the device */
> +	if (dev && type == IOMMU_DOMAIN_DMA) {
> +		dom->request_non_strict = dev->request_non_strict_iommu;
> +		dom->force_strict = dev->force_strict_iommu;
> +	}
> +

An iommu default domain might be used by multiple devices which might
have different "strict" attributes. Then who could override who?

Best regards,
baolu
Saravana Kannan June 22, 2021, 2:55 a.m. UTC | #2
On Mon, Jun 21, 2021 at 4:53 PM Douglas Anderson <dianders@chromium.org> wrote:
>
> In the patch ("drivers: base: Add bits to struct device to control
> iommu strictness") we add the ability for devices to tell us about
> their IOMMU strictness requirements. Let's now take that into account
> in the IOMMU layer.
>
> A few notes here:
> * Presumably this is always how iommu_get_dma_strict() was intended to
>   behave. Had this not been the intention then it never would have
>   taken a domain as a parameter.
> * The iommu_set_dma_strict() feels awfully non-symmetric now. That
>   function sets the _default_ strictness globally in the system
>   whereas iommu_get_dma_strict() returns the value for a given domain
>   (falling back to the default). Presumably, at least, the fact that
>   iommu_set_dma_strict() doesn't take a domain makes this obvious.
>
> The function iommu_get_dma_strict() should now make it super obvious
> where strictness comes from and who overides who. Though the function
> changed a bunch to make the logic clearer, the only two new rules
> should be:
> * Devices can force strictness for themselves, overriding the cmdline
>   "iommu.strict=0" or a call to iommu_set_dma_strict(false)).
> * Devices can request non-strictness for themselves, assuming there
>   was no cmdline "iommu.strict=1" or a call to
>   iommu_set_dma_strict(true).
>
> Signed-off-by: Douglas Anderson <dianders@chromium.org>
> ---
>
>  drivers/iommu/iommu.c | 56 +++++++++++++++++++++++++++++++++----------
>  include/linux/iommu.h |  2 ++
>  2 files changed, 45 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
> index 808ab70d5df5..0c84a4c06110 100644
> --- a/drivers/iommu/iommu.c
> +++ b/drivers/iommu/iommu.c
> @@ -28,8 +28,19 @@
>  static struct kset *iommu_group_kset;
>  static DEFINE_IDA(iommu_group_ida);
>
> +enum iommu_strictness {
> +       IOMMU_DEFAULT_STRICTNESS = -1,
> +       IOMMU_NOT_STRICT = 0,
> +       IOMMU_STRICT = 1,
> +};
> +static inline enum iommu_strictness bool_to_strictness(bool strictness)
> +{
> +       return (enum iommu_strictness)strictness;
> +}
> +
>  static unsigned int iommu_def_domain_type __read_mostly;
> -static bool iommu_dma_strict __read_mostly = true;
> +static enum iommu_strictness cmdline_dma_strict __read_mostly = IOMMU_DEFAULT_STRICTNESS;
> +static enum iommu_strictness driver_dma_strict __read_mostly = IOMMU_DEFAULT_STRICTNESS;
>  static u32 iommu_cmd_line __read_mostly;
>
>  struct iommu_group {
> @@ -69,7 +80,6 @@ static const char * const iommu_group_resv_type_string[] = {
>  };
>
>  #define IOMMU_CMD_LINE_DMA_API         BIT(0)
> -#define IOMMU_CMD_LINE_STRICT          BIT(1)
>
>  static int iommu_alloc_default_domain(struct iommu_group *group,
>                                       struct device *dev);
> @@ -336,25 +346,38 @@ early_param("iommu.passthrough", iommu_set_def_domain_type);
>
>  static int __init iommu_dma_setup(char *str)
>  {
> -       int ret = kstrtobool(str, &iommu_dma_strict);
> +       bool strict;
> +       int ret = kstrtobool(str, &strict);
>
>         if (!ret)
> -               iommu_cmd_line |= IOMMU_CMD_LINE_STRICT;
> +               cmdline_dma_strict = bool_to_strictness(strict);
>         return ret;
>  }
>  early_param("iommu.strict", iommu_dma_setup);
>
>  void iommu_set_dma_strict(bool strict)
>  {
> -       if (strict || !(iommu_cmd_line & IOMMU_CMD_LINE_STRICT))
> -               iommu_dma_strict = strict;
> +       /* A driver can request strictness but not the other way around */
> +       if (driver_dma_strict != IOMMU_STRICT)
> +               driver_dma_strict = bool_to_strictness(strict);
>  }
>
>  bool iommu_get_dma_strict(struct iommu_domain *domain)
>  {
> -       /* only allow lazy flushing for DMA domains */
> -       if (domain->type == IOMMU_DOMAIN_DMA)
> -               return iommu_dma_strict;
> +       /* Non-DMA domains or anyone forcing it to strict makes it strict */
> +       if (domain->type != IOMMU_DOMAIN_DMA ||
> +           cmdline_dma_strict == IOMMU_STRICT ||
> +           driver_dma_strict == IOMMU_STRICT ||
> +           domain->force_strict)
> +               return true;
> +
> +       /* Anyone requesting non-strict (if no forces) makes it non-strict */
> +       if (cmdline_dma_strict == IOMMU_NOT_STRICT ||
> +           driver_dma_strict == IOMMU_NOT_STRICT ||
> +           domain->request_non_strict)
> +               return false;
> +
> +       /* Nobody said anything, so it's strict by default */

If iommu.strict is not set in the command line, upstream treats it as
iommu.strict=1. Meaning, no drivers can override it.

If I understand it correctly, with your series, if iommu.strict=1 is
not set, drivers can override the "default strict mode" and ask for
non-strict mode for their domain. So if this series gets in and future
driver changes start asking for non-strict mode, systems that are
expected to operate in fully strict mode will now have devices
operating in non-strict mode.

That's breaking backward compatibility for the kernel command line
param. It looks like what you really need is to change iommu.strict
from 0/1 to lazy (previously 0), strict preferred, strict enforced
(previously 1) and you need to default it to "enforced".

Alternately (and potentially a better option?), you really should be
changing/extending dev_is_untrusted() so that it applies for any
struct device (not just PCI device) and then have this overridden in
DT (or ACPI or any firmware) to indicate a specific device is safe to
use non-strict mode on. What you are trying to capture (if the device
is safe enough) really isn't a function of the DMA device's driver, but a
function of the DMA device.



>         return true;
>  }
>  EXPORT_SYMBOL_GPL(iommu_get_dma_strict);
> @@ -1519,7 +1542,8 @@ static int iommu_get_def_domain_type(struct device *dev)
>
>  static int iommu_group_alloc_default_domain(struct bus_type *bus,
>                                             struct iommu_group *group,
> -                                           unsigned int type)
> +                                           unsigned int type,
> +                                           struct device *dev)
>  {
>         struct iommu_domain *dom;
>
> @@ -1534,6 +1558,12 @@ static int iommu_group_alloc_default_domain(struct bus_type *bus,
>         if (!dom)
>                 return -ENOMEM;
>
> +       /* Save the strictness requests from the device */
> +       if (dev && type == IOMMU_DOMAIN_DMA) {
> +               dom->request_non_strict = dev->request_non_strict_iommu;
> +               dom->force_strict = dev->force_strict_iommu;
> +       }
> +
>         group->default_domain = dom;
>         if (!group->domain)
>                 group->domain = dom;
> @@ -1550,7 +1580,7 @@ static int iommu_alloc_default_domain(struct iommu_group *group,
>
>         type = iommu_get_def_domain_type(dev) ? : iommu_def_domain_type;
>
> -       return iommu_group_alloc_default_domain(dev->bus, group, type);
> +       return iommu_group_alloc_default_domain(dev->bus, group, type, dev);
>  }
>
>  /**
> @@ -1721,7 +1751,7 @@ static void probe_alloc_default_domain(struct bus_type *bus,
>         if (!gtype.type)
>                 gtype.type = iommu_def_domain_type;
>
> -       iommu_group_alloc_default_domain(bus, group, gtype.type);
> +       iommu_group_alloc_default_domain(bus, group, gtype.type, NULL);
>
>  }
>
> @@ -3130,7 +3160,7 @@ static int iommu_change_dev_def_domain(struct iommu_group *group,
>         }
>
>         /* Sets group->default_domain to the newly allocated domain */
> -       ret = iommu_group_alloc_default_domain(dev->bus, group, type);
> +       ret = iommu_group_alloc_default_domain(dev->bus, group, type, dev);
>         if (ret)
>                 goto out;
>
> diff --git a/include/linux/iommu.h b/include/linux/iommu.h
> index 32d448050bf7..0bddef77f415 100644
> --- a/include/linux/iommu.h
> +++ b/include/linux/iommu.h
> @@ -81,6 +81,8 @@ struct iommu_domain_geometry {
>
>  struct iommu_domain {
>         unsigned type;
> +       bool force_strict:1;
> +       bool request_non_strict:1;
>         const struct iommu_ops *ops;
>         unsigned long pgsize_bitmap;    /* Bitmap of page sizes in use */
>         iommu_fault_handler_t handler;
> --
> 2.32.0.288.g62a8d224e6-goog
>
Robin Murphy June 22, 2021, 11:49 a.m. UTC | #3
On 2021-06-22 00:52, Douglas Anderson wrote:
> In the patch ("drivers: base: Add bits to struct device to control
> iommu strictness") we add the ability for devices to tell us about
> their IOMMU strictness requirements. Let's now take that into account
> in the IOMMU layer.
> 
> A few notes here:
> * Presumably this is always how iommu_get_dma_strict() was intended to
>    behave. Had this not been the intention then it never would have
>    taken a domain as a parameter.

FWIW strictness does have the semantic of being a per-domain property, 
but mostly in the sense that it's only relevant to IOMMU_DOMAIN_DMA 
domains, so the main thing was encapsulating that check rather than 
duplicating it all over callsites.

> * The iommu_set_dma_strict() feels awfully non-symmetric now. That
>    function sets the _default_ strictness globally in the system
>    whereas iommu_get_dma_strict() returns the value for a given domain
>    (falling back to the default). Presumably, at least, the fact that
>    iommu_set_dma_strict() doesn't take a domain makes this obvious.

It *is* asymmetric - one is for IOMMU core code and individual driver 
internals to know whether they need to do whatever bits of setting up a 
flush queue for a given domain they are responsible for, while the other 
is specifically for two drivers to force the global default in order to 
preserve legacy driver-specific behaviour. Maybe that should have been 
called something like iommu_set_dma_default_strict instead... :/

Robin.

> The function iommu_get_dma_strict() should now make it super obvious
> where strictness comes from and who overides who. Though the function
> changed a bunch to make the logic clearer, the only two new rules
> should be:
> * Devices can force strictness for themselves, overriding the cmdline
>    "iommu.strict=0" or a call to iommu_set_dma_strict(false)).
> * Devices can request non-strictness for themselves, assuming there
>    was no cmdline "iommu.strict=1" or a call to
>    iommu_set_dma_strict(true).
> 
> Signed-off-by: Douglas Anderson <dianders@chromium.org>
> ---
> 
>   drivers/iommu/iommu.c | 56 +++++++++++++++++++++++++++++++++----------
>   include/linux/iommu.h |  2 ++
>   2 files changed, 45 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
> index 808ab70d5df5..0c84a4c06110 100644
> --- a/drivers/iommu/iommu.c
> +++ b/drivers/iommu/iommu.c
> @@ -28,8 +28,19 @@
>   static struct kset *iommu_group_kset;
>   static DEFINE_IDA(iommu_group_ida);
>   
> +enum iommu_strictness {
> +	IOMMU_DEFAULT_STRICTNESS = -1,
> +	IOMMU_NOT_STRICT = 0,
> +	IOMMU_STRICT = 1,
> +};
> +static inline enum iommu_strictness bool_to_strictness(bool strictness)
> +{
> +	return (enum iommu_strictness)strictness;
> +}
> +
>   static unsigned int iommu_def_domain_type __read_mostly;
> -static bool iommu_dma_strict __read_mostly = true;
> +static enum iommu_strictness cmdline_dma_strict __read_mostly = IOMMU_DEFAULT_STRICTNESS;
> +static enum iommu_strictness driver_dma_strict __read_mostly = IOMMU_DEFAULT_STRICTNESS;
>   static u32 iommu_cmd_line __read_mostly;
>   
>   struct iommu_group {
> @@ -69,7 +80,6 @@ static const char * const iommu_group_resv_type_string[] = {
>   };
>   
>   #define IOMMU_CMD_LINE_DMA_API		BIT(0)
> -#define IOMMU_CMD_LINE_STRICT		BIT(1)
>   
>   static int iommu_alloc_default_domain(struct iommu_group *group,
>   				      struct device *dev);
> @@ -336,25 +346,38 @@ early_param("iommu.passthrough", iommu_set_def_domain_type);
>   
>   static int __init iommu_dma_setup(char *str)
>   {
> -	int ret = kstrtobool(str, &iommu_dma_strict);
> +	bool strict;
> +	int ret = kstrtobool(str, &strict);
>   
>   	if (!ret)
> -		iommu_cmd_line |= IOMMU_CMD_LINE_STRICT;
> +		cmdline_dma_strict = bool_to_strictness(strict);
>   	return ret;
>   }
>   early_param("iommu.strict", iommu_dma_setup);
>   
>   void iommu_set_dma_strict(bool strict)
>   {
> -	if (strict || !(iommu_cmd_line & IOMMU_CMD_LINE_STRICT))
> -		iommu_dma_strict = strict;
> +	/* A driver can request strictness but not the other way around */
> +	if (driver_dma_strict != IOMMU_STRICT)
> +		driver_dma_strict = bool_to_strictness(strict);
>   }
>   
>   bool iommu_get_dma_strict(struct iommu_domain *domain)
>   {
> -	/* only allow lazy flushing for DMA domains */
> -	if (domain->type == IOMMU_DOMAIN_DMA)
> -		return iommu_dma_strict;
> +	/* Non-DMA domains or anyone forcing it to strict makes it strict */
> +	if (domain->type != IOMMU_DOMAIN_DMA ||
> +	    cmdline_dma_strict == IOMMU_STRICT ||
> +	    driver_dma_strict == IOMMU_STRICT ||
> +	    domain->force_strict)
> +		return true;
> +
> +	/* Anyone requesting non-strict (if no forces) makes it non-strict */
> +	if (cmdline_dma_strict == IOMMU_NOT_STRICT ||
> +	    driver_dma_strict == IOMMU_NOT_STRICT ||
> +	    domain->request_non_strict)
> +		return false;
> +
> +	/* Nobody said anything, so it's strict by default */
>   	return true;
>   }
>   EXPORT_SYMBOL_GPL(iommu_get_dma_strict);
> @@ -1519,7 +1542,8 @@ static int iommu_get_def_domain_type(struct device *dev)
>   
>   static int iommu_group_alloc_default_domain(struct bus_type *bus,
>   					    struct iommu_group *group,
> -					    unsigned int type)
> +					    unsigned int type,
> +					    struct device *dev)
>   {
>   	struct iommu_domain *dom;
>   
> @@ -1534,6 +1558,12 @@ static int iommu_group_alloc_default_domain(struct bus_type *bus,
>   	if (!dom)
>   		return -ENOMEM;
>   
> +	/* Save the strictness requests from the device */
> +	if (dev && type == IOMMU_DOMAIN_DMA) {
> +		dom->request_non_strict = dev->request_non_strict_iommu;
> +		dom->force_strict = dev->force_strict_iommu;
> +	}
> +
>   	group->default_domain = dom;
>   	if (!group->domain)
>   		group->domain = dom;
> @@ -1550,7 +1580,7 @@ static int iommu_alloc_default_domain(struct iommu_group *group,
>   
>   	type = iommu_get_def_domain_type(dev) ? : iommu_def_domain_type;
>   
> -	return iommu_group_alloc_default_domain(dev->bus, group, type);
> +	return iommu_group_alloc_default_domain(dev->bus, group, type, dev);
>   }
>   
>   /**
> @@ -1721,7 +1751,7 @@ static void probe_alloc_default_domain(struct bus_type *bus,
>   	if (!gtype.type)
>   		gtype.type = iommu_def_domain_type;
>   
> -	iommu_group_alloc_default_domain(bus, group, gtype.type);
> +	iommu_group_alloc_default_domain(bus, group, gtype.type, NULL);
>   
>   }
>   
> @@ -3130,7 +3160,7 @@ static int iommu_change_dev_def_domain(struct iommu_group *group,
>   	}
>   
>   	/* Sets group->default_domain to the newly allocated domain */
> -	ret = iommu_group_alloc_default_domain(dev->bus, group, type);
> +	ret = iommu_group_alloc_default_domain(dev->bus, group, type, dev);
>   	if (ret)
>   		goto out;
>   
> diff --git a/include/linux/iommu.h b/include/linux/iommu.h
> index 32d448050bf7..0bddef77f415 100644
> --- a/include/linux/iommu.h
> +++ b/include/linux/iommu.h
> @@ -81,6 +81,8 @@ struct iommu_domain_geometry {
>   
>   struct iommu_domain {
>   	unsigned type;
> +	bool force_strict:1;
> +	bool request_non_strict:1;
>   	const struct iommu_ops *ops;
>   	unsigned long pgsize_bitmap;	/* Bitmap of page sizes in use */
>   	iommu_fault_handler_t handler;
>
Douglas Anderson June 22, 2021, 4:40 p.m. UTC | #4
Hi,

On Mon, Jun 21, 2021 at 7:56 PM Saravana Kannan <saravanak@google.com> wrote:
>
> On Mon, Jun 21, 2021 at 4:53 PM Douglas Anderson <dianders@chromium.org> wrote:
> >
> > In the patch ("drivers: base: Add bits to struct device to control
> > iommu strictness") we add the ability for devices to tell us about
> > their IOMMU strictness requirements. Let's now take that into account
> > in the IOMMU layer.
> >
> > A few notes here:
> > * Presumably this is always how iommu_get_dma_strict() was intended to
> >   behave. Had this not been the intention then it never would have
> >   taken a domain as a parameter.
> > * The iommu_set_dma_strict() feels awfully non-symmetric now. That
> >   function sets the _default_ strictness globally in the system
> >   whereas iommu_get_dma_strict() returns the value for a given domain
> >   (falling back to the default). Presumably, at least, the fact that
> >   iommu_set_dma_strict() doesn't take a domain makes this obvious.
> >
> > The function iommu_get_dma_strict() should now make it super obvious
> > where strictness comes from and who overides who. Though the function
> > changed a bunch to make the logic clearer, the only two new rules
> > should be:
> > * Devices can force strictness for themselves, overriding the cmdline
> >   "iommu.strict=0" or a call to iommu_set_dma_strict(false)).
> > * Devices can request non-strictness for themselves, assuming there
> >   was no cmdline "iommu.strict=1" or a call to
> >   iommu_set_dma_strict(true).
> >
> > Signed-off-by: Douglas Anderson <dianders@chromium.org>
> > ---
> >
> >  drivers/iommu/iommu.c | 56 +++++++++++++++++++++++++++++++++----------
> >  include/linux/iommu.h |  2 ++
> >  2 files changed, 45 insertions(+), 13 deletions(-)
> >
> > diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
> > index 808ab70d5df5..0c84a4c06110 100644
> > --- a/drivers/iommu/iommu.c
> > +++ b/drivers/iommu/iommu.c
> > @@ -28,8 +28,19 @@
> >  static struct kset *iommu_group_kset;
> >  static DEFINE_IDA(iommu_group_ida);
> >
> > +enum iommu_strictness {
> > +       IOMMU_DEFAULT_STRICTNESS = -1,
> > +       IOMMU_NOT_STRICT = 0,
> > +       IOMMU_STRICT = 1,
> > +};
> > +static inline enum iommu_strictness bool_to_strictness(bool strictness)
> > +{
> > +       return (enum iommu_strictness)strictness;
> > +}
> > +
> >  static unsigned int iommu_def_domain_type __read_mostly;
> > -static bool iommu_dma_strict __read_mostly = true;
> > +static enum iommu_strictness cmdline_dma_strict __read_mostly = IOMMU_DEFAULT_STRICTNESS;
> > +static enum iommu_strictness driver_dma_strict __read_mostly = IOMMU_DEFAULT_STRICTNESS;
> >  static u32 iommu_cmd_line __read_mostly;
> >
> >  struct iommu_group {
> > @@ -69,7 +80,6 @@ static const char * const iommu_group_resv_type_string[] = {
> >  };
> >
> >  #define IOMMU_CMD_LINE_DMA_API         BIT(0)
> > -#define IOMMU_CMD_LINE_STRICT          BIT(1)
> >
> >  static int iommu_alloc_default_domain(struct iommu_group *group,
> >                                       struct device *dev);
> > @@ -336,25 +346,38 @@ early_param("iommu.passthrough", iommu_set_def_domain_type);
> >
> >  static int __init iommu_dma_setup(char *str)
> >  {
> > -       int ret = kstrtobool(str, &iommu_dma_strict);
> > +       bool strict;
> > +       int ret = kstrtobool(str, &strict);
> >
> >         if (!ret)
> > -               iommu_cmd_line |= IOMMU_CMD_LINE_STRICT;
> > +               cmdline_dma_strict = bool_to_strictness(strict);
> >         return ret;
> >  }
> >  early_param("iommu.strict", iommu_dma_setup);
> >
> >  void iommu_set_dma_strict(bool strict)
> >  {
> > -       if (strict || !(iommu_cmd_line & IOMMU_CMD_LINE_STRICT))
> > -               iommu_dma_strict = strict;
> > +       /* A driver can request strictness but not the other way around */
> > +       if (driver_dma_strict != IOMMU_STRICT)
> > +               driver_dma_strict = bool_to_strictness(strict);
> >  }
> >
> >  bool iommu_get_dma_strict(struct iommu_domain *domain)
> >  {
> > -       /* only allow lazy flushing for DMA domains */
> > -       if (domain->type == IOMMU_DOMAIN_DMA)
> > -               return iommu_dma_strict;
> > +       /* Non-DMA domains or anyone forcing it to strict makes it strict */
> > +       if (domain->type != IOMMU_DOMAIN_DMA ||
> > +           cmdline_dma_strict == IOMMU_STRICT ||
> > +           driver_dma_strict == IOMMU_STRICT ||
> > +           domain->force_strict)
> > +               return true;
> > +
> > +       /* Anyone requesting non-strict (if no forces) makes it non-strict */
> > +       if (cmdline_dma_strict == IOMMU_NOT_STRICT ||
> > +           driver_dma_strict == IOMMU_NOT_STRICT ||
> > +           domain->request_non_strict)
> > +               return false;
> > +
> > +       /* Nobody said anything, so it's strict by default */
>
> If iommu.strict is not set in the command line, upstream treats it as
> iommu.strict=1. Meaning, no drivers can override it.
>
> If I understand it correctly, with your series, if iommu.strict=1 is
> not set, drivers can override the "default strict mode" and ask for
> non-strict mode for their domain. So if this series gets in and future
> driver changes start asking for non-strict mode, systems that are
> expected to operate in fully strict mode will now have devices
> operating in non-strict mode.
>
> That's breaking backward compatibility for the kernel command line
> param. It looks like what you really need is to change iommu.strict
> from 0/1 to lazy (previously 0), strict preferred, strict enforced
> (previously 1) and you need to default it to "enforced".

I'm not quite sure I'd agree, but certainly it could be up for debate.
I think I'm keeping full compatibility with the kernel command line
parameter, specifically:

* iommu.strict=0: default to non-strict mode unless a driver overrides

* iommu.strict=1: force everything to strict no matter what

...both of those two things are the same before and after my patchset.

You're arguing that I'm changing the behavior of the system when no
command line parameter is present. To me this seems a little bit of a
stretch. If no command line parameter is present I'd assert that the
kernel should do some sort of sane behavior and that we don't have to
force 100% strictness if the command line parameter isn't present at
all.

I would also note that your assertion that the system is 100% strict
under the "no command line parameter" case isn't actually true as far
as I can tell. The code in mainline is a little hard to follow (for me
the code after my patch is easier to follow), but you can see that
even before my patch a call to iommu_set_dma_strict() could be used to
make the system non-strict if no command-line parameter was passed.


> Alternately (and potentially a better option?), you really should be
> changing/extending dev_is_untrusted() so that it applies for any
> struct device (not just PCI device) and then have this overridden in
> DT (or ACPI or any firmware) to indicate a specific device is safe to
> use non-strict mode on.

I was really trying _not_ to do that. I believe this has been talked
about several times, including at last year's Linux Plumbers
conference. As far as I can tell it always ends in a shouting match w/
no forward progress. There are a bunch of problems here, namely:

* Trust isn't necessarily binary. There might be peripherals that you
sort-of trust, others that you really trust, and others that you don't
trust at all. For the ones you sort-of trust there may be some things
you trust about them and other things you don't trust about them.

* The firmware isn't necessarily the best arbiter of trust. For
instance, if the company that employs me (Google) compiled their own
firmware for a given peripheral device and they were convinced that
the peripheral firmware couldn't be compromised any more easily than
code running in the kernel itself, they might assert that the
peripheral device should be "trusted". An individual Linux hacker,
however, might not really trust the firmware blob that Google provides
and might want the device to be "untrusted".

* In the PCI subsystem I believe that "trusted" vs "untrusted" is
generally associated with whether a device is soldered down onto the
board or in some type of slot (the "external" concept). That's been
working OK for them, I think, but I'm not convinced it'd be easy to
apply everywhere. One example problem: what do you do about SD cards?
The thing doing the DMA (the SDHCI controller) is certainly "internal"
but the cards are "external". I'm making the argument in my series
that SDHCI should be considered at least trusted enough to use
non-strict DMA, but it's still technically "external" and you wouldn't
necessarily, for instance, trust the filesystem structure not to be
crafted in a malicious way so as to exploit the kernel.

> What you are trying to capture (if the device
> safe enough) really isn't a function of the DMA device's driver, but a
> function of the DMA device.

It's a function of the DMA device, but the entity in the kernel with
the most knowledge about this is the device's driver. The driver also
has the best ability to make informed decisions, perhaps looking at
the device's properties (like the "non-removable" one for SD/MMC) to
help make decisions without us having to create a new property to
describe trust and then argue about who sets it and when.

-Doug
Douglas Anderson June 22, 2021, 4:53 p.m. UTC | #5
Hi,

On Mon, Jun 21, 2021 at 7:05 PM Lu Baolu <baolu.lu@linux.intel.com> wrote:
>
> On 6/22/21 7:52 AM, Douglas Anderson wrote:
> > @@ -1519,7 +1542,8 @@ static int iommu_get_def_domain_type(struct device *dev)
> >
> >   static int iommu_group_alloc_default_domain(struct bus_type *bus,
> >                                           struct iommu_group *group,
> > -                                         unsigned int type)
> > +                                         unsigned int type,
> > +                                         struct device *dev)
> >   {
> >       struct iommu_domain *dom;
> >
> > @@ -1534,6 +1558,12 @@ static int iommu_group_alloc_default_domain(struct bus_type *bus,
> >       if (!dom)
> >               return -ENOMEM;
> >
> > +     /* Save the strictness requests from the device */
> > +     if (dev && type == IOMMU_DOMAIN_DMA) {
> > +             dom->request_non_strict = dev->request_non_strict_iommu;
> > +             dom->force_strict = dev->force_strict_iommu;
> > +     }
> > +
>
> An iommu default domain might be used by multiple devices which might
> have different "strict" attributions. Then who could override who?

My gut instinct would be that if multiple devices were part of a given
domain that it would be combined like this:

1. Any device that requests strict forces the domain to be strict.

2. To request non-strict all of the devices in the domain would have
to request non-strict.

To do that I'd have to change my patchset obviously, but I don't think
it should be hard. We can just keep a count of devices and a count of
the strict vs. non-strict requests? If there are no other blockers
I'll try to do that in my v2.

-Doug
Douglas Anderson June 22, 2021, 5:01 p.m. UTC | #6
Hi,

On Tue, Jun 22, 2021 at 9:53 AM Doug Anderson <dianders@chromium.org> wrote:
>
> Hi,
>
> On Mon, Jun 21, 2021 at 7:05 PM Lu Baolu <baolu.lu@linux.intel.com> wrote:
> >
> > On 6/22/21 7:52 AM, Douglas Anderson wrote:
> > > @@ -1519,7 +1542,8 @@ static int iommu_get_def_domain_type(struct device *dev)
> > >
> > >   static int iommu_group_alloc_default_domain(struct bus_type *bus,
> > >                                           struct iommu_group *group,
> > > -                                         unsigned int type)
> > > +                                         unsigned int type,
> > > +                                         struct device *dev)
> > >   {
> > >       struct iommu_domain *dom;
> > >
> > > @@ -1534,6 +1558,12 @@ static int iommu_group_alloc_default_domain(struct bus_type *bus,
> > >       if (!dom)
> > >               return -ENOMEM;
> > >
> > > +     /* Save the strictness requests from the device */
> > > +     if (dev && type == IOMMU_DOMAIN_DMA) {
> > > +             dom->request_non_strict = dev->request_non_strict_iommu;
> > > +             dom->force_strict = dev->force_strict_iommu;
> > > +     }
> > > +
> >
> > An iommu default domain might be used by multiple devices which might
> > have different "strict" attributions. Then who could override who?
>
> My gut instinct would be that if multiple devices were part of a given
> domain that it would be combined like this:
>
> 1. Any device that requests strict makes the domain strict force strict.
>
> 2. To request non-strict all of the devices in the domain would have
> to request non-strict.
>
> To do that I'd have to change my patchset obviously, but I don't think
> it should be hard. We can just keep a count of devices and a count of
> the strict vs. non-strict requests? If there are no other blockers
> I'll try to do that in my v2.

One issue, I guess, is that we might need to transition a non-strict
domain to become strict. Is that possible? We'd end up with an extra
"flush queue" that we didn't need to allocate, but are there other
problems?

Actually, in general would it be possible to transition between strict
and non-strict at runtime as long as we called
init_iova_flush_queue()? Maybe that's a better solution than all
this--we just boot in strict mode and can just transition to
non-strict mode after-the-fact?


-Doug
Rajat Jain June 22, 2021, 6:45 p.m. UTC | #7
On Mon, Jun 21, 2021 at 4:53 PM Douglas Anderson <dianders@chromium.org> wrote:
>
> In the patch ("drivers: base: Add bits to struct device to control
> iommu strictness") we add the ability for devices to tell us about
> their IOMMU strictness requirements. Let's now take that into account
> in the IOMMU layer.
>
> A few notes here:
> * Presumably this is always how iommu_get_dma_strict() was intended to
>   behave. Had this not been the intention then it never would have
>   taken a domain as a parameter.
> * The iommu_set_dma_strict() feels awfully non-symmetric now. That
>   function sets the _default_ strictness globally in the system
>   whereas iommu_get_dma_strict() returns the value for a given domain
>   (falling back to the default). Presumably, at least, the fact that
>   iommu_set_dma_strict() doesn't take a domain makes this obvious.
>
> The function iommu_get_dma_strict() should now make it super obvious
> where strictness comes from and who overrides who. Though the function
> changed a bunch to make the logic clearer, the only two new rules
> should be:
> * Devices can force strictness for themselves, overriding the cmdline
>   "iommu.strict=0" or a call to iommu_set_dma_strict(false).
> * Devices can request non-strictness for themselves, assuming there
>   was no cmdline "iommu.strict=1" or a call to
>   iommu_set_dma_strict(true).

Along the same lines, I believe a platform (device tree / ACPI) should
also be able to have a say in this. I assume in your proposal, a
platform would expose a property in device tree which the device
driver would need to parse and then use it to set these bits in the
"struct device"?

Thanks,

Rajat



>
> Signed-off-by: Douglas Anderson <dianders@chromium.org>
> ---
>
>  drivers/iommu/iommu.c | 56 +++++++++++++++++++++++++++++++++----------
>  include/linux/iommu.h |  2 ++
>  2 files changed, 45 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
> index 808ab70d5df5..0c84a4c06110 100644
> --- a/drivers/iommu/iommu.c
> +++ b/drivers/iommu/iommu.c
> @@ -28,8 +28,19 @@
>  static struct kset *iommu_group_kset;
>  static DEFINE_IDA(iommu_group_ida);
>
> +enum iommu_strictness {
> +       IOMMU_DEFAULT_STRICTNESS = -1,
> +       IOMMU_NOT_STRICT = 0,
> +       IOMMU_STRICT = 1,
> +};
> +static inline enum iommu_strictness bool_to_strictness(bool strictness)
> +{
> +       return (enum iommu_strictness)strictness;
> +}
> +
>  static unsigned int iommu_def_domain_type __read_mostly;
> -static bool iommu_dma_strict __read_mostly = true;
> +static enum iommu_strictness cmdline_dma_strict __read_mostly = IOMMU_DEFAULT_STRICTNESS;
> +static enum iommu_strictness driver_dma_strict __read_mostly = IOMMU_DEFAULT_STRICTNESS;
>  static u32 iommu_cmd_line __read_mostly;
>
>  struct iommu_group {
> @@ -69,7 +80,6 @@ static const char * const iommu_group_resv_type_string[] = {
>  };
>
>  #define IOMMU_CMD_LINE_DMA_API         BIT(0)
> -#define IOMMU_CMD_LINE_STRICT          BIT(1)
>
>  static int iommu_alloc_default_domain(struct iommu_group *group,
>                                       struct device *dev);
> @@ -336,25 +346,38 @@ early_param("iommu.passthrough", iommu_set_def_domain_type);
>
>  static int __init iommu_dma_setup(char *str)
>  {
> -       int ret = kstrtobool(str, &iommu_dma_strict);
> +       bool strict;
> +       int ret = kstrtobool(str, &strict);
>
>         if (!ret)
> -               iommu_cmd_line |= IOMMU_CMD_LINE_STRICT;
> +               cmdline_dma_strict = bool_to_strictness(strict);
>         return ret;
>  }
>  early_param("iommu.strict", iommu_dma_setup);
>
>  void iommu_set_dma_strict(bool strict)
>  {
> -       if (strict || !(iommu_cmd_line & IOMMU_CMD_LINE_STRICT))
> -               iommu_dma_strict = strict;
> +       /* A driver can request strictness but not the other way around */
> +       if (driver_dma_strict != IOMMU_STRICT)
> +               driver_dma_strict = bool_to_strictness(strict);
>  }
>
>  bool iommu_get_dma_strict(struct iommu_domain *domain)
>  {
> -       /* only allow lazy flushing for DMA domains */
> -       if (domain->type == IOMMU_DOMAIN_DMA)
> -               return iommu_dma_strict;
> +       /* Non-DMA domains or anyone forcing it to strict makes it strict */
> +       if (domain->type != IOMMU_DOMAIN_DMA ||
> +           cmdline_dma_strict == IOMMU_STRICT ||
> +           driver_dma_strict == IOMMU_STRICT ||
> +           domain->force_strict)
> +               return true;
> +
> +       /* Anyone requesting non-strict (if no forces) makes it non-strict */
> +       if (cmdline_dma_strict == IOMMU_NOT_STRICT ||
> +           driver_dma_strict == IOMMU_NOT_STRICT ||
> +           domain->request_non_strict)
> +               return false;
> +
> +       /* Nobody said anything, so it's strict by default */
>         return true;
>  }
>  EXPORT_SYMBOL_GPL(iommu_get_dma_strict);
> @@ -1519,7 +1542,8 @@ static int iommu_get_def_domain_type(struct device *dev)
>
>  static int iommu_group_alloc_default_domain(struct bus_type *bus,
>                                             struct iommu_group *group,
> -                                           unsigned int type)
> +                                           unsigned int type,
> +                                           struct device *dev)
>  {
>         struct iommu_domain *dom;
>
> @@ -1534,6 +1558,12 @@ static int iommu_group_alloc_default_domain(struct bus_type *bus,
>         if (!dom)
>                 return -ENOMEM;
>
> +       /* Save the strictness requests from the device */
> +       if (dev && type == IOMMU_DOMAIN_DMA) {
> +               dom->request_non_strict = dev->request_non_strict_iommu;
> +               dom->force_strict = dev->force_strict_iommu;
> +       }
> +
>         group->default_domain = dom;
>         if (!group->domain)
>                 group->domain = dom;
> @@ -1550,7 +1580,7 @@ static int iommu_alloc_default_domain(struct iommu_group *group,
>
>         type = iommu_get_def_domain_type(dev) ? : iommu_def_domain_type;
>
> -       return iommu_group_alloc_default_domain(dev->bus, group, type);
> +       return iommu_group_alloc_default_domain(dev->bus, group, type, dev);
>  }
>
>  /**
> @@ -1721,7 +1751,7 @@ static void probe_alloc_default_domain(struct bus_type *bus,
>         if (!gtype.type)
>                 gtype.type = iommu_def_domain_type;
>
> -       iommu_group_alloc_default_domain(bus, group, gtype.type);
> +       iommu_group_alloc_default_domain(bus, group, gtype.type, NULL);
>
>  }
>
> @@ -3130,7 +3160,7 @@ static int iommu_change_dev_def_domain(struct iommu_group *group,
>         }
>
>         /* Sets group->default_domain to the newly allocated domain */
> -       ret = iommu_group_alloc_default_domain(dev->bus, group, type);
> +       ret = iommu_group_alloc_default_domain(dev->bus, group, type, dev);
>         if (ret)
>                 goto out;
>
> diff --git a/include/linux/iommu.h b/include/linux/iommu.h
> index 32d448050bf7..0bddef77f415 100644
> --- a/include/linux/iommu.h
> +++ b/include/linux/iommu.h
> @@ -81,6 +81,8 @@ struct iommu_domain_geometry {
>
>  struct iommu_domain {
>         unsigned type;
> +       bool force_strict:1;
> +       bool request_non_strict:1;
>         const struct iommu_ops *ops;
>         unsigned long pgsize_bitmap;    /* Bitmap of page sizes in use */
>         iommu_fault_handler_t handler;
> --
> 2.32.0.288.g62a8d224e6-goog
>
Douglas Anderson June 22, 2021, 7:35 p.m. UTC | #8
Hi,

On Tue, Jun 22, 2021 at 11:45 AM Rajat Jain <rajatja@google.com> wrote:
>
> On Mon, Jun 21, 2021 at 4:53 PM Douglas Anderson <dianders@chromium.org> wrote:
> >
> > In the patch ("drivers: base: Add bits to struct device to control
> > iommu strictness") we add the ability for devices to tell us about
> > their IOMMU strictness requirements. Let's now take that into account
> > in the IOMMU layer.
> >
> > A few notes here:
> > * Presumably this is always how iommu_get_dma_strict() was intended to
> >   behave. Had this not been the intention then it never would have
> >   taken a domain as a parameter.
> > * The iommu_set_dma_strict() feels awfully non-symmetric now. That
> >   function sets the _default_ strictness globally in the system
> >   whereas iommu_get_dma_strict() returns the value for a given domain
> >   (falling back to the default). Presumably, at least, the fact that
> >   iommu_set_dma_strict() doesn't take a domain makes this obvious.
> >
> > The function iommu_get_dma_strict() should now make it super obvious
> > where strictness comes from and who overrides who. Though the function
> > changed a bunch to make the logic clearer, the only two new rules
> > should be:
> > * Devices can force strictness for themselves, overriding the cmdline
> >   "iommu.strict=0" or a call to iommu_set_dma_strict(false).
> > * Devices can request non-strictness for themselves, assuming there
> >   was no cmdline "iommu.strict=1" or a call to
> >   iommu_set_dma_strict(true).
>
> Along the same lines, I believe a platform (device tree / ACPI) should
> also be able to have a say in this. I assume in your proposal, a
> platform would expose a property in device tree which the device
> driver would need to parse and then use it to set these bits in the
> "struct device"?

Nothing would prevent creating a device tree or ACPI property that
caused either "force-strict" or "request-non-strict" to be set if
everyone agrees that it's a good idea. I wouldn't reject the idea
myself, but I do worry that we'd devolve into the usual bikeshed for
exactly how this should look. I talked about this a bit in my response
to Saravana, but basically:

* If there was some generic property, would we call it "untrusted",
"external", or something else?

* How do you describe "trust" in a generic "objective" way? It's not
really boolean and trying to describe exactly how trustworthy
something should be considered is hard.

* At least for the device tree there's a general requirement that it
describes the hardware and not so much how the software should
configure the hardware. As I understand it there is _some_ leeway here
where it's OK to describe how the hardware was designed for the OS to
configure it, but it's a pretty high bar and a hard sell. In general
the device tree isn't supposed to be used to describe "policy". In
other words: if one OS might decide on one setting and another OS on
another then it doesn't really belong in the device tree.

* In general the kernel is also not really supposed to have policy
hardcoded in either, though it feels like we can get away with having
a good default/sane policy and allowing overriding the policy with
command line parameters (like iommu.strict). In the case where
something has to be configured at bootup there's not many ways to do
better.


tl;dr: I have no plans to try to make an overarching property, but my
patch series does allow subsystems to come up with and easily
implement their own rules as it makes sense. While this might seem
hodgepodge I prefer to see it as "flexible" since I'm not convinced
that we're going to be able to come up with an overarching trust
framework.

-Doug
Saravana Kannan June 22, 2021, 7:50 p.m. UTC | #9
On Tue, Jun 22, 2021 at 9:46 AM Doug Anderson <dianders@chromium.org> wrote:
>
> Hi,
>
> On Mon, Jun 21, 2021 at 7:56 PM Saravana Kannan <saravanak@google.com> wrote:
> >
> > On Mon, Jun 21, 2021 at 4:53 PM Douglas Anderson <dianders@chromium.org> wrote:
> > >
> > > In the patch ("drivers: base: Add bits to struct device to control
> > > iommu strictness") we add the ability for devices to tell us about
> > > their IOMMU strictness requirements. Let's now take that into account
> > > in the IOMMU layer.
> > >
> > > A few notes here:
> > > * Presumably this is always how iommu_get_dma_strict() was intended to
> > >   behave. Had this not been the intention then it never would have
> > >   taken a domain as a parameter.
> > > * The iommu_set_dma_strict() feels awfully non-symmetric now. That
> > >   function sets the _default_ strictness globally in the system
> > >   whereas iommu_get_dma_strict() returns the value for a given domain
> > >   (falling back to the default). Presumably, at least, the fact that
> > >   iommu_set_dma_strict() doesn't take a domain makes this obvious.
> > >
> > > The function iommu_get_dma_strict() should now make it super obvious
> > > where strictness comes from and who overrides who. Though the function
> > > changed a bunch to make the logic clearer, the only two new rules
> > > should be:
> > > * Devices can force strictness for themselves, overriding the cmdline
> > >   "iommu.strict=0" or a call to iommu_set_dma_strict(false).
> > > * Devices can request non-strictness for themselves, assuming there
> > >   was no cmdline "iommu.strict=1" or a call to
> > >   iommu_set_dma_strict(true).
> > >
> > > Signed-off-by: Douglas Anderson <dianders@chromium.org>
> > > ---
> > >
> > >  drivers/iommu/iommu.c | 56 +++++++++++++++++++++++++++++++++----------
> > >  include/linux/iommu.h |  2 ++
> > >  2 files changed, 45 insertions(+), 13 deletions(-)
> > >
> > > diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
> > > index 808ab70d5df5..0c84a4c06110 100644
> > > --- a/drivers/iommu/iommu.c
> > > +++ b/drivers/iommu/iommu.c
> > > @@ -28,8 +28,19 @@
> > >  static struct kset *iommu_group_kset;
> > >  static DEFINE_IDA(iommu_group_ida);
> > >
> > > +enum iommu_strictness {
> > > +       IOMMU_DEFAULT_STRICTNESS = -1,
> > > +       IOMMU_NOT_STRICT = 0,
> > > +       IOMMU_STRICT = 1,
> > > +};
> > > +static inline enum iommu_strictness bool_to_strictness(bool strictness)
> > > +{
> > > +       return (enum iommu_strictness)strictness;
> > > +}
> > > +
> > >  static unsigned int iommu_def_domain_type __read_mostly;
> > > -static bool iommu_dma_strict __read_mostly = true;
> > > +static enum iommu_strictness cmdline_dma_strict __read_mostly = IOMMU_DEFAULT_STRICTNESS;
> > > +static enum iommu_strictness driver_dma_strict __read_mostly = IOMMU_DEFAULT_STRICTNESS;
> > >  static u32 iommu_cmd_line __read_mostly;
> > >
> > >  struct iommu_group {
> > > @@ -69,7 +80,6 @@ static const char * const iommu_group_resv_type_string[] = {
> > >  };
> > >
> > >  #define IOMMU_CMD_LINE_DMA_API         BIT(0)
> > > -#define IOMMU_CMD_LINE_STRICT          BIT(1)
> > >
> > >  static int iommu_alloc_default_domain(struct iommu_group *group,
> > >                                       struct device *dev);
> > > @@ -336,25 +346,38 @@ early_param("iommu.passthrough", iommu_set_def_domain_type);
> > >
> > >  static int __init iommu_dma_setup(char *str)
> > >  {
> > > -       int ret = kstrtobool(str, &iommu_dma_strict);
> > > +       bool strict;
> > > +       int ret = kstrtobool(str, &strict);
> > >
> > >         if (!ret)
> > > -               iommu_cmd_line |= IOMMU_CMD_LINE_STRICT;
> > > +               cmdline_dma_strict = bool_to_strictness(strict);
> > >         return ret;
> > >  }
> > >  early_param("iommu.strict", iommu_dma_setup);
> > >
> > >  void iommu_set_dma_strict(bool strict)
> > >  {
> > > -       if (strict || !(iommu_cmd_line & IOMMU_CMD_LINE_STRICT))
> > > -               iommu_dma_strict = strict;
> > > +       /* A driver can request strictness but not the other way around */
> > > +       if (driver_dma_strict != IOMMU_STRICT)
> > > +               driver_dma_strict = bool_to_strictness(strict);
> > >  }
> > >
> > >  bool iommu_get_dma_strict(struct iommu_domain *domain)
> > >  {
> > > -       /* only allow lazy flushing for DMA domains */
> > > -       if (domain->type == IOMMU_DOMAIN_DMA)
> > > -               return iommu_dma_strict;
> > > +       /* Non-DMA domains or anyone forcing it to strict makes it strict */
> > > +       if (domain->type != IOMMU_DOMAIN_DMA ||
> > > +           cmdline_dma_strict == IOMMU_STRICT ||
> > > +           driver_dma_strict == IOMMU_STRICT ||
> > > +           domain->force_strict)
> > > +               return true;
> > > +
> > > +       /* Anyone requesting non-strict (if no forces) makes it non-strict */
> > > +       if (cmdline_dma_strict == IOMMU_NOT_STRICT ||
> > > +           driver_dma_strict == IOMMU_NOT_STRICT ||
> > > +           domain->request_non_strict)
> > > +               return false;
> > > +
> > > +       /* Nobody said anything, so it's strict by default */
> >
> > If iommu.strict is not set in the command line, upstream treats it as
> > iommu.strict=1. Meaning, no drivers can override it.
> >
> > If I understand it correctly, with your series, if iommu.strict=1 is
> > not set, drivers can override the "default strict mode" and ask for
> > non-strict mode for their domain. So if this series gets in and future
> > driver changes start asking for non-strict mode, systems that are
> > expected to operate in fully strict mode will now have devices
> > operating in non-strict mode.
> >
> > That's breaking backward compatibility for the kernel command line
> > param. It looks like what you really need is to change iommu.strict
> > from 0/1 to lazy (previously 0), strict preferred, strict enforced
> > (previously 1) and you need to default it to "enforced".
>
> I'm not quite sure I'd agree, but certainly it could be up for debate.
> I think I'm keeping full compatibility with the kernel command line
> parameter, specifically:
>
> * iommu.strict=0: default to non-strict mode unless a driver overrides
>
> * iommu.strict=1: force everything to strict no matter what
>
> ...both of those two things are the same before and after my patchset.
>
> You're arguing that I'm changing the behavior of the system when no
> command line parameter is present. To me this seems a little bit of a
> stretch. If no command line parameter is present I'd assert that the
> kernel should do some sort of sane behavior and that we don't have to
> force 100% strictness if the command line parameter isn't present at
> all.
>
> I would also note that your assertion that the system is 100% strict
> under the "no command line parameter" case isn't actually true as far
> as I can tell. The code in mainline is a little hard to follow (for me
> the code after my patch is easier to follow), but you can see that
> even before my patch a call to iommu_set_dma_strict() could be used to
> make the system non-strict if no command-line parameter was passed.

Well, the kernel doc says if you don't list iommu.strict, it defaults
to 1. So no one would have a reason to set all the command line
options to their default values. You can't fit that all into the
commandline anyway :) That's why I think this breaks backward
compatibility. But I'll let the IOMMU folks Ack/Nak based on this. I
just wanted to highlight the difference in behavior.

>
>
> > Alternately (and potentially a better option?), you really should be
> > changing/extending dev_is_untrusted() so that it applies for any
> > struct device (not just PCI device) and then have this overridden in
> > DT (or ACPI or any firmware) to indicate a specific device is safe to
> > use non-strict mode on.
>
> I was really trying _not_ to do that. I believe this has been talked
> about several times, including at last year's Linux Plumbers
> conference. As far as I can tell it always ends in a shouting match w/
> no forward progress.

I feel you :) DT changes can be painful. But in this case, it seems
like the right path. We'll see what others think. Looks like Rajat has
a similar view.

> There are a bunch of problems here, namely:
>
> * Trust isn't necessarily binary. There might be peripherals that you
> sort-of trust, others that you really trust, and others that you don't
> trust at all. For the ones you sort-of trust there may be some things
> you trust about them and other things you don't trust about them.

Right, then let's not create a boolean property. It also doesn't have
to be a "trust" property. It could simply be an "allow-lazy-tlb-flush"
property. Let's figure that out.

> * The firmware isn't necessarily the best arbiter of trust. For
> instance, if the company that employs me (Google) compiled their own
> firmware for a given peripheral device and they were convinced that
> the peripheral firmware couldn't be compromised any more easily than
> code running in the kernel itself, they might assert that the
> peripheral device should be "trusted".

The peripheral manufacturer IS the right entity to decide if the
hardware is trustworthy or not. Saying you don't trust the DT from a
peripheral manufacturer is a whole other problem that isn't going to
be solved by one series.

> An individual Linux hacker,
> however, might not really trust the firmware blob that Google provides
> and might want the device to be "untrusted".

If the Linux hacker wants to overwrite the kernel in their device, I
expect they'd also be able to change the DT. And they always have
iommu.strict=1 if they don't trust the firmware.

> * In the PCI subsystem I believe that "trusted" vs "untrusted" is
> generally associated with whether a device is soldered down onto the
> board or in some type of slot (the "external" concept). That's been
> working OK for them, I think, but I'm not convinced it'd be easy to
> apply everywhere. One example problem: what do you do about SD cards?
> The thing doing the DMA (the SDHCI controller) is certainly "internal"
> but the cards are "external". I'm making the argument in my series
> that SDHCI should be considered at least trusted enough to use
> non-strict DMA, but it's still technically "external" and you wouldn't
> necessarily, for instance, trust the filesystem structure not to be
> crafted in a malicious way so as to exploit the kernel.

Right, I wasn't clear. I didn't say we should add a "trusted vs
untrusted" property to firmware. We can make it more granular and let
the DT/bus decide. In this case, the DT could set it up and PCI can
override it if it wants to.

> > What you are trying to capture (if the device is
> > safe enough) really isn't a function of the DMA device's driver, but a
> > function of the DMA device.
>
> It's a function of the DMA device, but the entity in the kernel with
> the most knowledge about this is the device's driver. The driver also
> has the best ability to make informed decisions, perhaps looking at
> the device's properties (like the "non-removable" one for SD/MMC) to
> help make decisions without us having to create a new property to
> describe trust and then argue about who sets it and when.

I'd definitely disagree on this point. The driver doesn't know
anything about the hardware unless it comes from DT. It might not be
an explicit property and you might infer stuff from the compatible
string, but that's still coming from DT. In your example, the QC
SD/MMC driver always sets the flag because it assumes the specific
device (compatible string it's matching) is always secure. So the
driver is just passing along information from DT. So I'm saying let's
make it a generic property that says if a device is secure enough to
allow lazy-tlb-flush.

Again, I'm not going to Nak/Ack the series, but this is my 2 cents.

-Saravana
diff mbox series

Patch

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 808ab70d5df5..0c84a4c06110 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -28,8 +28,19 @@ 
 static struct kset *iommu_group_kset;
 static DEFINE_IDA(iommu_group_ida);
 
+enum iommu_strictness {
+	IOMMU_DEFAULT_STRICTNESS = -1,
+	IOMMU_NOT_STRICT = 0,
+	IOMMU_STRICT = 1,
+};
+static inline enum iommu_strictness bool_to_strictness(bool strictness)
+{
+	return (enum iommu_strictness)strictness;
+}
+
 static unsigned int iommu_def_domain_type __read_mostly;
-static bool iommu_dma_strict __read_mostly = true;
+static enum iommu_strictness cmdline_dma_strict __read_mostly = IOMMU_DEFAULT_STRICTNESS;
+static enum iommu_strictness driver_dma_strict __read_mostly = IOMMU_DEFAULT_STRICTNESS;
 static u32 iommu_cmd_line __read_mostly;
 
 struct iommu_group {
@@ -69,7 +80,6 @@  static const char * const iommu_group_resv_type_string[] = {
 };
 
 #define IOMMU_CMD_LINE_DMA_API		BIT(0)
-#define IOMMU_CMD_LINE_STRICT		BIT(1)
 
 static int iommu_alloc_default_domain(struct iommu_group *group,
 				      struct device *dev);
@@ -336,25 +346,38 @@  early_param("iommu.passthrough", iommu_set_def_domain_type);
 
 static int __init iommu_dma_setup(char *str)
 {
-	int ret = kstrtobool(str, &iommu_dma_strict);
+	bool strict;
+	int ret = kstrtobool(str, &strict);
 
 	if (!ret)
-		iommu_cmd_line |= IOMMU_CMD_LINE_STRICT;
+		cmdline_dma_strict = bool_to_strictness(strict);
 	return ret;
 }
 early_param("iommu.strict", iommu_dma_setup);
 
 void iommu_set_dma_strict(bool strict)
 {
-	if (strict || !(iommu_cmd_line & IOMMU_CMD_LINE_STRICT))
-		iommu_dma_strict = strict;
+	/* A driver can request strictness but not the other way around */
+	if (driver_dma_strict != IOMMU_STRICT)
+		driver_dma_strict = bool_to_strictness(strict);
 }
 
 bool iommu_get_dma_strict(struct iommu_domain *domain)
 {
-	/* only allow lazy flushing for DMA domains */
-	if (domain->type == IOMMU_DOMAIN_DMA)
-		return iommu_dma_strict;
+	/* Non-DMA domains or anyone forcing it to strict makes it strict */
+	if (domain->type != IOMMU_DOMAIN_DMA ||
+	    cmdline_dma_strict == IOMMU_STRICT ||
+	    driver_dma_strict == IOMMU_STRICT ||
+	    domain->force_strict)
+		return true;
+
+	/* Anyone requesting non-strict (if no forces) makes it non-strict */
+	if (cmdline_dma_strict == IOMMU_NOT_STRICT ||
+	    driver_dma_strict == IOMMU_NOT_STRICT ||
+	    domain->request_non_strict)
+		return false;
+
+	/* Nobody said anything, so it's strict by default */
 	return true;
 }
 EXPORT_SYMBOL_GPL(iommu_get_dma_strict);
@@ -1519,7 +1542,8 @@  static int iommu_get_def_domain_type(struct device *dev)
 
 static int iommu_group_alloc_default_domain(struct bus_type *bus,
 					    struct iommu_group *group,
-					    unsigned int type)
+					    unsigned int type,
+					    struct device *dev)
 {
 	struct iommu_domain *dom;
 
@@ -1534,6 +1558,12 @@  static int iommu_group_alloc_default_domain(struct bus_type *bus,
 	if (!dom)
 		return -ENOMEM;
 
+	/* Save the strictness requests from the device */
+	if (dev && type == IOMMU_DOMAIN_DMA) {
+		dom->request_non_strict = dev->request_non_strict_iommu;
+		dom->force_strict = dev->force_strict_iommu;
+	}
+
 	group->default_domain = dom;
 	if (!group->domain)
 		group->domain = dom;
@@ -1550,7 +1580,7 @@  static int iommu_alloc_default_domain(struct iommu_group *group,
 
 	type = iommu_get_def_domain_type(dev) ? : iommu_def_domain_type;
 
-	return iommu_group_alloc_default_domain(dev->bus, group, type);
+	return iommu_group_alloc_default_domain(dev->bus, group, type, dev);
 }
 
 /**
@@ -1721,7 +1751,7 @@  static void probe_alloc_default_domain(struct bus_type *bus,
 	if (!gtype.type)
 		gtype.type = iommu_def_domain_type;
 
-	iommu_group_alloc_default_domain(bus, group, gtype.type);
+	iommu_group_alloc_default_domain(bus, group, gtype.type, NULL);
 
 }
 
@@ -3130,7 +3160,7 @@  static int iommu_change_dev_def_domain(struct iommu_group *group,
 	}
 
 	/* Sets group->default_domain to the newly allocated domain */
-	ret = iommu_group_alloc_default_domain(dev->bus, group, type);
+	ret = iommu_group_alloc_default_domain(dev->bus, group, type, dev);
 	if (ret)
 		goto out;
 
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 32d448050bf7..0bddef77f415 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -81,6 +81,8 @@  struct iommu_domain_geometry {
 
 struct iommu_domain {
 	unsigned type;
+	bool force_strict:1;
+	bool request_non_strict:1;
 	const struct iommu_ops *ops;
 	unsigned long pgsize_bitmap;	/* Bitmap of page sizes in use */
 	iommu_fault_handler_t handler;