diff mbox series

[v3,4/7] x86/iommu: introduce a rangeset to perform hwdom IOMMU setup

Message ID 20231215141832.9492-5-roger.pau@citrix.com (mailing list archive)
State Superseded
Headers show
Series x86/iommu: improve setup time of hwdom IOMMU | expand

Commit Message

Roger Pau Monné Dec. 15, 2023, 2:18 p.m. UTC
This change just introduces the boilerplate code in order to use a rangeset
when setting up the hardware domain IOMMU mappings.  The rangeset is never
populated in this patch, so it's a non-functional change as far as the mappings
the domain gets established.

Note there will be a change for HVM domains (ie: PVH dom0) when the code
introduced here gets used: the p2m mappings will be established using
map_mmio_regions() instead of p2m_add_identity_entry(), so that ranges can be
mapped with a single function call if possible.  Note that the interface of
map_mmio_regions() doesn't allow creating read-only mappings, but so far there
are no such mappings created for PVH dom0 in arch_iommu_hwdom_init().

Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
---
Changes since v2:
 - Better deal with read-only regions.
 - Destroy rangeset earlier.
 - Switch to using map_data.flush_flags.
 - Reword commit message to clarify the change in behaviour for HVM will only
   take effect after later changes.

Changes since v1:
 - Split from bigger patch.
---
 xen/drivers/passthrough/x86/iommu.c | 102 +++++++++++++++++++++++++++-
 1 file changed, 101 insertions(+), 1 deletion(-)

Comments

Jan Beulich Dec. 19, 2023, 4:06 p.m. UTC | #1
On 15.12.2023 15:18, Roger Pau Monne wrote:
> --- a/xen/drivers/passthrough/x86/iommu.c
> +++ b/xen/drivers/passthrough/x86/iommu.c
> @@ -370,10 +370,88 @@ static unsigned int __hwdom_init hwdom_iommu_map(const struct domain *d,
>      return perms;
>  }
>  
> +struct map_data {
> +    struct domain *d;
> +    unsigned int flush_flags;
> +    bool mmio_ro;
> +};
> +
> +static int __hwdom_init cf_check identity_map(unsigned long s, unsigned long e,
> +                                              void *data)
> +{
> +    struct map_data *info = data;
> +    struct domain *d = info->d;
> +    long rc;
> +
> +    if ( iommu_verbose )
> +        printk(XENLOG_INFO " [%010lx, %010lx] R%c\n",
> +               s, e, info->mmio_ro ? 'O' : 'W');
> +
> +    if ( paging_mode_translate(d) )
> +    {
> +        if ( info->mmio_ro )
> +        {
> +            ASSERT_UNREACHABLE();
> +            return 0;

Is this meant to be in line with the main return statement's comment?
I'm inclined to ask for an actual error code (-EOPNOTSUPP?) here.

> +        }
> +        while ( (rc = map_mmio_regions(d, _gfn(s), e - s + 1, _mfn(s))) > 0 )
> +        {
> +            s += rc;
> +            process_pending_softirqs();
> +        }
> +    }
> +    else
> +    {
> +        const unsigned int perms = IOMMUF_readable | IOMMUF_preempt |
> +                                   (info->mmio_ro ? 0 : IOMMUF_writable);
> +
> +        /*
> +         * Read-only ranges are only created based on the contents of mmio
> +         * read-only rangeset, and hence need the additional iomem permissions
> +         * check.
> +         */
> +        while( info->mmio_ro && s <= e && !iomem_access_permitted(d, s, e) )

Nit: Missing blank after "while".

> +        {
> +            /*
> +             * Consume a frame per iteration until the reminder is accessible

Nit: remainder?

> +             * or there's nothing left to map.
> +             */
> +            if ( iomem_access_permitted(d, s, s) )
> +            {
> +                rc = iommu_map(d, _dfn(s), _mfn(s), 1, perms,
> +                               &info->flush_flags);
> +                if ( rc < 0 )
> +                    break;
> +                /* Must map a frame at least, which is what we request for. */
> +                ASSERT(rc == 1);
> +                process_pending_softirqs();
> +            }
> +            s++;
> +        }
> +        while ( (rc = iommu_map(d, _dfn(s), _mfn(s), e - s + 1,
> +                                perms, &info->flush_flags)) > 0 )
> +        {
> +            s += rc;
> +            process_pending_softirqs();
> +        }
> +    }
> +    ASSERT(rc <= 0);
> +    if ( rc )
> +        printk(XENLOG_WARNING
> +               "IOMMU identity mapping of [%lx, %lx] failed: %ld\n",
> +               s, e, rc);
> +
> +    /* Ignore errors and attempt to map the remaining regions. */
> +    return 0;
> +}
> +
>  void __hwdom_init arch_iommu_hwdom_init(struct domain *d)
>  {
>      unsigned long i, top, max_pfn, start, count;
>      unsigned int flush_flags = 0, start_perms = 0;
> +    struct rangeset *map;
> +    struct map_data map_data = { .d = d };
> +    int rc;
>  
>      BUG_ON(!is_hardware_domain(d));
>  
> @@ -397,6 +475,10 @@ void __hwdom_init arch_iommu_hwdom_init(struct domain *d)
>      if ( iommu_hwdom_passthrough )
>          return;
>  
> +    map = rangeset_new(NULL, NULL, 0);
> +    if ( !map )
> +        panic("IOMMU init: unable to allocate rangeset\n");
> +
>      max_pfn = (GB(4) >> PAGE_SHIFT) - 1;
>      top = max(max_pdx, pfn_to_pdx(max_pfn) + 1);
>  
> @@ -451,8 +533,26 @@ void __hwdom_init arch_iommu_hwdom_init(struct domain *d)
>              goto commit;
>      }
>  
> +    if ( iommu_verbose )
> +        printk(XENLOG_INFO "%pd: identity mappings for IOMMU:\n", d);
> +
> +    rc = rangeset_report_ranges(map, 0, ~0UL, identity_map, &map_data);
> +    if ( rc )
> +        panic("IOMMU unable to create mappings: %d\n", rc);
> +    rangeset_destroy(map);
> +
> +    if ( is_pv_domain(d) )
> +    {
> +        map_data.mmio_ro = true;
> +        rc = rangeset_report_ranges(mmio_ro_ranges, 0, ~0UL, identity_map,
> +                                    &map_data);
> +        if ( rc )
> +            panic("IOMMU unable to create read-only mappings: %d\n", rc);
> +    }

As it stands identity_map() deliberately returns no error. Yet here
you panic() in case of receiving an error, despite that being impossible?

Also if we want/need to panic() here, can we avoid having two instances
of almost the same string literal in .rodata? Along the lines of

    rc = rangeset_report_ranges(map, 0, ~0UL, identity_map, &map_data);
    rangeset_destroy(map);
    if ( !rc && is_pv_domain(d) )
    {
        map_data.mmio_ro = true;
        rc = rangeset_report_ranges(mmio_ro_ranges, 0, ~0UL, identity_map,
                                    &map_data);
    }
    if ( rc )
        panic("IOMMU unable to create %smappings: %d\n",
              map_data.mmio_ro ? "read-only " : "", rc);

?

> +    map_data.flush_flags |= flush_flags;

So you decided to still keep the standalone "flush_flags" around. Is
there a particular reason?

Jan
Roger Pau Monné Dec. 20, 2023, 9:52 a.m. UTC | #2
On Tue, Dec 19, 2023 at 05:06:32PM +0100, Jan Beulich wrote:
> On 15.12.2023 15:18, Roger Pau Monne wrote:
> > --- a/xen/drivers/passthrough/x86/iommu.c
> > +++ b/xen/drivers/passthrough/x86/iommu.c
> > @@ -370,10 +370,88 @@ static unsigned int __hwdom_init hwdom_iommu_map(const struct domain *d,
> >      return perms;
> >  }
> >  
> > +struct map_data {
> > +    struct domain *d;
> > +    unsigned int flush_flags;
> > +    bool mmio_ro;
> > +};
> > +
> > +static int __hwdom_init cf_check identity_map(unsigned long s, unsigned long e,
> > +                                              void *data)
> > +{
> > +    struct map_data *info = data;
> > +    struct domain *d = info->d;
> > +    long rc;
> > +
> > +    if ( iommu_verbose )
> > +        printk(XENLOG_INFO " [%010lx, %010lx] R%c\n",
> > +               s, e, info->mmio_ro ? 'O' : 'W');
> > +
> > +    if ( paging_mode_translate(d) )
> > +    {
> > +        if ( info->mmio_ro )
> > +        {
> > +            ASSERT_UNREACHABLE();
> > +            return 0;
> 
> Is this meant to be in line with the main return statement's comment?
> I'm inclined to ask for an actual error code (-EOPNOTSUPP?) here.

Hm, yes, for that one it might make sense to return -EOPNOTSUPP, as
all attempts to map will end up failing anyway.

> > +        }
> > +        while ( (rc = map_mmio_regions(d, _gfn(s), e - s + 1, _mfn(s))) > 0 )
> > +        {
> > +            s += rc;
> > +            process_pending_softirqs();
> > +        }
> > +    }
> > +    else
> > +    {
> > +        const unsigned int perms = IOMMUF_readable | IOMMUF_preempt |
> > +                                   (info->mmio_ro ? 0 : IOMMUF_writable);
> > +
> > +        /*
> > +         * Read-only ranges are only created based on the contents of mmio
> > +         * read-only rangeset, and hence need the additional iomem permissions
> > +         * check.
> > +         */
> > +        while( info->mmio_ro && s <= e && !iomem_access_permitted(d, s, e) )
> 
> Nit: Missing blank after "while".
> 
> > +        {
> > +            /*
> > +             * Consume a frame per iteration until the reminder is accessible
> 
> Nit: remainder?
> 
> > +             * or there's nothing left to map.
> > +             */
> > +            if ( iomem_access_permitted(d, s, s) )
> > +            {
> > +                rc = iommu_map(d, _dfn(s), _mfn(s), 1, perms,
> > +                               &info->flush_flags);
> > +                if ( rc < 0 )
> > +                    break;
> > +                /* Must map a frame at least, which is what we request for. */
> > +                ASSERT(rc == 1);
> > +                process_pending_softirqs();
> > +            }
> > +            s++;
> > +        }
> > +        while ( (rc = iommu_map(d, _dfn(s), _mfn(s), e - s + 1,
> > +                                perms, &info->flush_flags)) > 0 )
> > +        {
> > +            s += rc;
> > +            process_pending_softirqs();
> > +        }
> > +    }
> > +    ASSERT(rc <= 0);
> > +    if ( rc )
> > +        printk(XENLOG_WARNING
> > +               "IOMMU identity mapping of [%lx, %lx] failed: %ld\n",
> > +               s, e, rc);
> > +
> > +    /* Ignore errors and attempt to map the remaining regions. */
> > +    return 0;
> > +}
> > +
> >  void __hwdom_init arch_iommu_hwdom_init(struct domain *d)
> >  {
> >      unsigned long i, top, max_pfn, start, count;
> >      unsigned int flush_flags = 0, start_perms = 0;
> > +    struct rangeset *map;
> > +    struct map_data map_data = { .d = d };
> > +    int rc;
> >  
> >      BUG_ON(!is_hardware_domain(d));
> >  
> > @@ -397,6 +475,10 @@ void __hwdom_init arch_iommu_hwdom_init(struct domain *d)
> >      if ( iommu_hwdom_passthrough )
> >          return;
> >  
> > +    map = rangeset_new(NULL, NULL, 0);
> > +    if ( !map )
> > +        panic("IOMMU init: unable to allocate rangeset\n");
> > +
> >      max_pfn = (GB(4) >> PAGE_SHIFT) - 1;
> >      top = max(max_pdx, pfn_to_pdx(max_pfn) + 1);
> >  
> > @@ -451,8 +533,26 @@ void __hwdom_init arch_iommu_hwdom_init(struct domain *d)
> >              goto commit;
> >      }
> >  
> > +    if ( iommu_verbose )
> > +        printk(XENLOG_INFO "%pd: identity mappings for IOMMU:\n", d);
> > +
> > +    rc = rangeset_report_ranges(map, 0, ~0UL, identity_map, &map_data);
> > +    if ( rc )
> > +        panic("IOMMU unable to create mappings: %d\n", rc);
> > +    rangeset_destroy(map);
> > +
> > +    if ( is_pv_domain(d) )
> > +    {
> > +        map_data.mmio_ro = true;
> > +        rc = rangeset_report_ranges(mmio_ro_ranges, 0, ~0UL, identity_map,
> > +                                    &map_data);
> > +        if ( rc )
> > +            panic("IOMMU unable to create read-only mappings: %d\n", rc);
> > +    }
> 
> As it stands identity_map() deliberately returns no error. Yet here
> you panic() in case of receiving an error, despite that being impossible?

I wasn't sure whether rangeset_report_ranges() itself could return an
error.  Thinking twice, we might want to print a message and attempt
to continue, as in the worst case dom0 might be missing maps.

> Also if we want/need to panic() here, can we avoid having two instances
> of almost the same string literal in .rodata? Along the lines of
> 
>     rc = rangeset_report_ranges(map, 0, ~0UL, identity_map, &map_data);
>     rangeset_destroy(map);
>     if ( !rc && is_pv_domain(d) )
>     {
>         map_data.mmio_ro = true;
>         rc = rangeset_report_ranges(mmio_ro_ranges, 0, ~0UL, identity_map,
>                                     &map_data);
>     }
>     if ( rc )
>         panic("IOMMU unable to create %smappings: %d\n",
>               map_data.mmio_ro ? "read-only " : "", rc);
> 
> ?
> 
> > +    map_data.flush_flags |= flush_flags;
> 
> So you decided to still keep the standalone "flush_flags" around. Is
> there a particular reason?

Oh, git distracted with the other changes and forgot about this one.

Thanks, Roger.
diff mbox series

Patch

diff --git a/xen/drivers/passthrough/x86/iommu.c b/xen/drivers/passthrough/x86/iommu.c
index 857dccb6a465..ca5e0bca810f 100644
--- a/xen/drivers/passthrough/x86/iommu.c
+++ b/xen/drivers/passthrough/x86/iommu.c
@@ -370,10 +370,88 @@  static unsigned int __hwdom_init hwdom_iommu_map(const struct domain *d,
     return perms;
 }
 
+struct map_data {
+    struct domain *d;
+    unsigned int flush_flags;
+    bool mmio_ro;
+};
+
+static int __hwdom_init cf_check identity_map(unsigned long s, unsigned long e,
+                                              void *data)
+{
+    struct map_data *info = data;
+    struct domain *d = info->d;
+    long rc;
+
+    if ( iommu_verbose )
+        printk(XENLOG_INFO " [%010lx, %010lx] R%c\n",
+               s, e, info->mmio_ro ? 'O' : 'W');
+
+    if ( paging_mode_translate(d) )
+    {
+        if ( info->mmio_ro )
+        {
+            ASSERT_UNREACHABLE();
+            return 0;
+        }
+        while ( (rc = map_mmio_regions(d, _gfn(s), e - s + 1, _mfn(s))) > 0 )
+        {
+            s += rc;
+            process_pending_softirqs();
+        }
+    }
+    else
+    {
+        const unsigned int perms = IOMMUF_readable | IOMMUF_preempt |
+                                   (info->mmio_ro ? 0 : IOMMUF_writable);
+
+        /*
+         * Read-only ranges are only created based on the contents of mmio
+         * read-only rangeset, and hence need the additional iomem permissions
+         * check.
+         */
+        while( info->mmio_ro && s <= e && !iomem_access_permitted(d, s, e) )
+        {
+            /*
+             * Consume a frame per iteration until the reminder is accessible
+             * or there's nothing left to map.
+             */
+            if ( iomem_access_permitted(d, s, s) )
+            {
+                rc = iommu_map(d, _dfn(s), _mfn(s), 1, perms,
+                               &info->flush_flags);
+                if ( rc < 0 )
+                    break;
+                /* Must map a frame at least, which is what we request for. */
+                ASSERT(rc == 1);
+                process_pending_softirqs();
+            }
+            s++;
+        }
+        while ( (rc = iommu_map(d, _dfn(s), _mfn(s), e - s + 1,
+                                perms, &info->flush_flags)) > 0 )
+        {
+            s += rc;
+            process_pending_softirqs();
+        }
+    }
+    ASSERT(rc <= 0);
+    if ( rc )
+        printk(XENLOG_WARNING
+               "IOMMU identity mapping of [%lx, %lx] failed: %ld\n",
+               s, e, rc);
+
+    /* Ignore errors and attempt to map the remaining regions. */
+    return 0;
+}
+
 void __hwdom_init arch_iommu_hwdom_init(struct domain *d)
 {
     unsigned long i, top, max_pfn, start, count;
     unsigned int flush_flags = 0, start_perms = 0;
+    struct rangeset *map;
+    struct map_data map_data = { .d = d };
+    int rc;
 
     BUG_ON(!is_hardware_domain(d));
 
@@ -397,6 +475,10 @@  void __hwdom_init arch_iommu_hwdom_init(struct domain *d)
     if ( iommu_hwdom_passthrough )
         return;
 
+    map = rangeset_new(NULL, NULL, 0);
+    if ( !map )
+        panic("IOMMU init: unable to allocate rangeset\n");
+
     max_pfn = (GB(4) >> PAGE_SHIFT) - 1;
     top = max(max_pdx, pfn_to_pdx(max_pfn) + 1);
 
@@ -451,8 +533,26 @@  void __hwdom_init arch_iommu_hwdom_init(struct domain *d)
             goto commit;
     }
 
+    if ( iommu_verbose )
+        printk(XENLOG_INFO "%pd: identity mappings for IOMMU:\n", d);
+
+    rc = rangeset_report_ranges(map, 0, ~0UL, identity_map, &map_data);
+    if ( rc )
+        panic("IOMMU unable to create mappings: %d\n", rc);
+    rangeset_destroy(map);
+
+    if ( is_pv_domain(d) )
+    {
+        map_data.mmio_ro = true;
+        rc = rangeset_report_ranges(mmio_ro_ranges, 0, ~0UL, identity_map,
+                                    &map_data);
+        if ( rc )
+            panic("IOMMU unable to create read-only mappings: %d\n", rc);
+    }
+
+    map_data.flush_flags |= flush_flags;
     /* Use if to avoid compiler warning */
-    if ( iommu_iotlb_flush_all(d, flush_flags) )
+    if ( iommu_iotlb_flush_all(d, map_data.flush_flags) )
         return;
 }