diff mbox series

[v2,8/9] xue: mark DMA buffers as reserved for the device

Message ID 0a30e15d9195d0cd09a5ea94297dc8a74bc12c97.1657121519.git-series.marmarek@invisiblethingslab.com (mailing list archive)
State Superseded
Headers show
Series Add Xue - console over USB 3 Debug Capability | expand

Commit Message

Marek Marczykowski-Górecki July 6, 2022, 3:32 p.m. UTC
The important part is to include those buffers in IOMMU page table
relevant for the USB controller. Otherwise, DbC will stop working as
soon as IOMMU is enabled, regardless of to which domain device assigned
(be it xen or dom0).
If the device is passed through to dom0 or other domain (see later
patches), that domain will effectively have access to those buffers too.
It does give such domain yet another way to DoS the system (as is the
case when having PCI device assigned already), but also possibly steal
the console ring content. Thus, such domain should be a trusted one.
In any case, prevent anything else being placed on those pages by adding
artificial padding.

Using this API for DbC pages requires raising MAX_USER_RMRR_PAGES.

Signed-off-by: Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com>
---
 xen/drivers/char/xue.c             | 43 ++++++++++++++++++++-----------
 xen/drivers/passthrough/vtd/dmar.c |  2 +-
 2 files changed, 30 insertions(+), 15 deletions(-)

Comments

Jan Beulich July 14, 2022, 11:51 a.m. UTC | #1
On 06.07.2022 17:32, Marek Marczykowski-Górecki wrote:
> The important part is to include those buffers in IOMMU page table
> relevant for the USB controller. Otherwise, DbC will stop working as
> soon as IOMMU is enabled, regardless of to which domain device assigned
> (be it xen or dom0).
> If the device is passed through to dom0 or other domain (see later
> patches), that domain will effectively have access to those buffers too.
> It does give such domain yet another way to DoS the system (as is the
> case when having PCI device assigned already), but also possibly steal
> the console ring content. Thus, such domain should be a trusted one.
> In any case, prevent anything else being placed on those pages by adding
> artificial padding.

I don't think this device should be allowed to be assigned to any
DomU. Just like the EHCI driver, at some point in the series you
will want to call pci_hide_device() (you may already do, and I may
merely have missed it).

> Using this API for DbC pages requires raising MAX_USER_RMRR_PAGES.

I disagree here: This is merely an effect of you re-using the pre-
existing functionality there with too little customization. I think
the respective check shouldn't be applied for internal uses.

> @@ -952,13 +953,21 @@ static struct uart_driver xue_uart_driver = {
>      .flush = xue_uart_flush,
>  };
>  
> -static struct xue_trb evt_trb[XUE_TRB_RING_CAP] __aligned(XUE_PAGE_SIZE);
> -static struct xue_trb out_trb[XUE_TRB_RING_CAP] __aligned(XUE_PAGE_SIZE);
> -static struct xue_trb in_trb[XUE_TRB_RING_CAP] __aligned(XUE_PAGE_SIZE);
> -static struct xue_erst_segment erst __aligned(64);
> -static struct xue_dbc_ctx ctx __aligned(64);
> -static uint8_t wrk_buf[XUE_WORK_RING_CAP] __aligned(XUE_PAGE_SIZE);
> -static char str_buf[XUE_PAGE_SIZE] __aligned(64);
> +struct xue_dma_bufs {
> +    struct xue_trb evt_trb[XUE_TRB_RING_CAP] __aligned(XUE_PAGE_SIZE);
> +    struct xue_trb out_trb[XUE_TRB_RING_CAP] __aligned(XUE_PAGE_SIZE);
> +    struct xue_trb in_trb[XUE_TRB_RING_CAP] __aligned(XUE_PAGE_SIZE);
> +    struct xue_erst_segment erst __aligned(64);
> +    struct xue_dbc_ctx ctx __aligned(64);
> +    uint8_t wrk_buf[XUE_WORK_RING_CAP] __aligned(XUE_PAGE_SIZE);
> +    char str_buf[XUE_PAGE_SIZE] __aligned(64);

Please arrange data such that the amount of unused (padding) space
is minimal. If I'm not mistaken the page-size-aligned fields are
also an even multiple of pages in size, in which case they all want
to go ahead of the 64-byte aligned but sub-page-size fields (as
per an earlier comment str_buf[] likely can go away anyway, being
the one field which is a page in size but having smaller alignment).

> +    /*
> +     * Don't place anything else on this page - it will be
> +     * DMA-reachable by the USB controller.
> +     */
> +    char _pad[0] __aligned(XUE_PAGE_SIZE);

I don't think this is needed, due to sizeof() being required to be
a multiple of alignof().

> +};
> +static struct xue_dma_bufs xue_dma_bufs __aligned(XUE_PAGE_SIZE);

I don't think the alignment here is needed, as the struct will
already have suitable alignment (derived from the biggest field
alignment value). Instead please consider putting this in
.bss.page_aligned.

> @@ -990,16 +999,22 @@ void __init xue_uart_init(void)
>          xue->sbdf = PCI_SBDF(0, bus, slot, func);
>      }
>  
> -    xue->dbc_ctx = &ctx;
> -    xue->dbc_erst = &erst;
> -    xue->dbc_ering.trb = evt_trb;
> -    xue->dbc_oring.trb = out_trb;
> -    xue->dbc_iring.trb = in_trb;
> -    xue->dbc_owork.buf = wrk_buf;
> -    xue->dbc_str = str_buf;
> +    xue->dbc_ctx = &xue_dma_bufs.ctx;
> +    xue->dbc_erst = &xue_dma_bufs.erst;
> +    xue->dbc_ering.trb = xue_dma_bufs.evt_trb;
> +    xue->dbc_oring.trb = xue_dma_bufs.out_trb;
> +    xue->dbc_iring.trb = xue_dma_bufs.in_trb;
> +    xue->dbc_owork.buf = xue_dma_bufs.wrk_buf;
> +    xue->dbc_str = xue_dma_bufs.str_buf;
>  
>      if ( xue_open(xue) )
> +    {
> +        iommu_add_extra_reserved_device_memory(
> +                PFN_DOWN(virt_to_maddr(&xue_dma_bufs)),

virt_to_pfn()?

Jan
Marek Marczykowski-Górecki July 18, 2022, 1:15 p.m. UTC | #2
On Thu, Jul 14, 2022 at 01:51:06PM +0200, Jan Beulich wrote:
> On 06.07.2022 17:32, Marek Marczykowski-Górecki wrote:
> > The important part is to include those buffers in IOMMU page table
> > relevant for the USB controller. Otherwise, DbC will stop working as
> > soon as IOMMU is enabled, regardless of to which domain device assigned
> > (be it xen or dom0).
> > If the device is passed through to dom0 or other domain (see later
> > patches), that domain will effectively have access to those buffers too.
> > It does give such domain yet another way to DoS the system (as is the
> > case when having PCI device assigned already), but also possibly steal
> > the console ring content. Thus, such domain should be a trusted one.
> > In any case, prevent anything else being placed on those pages by adding
> > artificial padding.
> 
> I don't think this device should be allowed to be assigned to any
> DomU. Just like the EHCI driver, at some point in the series you
> will want to call pci_hide_device() (you may already do, and I may
> merely have missed it).

There is the major difference compared to the EHCI driver: the XHCI
hardware interface was designed for debug capability to be driven by
separate driver (see description of patch 9). The device reset is the
only action that needs some coordination and this patch series follows
what Linux does (re-enable DbC part, when it detects the XHCI reset).
Having this capability is especially important when one has only a
single USB controller (many, if not most recent Intel platforms) and has
some important devices on USB (for example system disk, or the only ethernet
controller - I have both of those cases in my test lab...).

Anyway, this patch is necessary even if no domain would use the device.
As Andrew explained in response to the cover letter of the RFC series,
Xen doesn't have IOMMU context for devices used by Xen exclusively. So,
with the current model, it would be pci_ro_device() + dom0's IOMMU
context.

> > Using this API for DbC pages requires raising MAX_USER_RMRR_PAGES.
> 
> I disagree here: This is merely an effect of you re-using the pre-
> existing functionality there with too little customization. I think
> the respective check shouldn't be applied for internal uses.

Ok, I'll move the check.
Marek Marczykowski-Górecki July 20, 2022, 8:17 p.m. UTC | #3
On Thu, Jul 14, 2022 at 01:51:06PM +0200, Jan Beulich wrote:
> On 06.07.2022 17:32, Marek Marczykowski-Górecki wrote:
> > +    /*
> > +     * Don't place anything else on this page - it will be
> > +     * DMA-reachable by the USB controller.
> > +     */
> > +    char _pad[0] __aligned(XUE_PAGE_SIZE);
> 
> I don't think this is needed, due to sizeof() being required to be
> a multiple of alignof().

I'd prefer to be explicit about this, because if some future change
breaks this property (makes alignment smaller than a page size), the
result will be pretty bad.

> > +};
> > +static struct xue_dma_bufs xue_dma_bufs __aligned(XUE_PAGE_SIZE);
> 
> I don't think the alignment here is needed, as the struct will
> already have suitable alignment (derived from the biggest field
> alignment value). Instead please consider putting this in
> .bss.page_aligned.

Ok.

> > @@ -990,16 +999,22 @@ void __init xue_uart_init(void)
> >          xue->sbdf = PCI_SBDF(0, bus, slot, func);
> >      }
> >  
> > -    xue->dbc_ctx = &ctx;
> > -    xue->dbc_erst = &erst;
> > -    xue->dbc_ering.trb = evt_trb;
> > -    xue->dbc_oring.trb = out_trb;
> > -    xue->dbc_iring.trb = in_trb;
> > -    xue->dbc_owork.buf = wrk_buf;
> > -    xue->dbc_str = str_buf;
> > +    xue->dbc_ctx = &xue_dma_bufs.ctx;
> > +    xue->dbc_erst = &xue_dma_bufs.erst;
> > +    xue->dbc_ering.trb = xue_dma_bufs.evt_trb;
> > +    xue->dbc_oring.trb = xue_dma_bufs.out_trb;
> > +    xue->dbc_iring.trb = xue_dma_bufs.in_trb;
> > +    xue->dbc_owork.buf = xue_dma_bufs.wrk_buf;
> > +    xue->dbc_str = xue_dma_bufs.str_buf;
> >  
> >      if ( xue_open(xue) )
> > +    {
> > +        iommu_add_extra_reserved_device_memory(
> > +                PFN_DOWN(virt_to_maddr(&xue_dma_bufs)),
> 
> virt_to_pfn()?

Doesn't exist. Did you mean virt_to_mfn()?
Jan Beulich July 21, 2022, 10:30 a.m. UTC | #4
On 20.07.2022 22:17, Marek Marczykowski-Górecki wrote:
> On Thu, Jul 14, 2022 at 01:51:06PM +0200, Jan Beulich wrote:
>> On 06.07.2022 17:32, Marek Marczykowski-Górecki wrote:
>>> +    /*
>>> +     * Don't place anything else on this page - it will be
>>> +     * DMA-reachable by the USB controller.
>>> +     */
>>> +    char _pad[0] __aligned(XUE_PAGE_SIZE);
>>
>> I don't think this is needed, due to sizeof() being required to be
>> a multiple of alignof().
> 
> I'd prefer to be explicit about this, because if some future change
> breaks this property (makes alignment smaller than a page size), the
> result will be pretty bad.

I don't mind you leaving the comment; anyone making adjustments there
ought to be checking the effects of what they're doing.

>>> @@ -990,16 +999,22 @@ void __init xue_uart_init(void)
>>>          xue->sbdf = PCI_SBDF(0, bus, slot, func);
>>>      }
>>>  
>>> -    xue->dbc_ctx = &ctx;
>>> -    xue->dbc_erst = &erst;
>>> -    xue->dbc_ering.trb = evt_trb;
>>> -    xue->dbc_oring.trb = out_trb;
>>> -    xue->dbc_iring.trb = in_trb;
>>> -    xue->dbc_owork.buf = wrk_buf;
>>> -    xue->dbc_str = str_buf;
>>> +    xue->dbc_ctx = &xue_dma_bufs.ctx;
>>> +    xue->dbc_erst = &xue_dma_bufs.erst;
>>> +    xue->dbc_ering.trb = xue_dma_bufs.evt_trb;
>>> +    xue->dbc_oring.trb = xue_dma_bufs.out_trb;
>>> +    xue->dbc_iring.trb = xue_dma_bufs.in_trb;
>>> +    xue->dbc_owork.buf = xue_dma_bufs.wrk_buf;
>>> +    xue->dbc_str = xue_dma_bufs.str_buf;
>>>  
>>>      if ( xue_open(xue) )
>>> +    {
>>> +        iommu_add_extra_reserved_device_memory(
>>> +                PFN_DOWN(virt_to_maddr(&xue_dma_bufs)),
>>
>> virt_to_pfn()?
> 
> Doesn't exist. Did you mean virt_to_mfn()?

Oh, sorry, never mind then. virt_to_mfn() would be good to use if
the function took mfn_t, but as long as it doesn't what you have
is as good.

Jan
diff mbox series

Patch

diff --git a/xen/drivers/char/xue.c b/xen/drivers/char/xue.c
index 9d48068a5fba..a6c49bb43e97 100644
--- a/xen/drivers/char/xue.c
+++ b/xen/drivers/char/xue.c
@@ -26,6 +26,7 @@ 
 #include <xen/serial.h>
 #include <xen/timer.h>
 #include <xen/param.h>
+#include <xen/iommu.h>
 #include <asm/fixmap.h>
 #include <asm/io.h>
 #include <xen/mm.h>
@@ -952,13 +953,21 @@  static struct uart_driver xue_uart_driver = {
     .flush = xue_uart_flush,
 };
 
-static struct xue_trb evt_trb[XUE_TRB_RING_CAP] __aligned(XUE_PAGE_SIZE);
-static struct xue_trb out_trb[XUE_TRB_RING_CAP] __aligned(XUE_PAGE_SIZE);
-static struct xue_trb in_trb[XUE_TRB_RING_CAP] __aligned(XUE_PAGE_SIZE);
-static struct xue_erst_segment erst __aligned(64);
-static struct xue_dbc_ctx ctx __aligned(64);
-static uint8_t wrk_buf[XUE_WORK_RING_CAP] __aligned(XUE_PAGE_SIZE);
-static char str_buf[XUE_PAGE_SIZE] __aligned(64);
+struct xue_dma_bufs {
+    struct xue_trb evt_trb[XUE_TRB_RING_CAP] __aligned(XUE_PAGE_SIZE);
+    struct xue_trb out_trb[XUE_TRB_RING_CAP] __aligned(XUE_PAGE_SIZE);
+    struct xue_trb in_trb[XUE_TRB_RING_CAP] __aligned(XUE_PAGE_SIZE);
+    struct xue_erst_segment erst __aligned(64);
+    struct xue_dbc_ctx ctx __aligned(64);
+    uint8_t wrk_buf[XUE_WORK_RING_CAP] __aligned(XUE_PAGE_SIZE);
+    char str_buf[XUE_PAGE_SIZE] __aligned(64);
+    /*
+     * Don't place anything else on this page - it will be
+     * DMA-reachable by the USB controller.
+     */
+    char _pad[0] __aligned(XUE_PAGE_SIZE);
+};
+static struct xue_dma_bufs xue_dma_bufs __aligned(XUE_PAGE_SIZE);
 static char __initdata opt_dbgp[30];
 
 string_param("dbgp", opt_dbgp);
@@ -990,16 +999,22 @@  void __init xue_uart_init(void)
         xue->sbdf = PCI_SBDF(0, bus, slot, func);
     }
 
-    xue->dbc_ctx = &ctx;
-    xue->dbc_erst = &erst;
-    xue->dbc_ering.trb = evt_trb;
-    xue->dbc_oring.trb = out_trb;
-    xue->dbc_iring.trb = in_trb;
-    xue->dbc_owork.buf = wrk_buf;
-    xue->dbc_str = str_buf;
+    xue->dbc_ctx = &xue_dma_bufs.ctx;
+    xue->dbc_erst = &xue_dma_bufs.erst;
+    xue->dbc_ering.trb = xue_dma_bufs.evt_trb;
+    xue->dbc_oring.trb = xue_dma_bufs.out_trb;
+    xue->dbc_iring.trb = xue_dma_bufs.in_trb;
+    xue->dbc_owork.buf = xue_dma_bufs.wrk_buf;
+    xue->dbc_str = xue_dma_bufs.str_buf;
 
     if ( xue_open(xue) )
+    {
+        iommu_add_extra_reserved_device_memory(
+                PFN_DOWN(virt_to_maddr(&xue_dma_bufs)),
+                PFN_UP(sizeof(xue_dma_bufs)),
+                uart->xue.sbdf.sbdf);
         serial_register_uart(SERHND_DBGP, &xue_uart_driver, &xue_uart);
+    }
 }
 
 #ifdef XUE_DEBUG
diff --git a/xen/drivers/passthrough/vtd/dmar.c b/xen/drivers/passthrough/vtd/dmar.c
index 661a182b08d9..2caa3e9ad1b0 100644
--- a/xen/drivers/passthrough/vtd/dmar.c
+++ b/xen/drivers/passthrough/vtd/dmar.c
@@ -845,7 +845,7 @@  out:
     return ret;
 }
 
-#define MAX_USER_RMRR_PAGES 16
+#define MAX_USER_RMRR_PAGES 64
 #define MAX_USER_RMRR 10
 
 /* RMRR units derived from command line rmrr option. */