diff mbox series

xen: Fix host pci for stubdom

Message ID 20230320000554.8219-1-jandryuk@gmail.com (mailing list archive)
State New, archived
Headers show
Series xen: Fix host pci for stubdom | expand

Commit Message

Jason Andryuk March 20, 2023, 12:05 a.m. UTC
PCI passthrough for an HVM with a stubdom is PV PCI passthrough from
dom0 to the stubdom, and then QEMU passthrough of the PCI device inside
the stubdom.  xen-pciback has boolean module param passthrough which
controls "how to export PCI topology to guest".  If passthrough=1, the
frontend shows a PCI SBDF matching the backend host device.  When
passthough=0, the frontend will get a sequentially allocated SBDF.

libxl passes the host SBDF over QMP to QEMU.  For non-stubdom or stubdom
with passthrough=1, this works fine.  However, it fails for
passthrough=0 when QEMU can't find the sysfs node for the host SBDF.

Handle all these cases.  Look for the xenstore frontend nodes.  If they
are missing, then default to using the QMP command provided SBDF.  This
is the non-stubdom case.  If xenstore nodes are found, then read the
local SBDF from the xenstore nodes.  This will handle either
passthrough=0/1 case.

Based on a stubdom-specific patch originally by Marek
Marczykowski-Górecki <marmarek@invisiblethingslab.com>, which is based
on earlier work by HW42 <hw42@ipsumj.de>

Signed-off-by: Jason Andryuk <jandryuk@gmail.com>
---
 hw/xen/xen-host-pci-device.c | 96 +++++++++++++++++++++++++++++++++++-
 hw/xen/xen-host-pci-device.h |  6 +++
 2 files changed, 101 insertions(+), 1 deletion(-)

Comments

Bernhard Beschow March 20, 2023, 9:13 a.m. UTC | #1
Am 20. März 2023 00:05:54 UTC schrieb Jason Andryuk <jandryuk@gmail.com>:
>PCI passthrough for an HVM with a stubdom is PV PCI passthrough from
>dom0 to the stubdom, and then QEMU passthrough of the PCI device inside
>the stubdom.  xen-pciback has boolean module param passthrough which
>controls "how to export PCI topology to guest".  If passthrough=1, the
>frontend shows a PCI SBDF matching the backend host device.  When
>passthough=0, the frontend will get a sequentially allocated SBDF.
>
>libxl passes the host SBDF over QMP to QEMU.  For non-stubdom or stubdom
>with passthrough=1, this works fine.  However, it fails for
>passthrough=0 when QEMU can't find the sysfs node for the host SBDF.
>
>Handle all these cases.  Look for the xenstore frontend nodes.  If they
>are missing, then default to using the QMP command provided SBDF.  This
>is the non-stubdom case.  If xenstore nodes are found, then read the
>local SBDF from the xenstore nodes.  This will handle either
>passthrough=0/1 case.
>
>Based on a stubdom-specific patch originally by Marek
>Marczykowski-Górecki <marmarek@invisiblethingslab.com>, which is based
>on earlier work by HW42 <hw42@ipsumj.de>
>
>Signed-off-by: Jason Andryuk <jandryuk@gmail.com>
>---
> hw/xen/xen-host-pci-device.c | 96 +++++++++++++++++++++++++++++++++++-
> hw/xen/xen-host-pci-device.h |  6 +++
> 2 files changed, 101 insertions(+), 1 deletion(-)
>
>diff --git a/hw/xen/xen-host-pci-device.c b/hw/xen/xen-host-pci-device.c
>index 8c6e9a1716..51a72b432d 100644
>--- a/hw/xen/xen-host-pci-device.c
>+++ b/hw/xen/xen-host-pci-device.c
>@@ -9,6 +9,7 @@
> #include "qemu/osdep.h"
> #include "qapi/error.h"
> #include "qemu/cutils.h"
>+#include "hw/xen/xen-legacy-backend.h"
> #include "xen-host-pci-device.h"
> 
> #define XEN_HOST_PCI_MAX_EXT_CAP \
>@@ -33,13 +34,101 @@
> #define IORESOURCE_PREFETCH     0x00001000      /* No side effects */
> #define IORESOURCE_MEM_64       0x00100000
> 
>+/*
>+ * Non-passthrough (dom0) accesses are local PCI devices and use the given BDF
>+ * Passthough (stubdom) accesses are through PV frontend PCI device.  Those

I'm unable to parse this sentence, which may be due to my unfamiliarity with Xen terminology.

There is also an extra space before "Those".

Best regards,
Bernhard

>+ * either have a BDF identical to the backend's BFD (xen-backend.passthrough=1)
>+ * or a local virtual BDF (xen-backend.passthrough=0)
>+ *
>+ * We are always given the backend's BDF and need to lookup the appropriate
>+ * local BDF for sysfs access.
>+ */
>+static void xen_host_pci_fill_local_addr(XenHostPCIDevice *d, Error **errp)
>+{
>+    unsigned int num_devs, len, i;
>+    unsigned int domain, bus, dev, func;
>+    char *be_path;
>+    char path[80];
>+    char *msg;
>+
>+    be_path = qemu_xen_xs_read(xenstore, 0, "device/pci/0/backend", &len);
>+    if (!be_path) {
>+        /*
>+         * be_path doesn't exist, so we are dealing with a local
>+         * (non-passthough) device.
>+         */
>+        d->local_domain = d->domain;
>+        d->local_bus = d->bus;
>+        d->local_dev = d->dev;
>+        d->local_func = d->func;
>+
>+        return;
>+    }
>+
>+    snprintf(path, sizeof(path), "%s/num_devs", be_path);
>+    msg = qemu_xen_xs_read(xenstore, 0, path, &len);
>+    if (!msg) {
>+        goto err_out;
>+    }
>+
>+    if (sscanf(msg, "%u", &num_devs) != 1) {
>+        error_setg(errp, "Failed to parse %s (%s)", msg, path);
>+        goto err_out;
>+    }
>+    free(msg);
>+
>+    for (i = 0; i < num_devs; i++) {
>+        snprintf(path, sizeof(path), "%s/dev-%u", be_path, i);
>+        msg = qemu_xen_xs_read(xenstore, 0, path, &len);
>+        if (!msg) {
>+            error_setg(errp, "Failed to read %s", path);
>+            goto err_out;
>+        }
>+        if (sscanf(msg, "%x:%x:%x.%x", &domain, &bus, &dev, &func) != 4) {
>+            error_setg(errp, "Failed to parse %s (%s)", msg, path);
>+            goto err_out;
>+        }
>+        free(msg);
>+        if (domain != d->domain ||
>+            bus != d->bus ||
>+            dev != d->dev ||
>+            func != d->func)
>+            continue;
>+        snprintf(path, sizeof(path), "%s/vdev-%u", be_path, i);
>+        msg = qemu_xen_xs_read(xenstore, 0, path, &len);
>+        if (!msg) {
>+            error_setg(errp, "Failed to read %s", path);
>+            goto out;
>+        }
>+        if (sscanf(msg, "%x:%x:%x.%x", &domain, &bus, &dev, &func) != 4) {
>+            error_setg(errp, "Failed to parse %s (%s)", msg, path);
>+            goto err_out;
>+        }
>+        free(msg);
>+        d->local_domain = domain;
>+        d->local_bus = bus;
>+        d->local_dev = dev;
>+        d->local_func = func;
>+        goto out;
>+    }
>+
>+    error_setg(errp, "Failed to find local %x:%x:%x.%x", d->domain, d->bus,
>+               d->dev, d->func);
>+
>+err_out:
>+    free(msg);
>+out:
>+    free(be_path);
>+}
>+
> static void xen_host_pci_sysfs_path(const XenHostPCIDevice *d,
>                                     const char *name, char *buf, ssize_t size)
> {
>     int rc;
> 
>     rc = snprintf(buf, size, "/sys/bus/pci/devices/%04x:%02x:%02x.%d/%s",
>-                  d->domain, d->bus, d->dev, d->func, name);
>+                  d->local_domain, d->local_bus, d->local_dev, d->local_func,
>+                  name);
>     assert(rc >= 0 && rc < size);
> }
> 
>@@ -342,6 +431,11 @@ void xen_host_pci_device_get(XenHostPCIDevice *d, uint16_t domain,
>     d->dev = dev;
>     d->func = func;
> 
>+    xen_host_pci_fill_local_addr(d, errp);
>+    if (*errp) {
>+        goto error;
>+    }
>+
>     xen_host_pci_config_open(d, errp);
>     if (*errp) {
>         goto error;
>diff --git a/hw/xen/xen-host-pci-device.h b/hw/xen/xen-host-pci-device.h
>index 4d8d34ecb0..270dcb27f7 100644
>--- a/hw/xen/xen-host-pci-device.h
>+++ b/hw/xen/xen-host-pci-device.h
>@@ -23,6 +23,12 @@ typedef struct XenHostPCIDevice {
>     uint8_t dev;
>     uint8_t func;
> 
>+    /* different from the above in case of stubdomain */
>+    uint16_t local_domain;
>+    uint8_t local_bus;
>+    uint8_t local_dev;
>+    uint8_t local_func;
>+
>     uint16_t vendor_id;
>     uint16_t device_id;
>     uint32_t class_code;
Jason Andryuk March 20, 2023, 8:59 p.m. UTC | #2
On Mon, Mar 20, 2023 at 2:41 PM Bernhard Beschow <shentey@gmail.com> wrote:
>
>
>
> Am 20. März 2023 00:05:54 UTC schrieb Jason Andryuk <jandryuk@gmail.com>:
> >PCI passthrough for an HVM with a stubdom is PV PCI passthrough from
> >dom0 to the stubdom, and then QEMU passthrough of the PCI device inside
> >the stubdom.  xen-pciback has boolean module param passthrough which
> >controls "how to export PCI topology to guest".  If passthrough=1, the
> >frontend shows a PCI SBDF matching the backend host device.  When
> >passthough=0, the frontend will get a sequentially allocated SBDF.
> >
> >libxl passes the host SBDF over QMP to QEMU.  For non-stubdom or stubdom
> >with passthrough=1, this works fine.  However, it fails for
> >passthrough=0 when QEMU can't find the sysfs node for the host SBDF.
> >
> >Handle all these cases.  Look for the xenstore frontend nodes.  If they
> >are missing, then default to using the QMP command provided SBDF.  This
> >is the non-stubdom case.  If xenstore nodes are found, then read the
> >local SBDF from the xenstore nodes.  This will handle either
> >passthrough=0/1 case.
> >
> >Based on a stubdom-specific patch originally by Marek
> >Marczykowski-Górecki <marmarek@invisiblethingslab.com>, which is based
> >on earlier work by HW42 <hw42@ipsumj.de>
> >
> >Signed-off-by: Jason Andryuk <jandryuk@gmail.com>
> >---
> > hw/xen/xen-host-pci-device.c | 96 +++++++++++++++++++++++++++++++++++-
> > hw/xen/xen-host-pci-device.h |  6 +++
> > 2 files changed, 101 insertions(+), 1 deletion(-)
> >
> >diff --git a/hw/xen/xen-host-pci-device.c b/hw/xen/xen-host-pci-device.c
> >index 8c6e9a1716..51a72b432d 100644
> >--- a/hw/xen/xen-host-pci-device.c
> >+++ b/hw/xen/xen-host-pci-device.c
> >@@ -9,6 +9,7 @@
> > #include "qemu/osdep.h"
> > #include "qapi/error.h"
> > #include "qemu/cutils.h"
> >+#include "hw/xen/xen-legacy-backend.h"
> > #include "xen-host-pci-device.h"
> >
> > #define XEN_HOST_PCI_MAX_EXT_CAP \
> >@@ -33,13 +34,101 @@
> > #define IORESOURCE_PREFETCH     0x00001000      /* No side effects */
> > #define IORESOURCE_MEM_64       0x00100000
> >
> >+/*
> >+ * Non-passthrough (dom0) accesses are local PCI devices and use the given BDF
> >+ * Passthough (stubdom) accesses are through PV frontend PCI device.  Those
>
> I'm unable to parse this sentence, which may be due to my unfamiliarity with Xen terminology.

It's two sentences, but it's missing a period.
"Non-passthrough (dom0) accesses are local PCI devices and use the
given BDF."  and "Passthough (stubdom) accesses are through PV
frontend PCI device."

> There is also an extra space before "Those".

It's two spaces between sentences, which visually separates the
sentences.  It's a common formatting, so I think it's okay.

Thanks for taking a look.

> >+ * either have a BDF identical to the backend's BFD (xen-backend.passthrough=1)

(And a typo here: s/BFD/BDF/)

> >+ * or a local virtual BDF (xen-backend.passthrough=0)
> >+ *
> >+ * We are always given the backend's BDF and need to lookup the appropriate
> >+ * local BDF for sysfs access.
> >+ */

Regards,
Jason
Anthony PERARD May 15, 2023, 3:03 p.m. UTC | #3
On Sun, Mar 19, 2023 at 08:05:54PM -0400, Jason Andryuk wrote:
> diff --git a/hw/xen/xen-host-pci-device.c b/hw/xen/xen-host-pci-device.c
> index 8c6e9a1716..51a72b432d 100644
> --- a/hw/xen/xen-host-pci-device.c
> +++ b/hw/xen/xen-host-pci-device.c
> @@ -33,13 +34,101 @@
>  #define IORESOURCE_PREFETCH     0x00001000      /* No side effects */
>  #define IORESOURCE_MEM_64       0x00100000
>  
> +/*
> + * Non-passthrough (dom0) accesses are local PCI devices and use the given BDF
> + * Passthough (stubdom) accesses are through PV frontend PCI device.  Those
> + * either have a BDF identical to the backend's BFD (xen-backend.passthrough=1)
> + * or a local virtual BDF (xen-backend.passthrough=0)
> + *
> + * We are always given the backend's BDF and need to lookup the appropriate
> + * local BDF for sysfs access.
> + */
> +static void xen_host_pci_fill_local_addr(XenHostPCIDevice *d, Error **errp)
> +{
> +    unsigned int num_devs, len, i;
> +    unsigned int domain, bus, dev, func;
> +    char *be_path;
> +    char path[80];
> +    char *msg;
> +
> +    be_path = qemu_xen_xs_read(xenstore, 0, "device/pci/0/backend", &len);
> +    if (!be_path) {
> +        /*
> +         * be_path doesn't exist, so we are dealing with a local
> +         * (non-passthough) device.
> +         */
> +        d->local_domain = d->domain;
> +        d->local_bus = d->bus;
> +        d->local_dev = d->dev;
> +        d->local_func = d->func;
> +
> +        return;
> +    }
> +
> +    snprintf(path, sizeof(path), "%s/num_devs", be_path);

Is 80 bytes for `path` enough?
What if the path is truncated due to the limit?


There's xs_node_scanf() which might be useful. It does the error
handling and call scanf(). But I'm not sure if it can be used here, in
this file.

> +    msg = qemu_xen_xs_read(xenstore, 0, path, &len);
> +    if (!msg) {
> +        goto err_out;
> +    }
> +
> +    if (sscanf(msg, "%u", &num_devs) != 1) {

libxl writes `num_devs` as "%d". So I think qemu should read a %d.


> +        error_setg(errp, "Failed to parse %s (%s)", msg, path);
> +        goto err_out;
> +    }
> +    free(msg);
> +
> +    for (i = 0; i < num_devs; i++) {
> +        snprintf(path, sizeof(path), "%s/dev-%u", be_path, i);

Same here, the path is written with a %d, even if that doesn't change the
result.


Thanks,
Jason Andryuk May 18, 2023, 2:09 a.m. UTC | #4
On Mon, May 15, 2023 at 11:04 AM Anthony PERARD
<anthony.perard@citrix.com> wrote:
>
> On Sun, Mar 19, 2023 at 08:05:54PM -0400, Jason Andryuk wrote:
> > diff --git a/hw/xen/xen-host-pci-device.c b/hw/xen/xen-host-pci-device.c
> > index 8c6e9a1716..51a72b432d 100644
> > --- a/hw/xen/xen-host-pci-device.c
> > +++ b/hw/xen/xen-host-pci-device.c
> > @@ -33,13 +34,101 @@
> >  #define IORESOURCE_PREFETCH     0x00001000      /* No side effects */
> >  #define IORESOURCE_MEM_64       0x00100000
> >
> > +/*
> > + * Non-passthrough (dom0) accesses are local PCI devices and use the given BDF
> > + * Passthough (stubdom) accesses are through PV frontend PCI device.  Those
> > + * either have a BDF identical to the backend's BFD (xen-backend.passthrough=1)
> > + * or a local virtual BDF (xen-backend.passthrough=0)
> > + *
> > + * We are always given the backend's BDF and need to lookup the appropriate
> > + * local BDF for sysfs access.
> > + */
> > +static void xen_host_pci_fill_local_addr(XenHostPCIDevice *d, Error **errp)
> > +{
> > +    unsigned int num_devs, len, i;
> > +    unsigned int domain, bus, dev, func;
> > +    char *be_path;
> > +    char path[80];
> > +    char *msg;
> > +
> > +    be_path = qemu_xen_xs_read(xenstore, 0, "device/pci/0/backend", &len);
> > +    if (!be_path) {
> > +        /*
> > +         * be_path doesn't exist, so we are dealing with a local
> > +         * (non-passthough) device.
> > +         */
> > +        d->local_domain = d->domain;
> > +        d->local_bus = d->bus;
> > +        d->local_dev = d->dev;
> > +        d->local_func = d->func;
> > +
> > +        return;
> > +    }
> > +
> > +    snprintf(path, sizeof(path), "%s/num_devs", be_path);
>
> Is 80 bytes for `path` enough?
> What if the path is truncated due to the limit?
>
>
> There's xs_node_scanf() which might be useful. It does the error
> handling and call scanf(). But I'm not sure if it can be used here, in
> this file.

Thanks for the suggestion - I'll take a look.  Your other comments
sound good, too.

Also, for the next version, I plan to change the From: to Marek. I was
thinking of doing it earlier, but failed to do so when it was time to
send out the patch.  Most of the code is Marek's from his Qubes
stubdom repo.  My modifications were to make it work with non-stubdom
as well.

Thanks,
Jason
diff mbox series

Patch

diff --git a/hw/xen/xen-host-pci-device.c b/hw/xen/xen-host-pci-device.c
index 8c6e9a1716..51a72b432d 100644
--- a/hw/xen/xen-host-pci-device.c
+++ b/hw/xen/xen-host-pci-device.c
@@ -9,6 +9,7 @@ 
 #include "qemu/osdep.h"
 #include "qapi/error.h"
 #include "qemu/cutils.h"
+#include "hw/xen/xen-legacy-backend.h"
 #include "xen-host-pci-device.h"
 
 #define XEN_HOST_PCI_MAX_EXT_CAP \
@@ -33,13 +34,101 @@ 
 #define IORESOURCE_PREFETCH     0x00001000      /* No side effects */
 #define IORESOURCE_MEM_64       0x00100000
 
+/*
+ * Non-passthrough (dom0) accesses are local PCI devices and use the given BDF
+ * Passthough (stubdom) accesses are through PV frontend PCI device.  Those
+ * either have a BDF identical to the backend's BFD (xen-backend.passthrough=1)
+ * or a local virtual BDF (xen-backend.passthrough=0)
+ *
+ * We are always given the backend's BDF and need to lookup the appropriate
+ * local BDF for sysfs access.
+ */
+static void xen_host_pci_fill_local_addr(XenHostPCIDevice *d, Error **errp)
+{
+    unsigned int num_devs, len, i;
+    unsigned int domain, bus, dev, func;
+    char *be_path;
+    char path[80];
+    char *msg;
+
+    be_path = qemu_xen_xs_read(xenstore, 0, "device/pci/0/backend", &len);
+    if (!be_path) {
+        /*
+         * be_path doesn't exist, so we are dealing with a local
+         * (non-passthough) device.
+         */
+        d->local_domain = d->domain;
+        d->local_bus = d->bus;
+        d->local_dev = d->dev;
+        d->local_func = d->func;
+
+        return;
+    }
+
+    snprintf(path, sizeof(path), "%s/num_devs", be_path);
+    msg = qemu_xen_xs_read(xenstore, 0, path, &len);
+    if (!msg) {
+        goto err_out;
+    }
+
+    if (sscanf(msg, "%u", &num_devs) != 1) {
+        error_setg(errp, "Failed to parse %s (%s)", msg, path);
+        goto err_out;
+    }
+    free(msg);
+
+    for (i = 0; i < num_devs; i++) {
+        snprintf(path, sizeof(path), "%s/dev-%u", be_path, i);
+        msg = qemu_xen_xs_read(xenstore, 0, path, &len);
+        if (!msg) {
+            error_setg(errp, "Failed to read %s", path);
+            goto err_out;
+        }
+        if (sscanf(msg, "%x:%x:%x.%x", &domain, &bus, &dev, &func) != 4) {
+            error_setg(errp, "Failed to parse %s (%s)", msg, path);
+            goto err_out;
+        }
+        free(msg);
+        if (domain != d->domain ||
+            bus != d->bus ||
+            dev != d->dev ||
+            func != d->func)
+            continue;
+        snprintf(path, sizeof(path), "%s/vdev-%u", be_path, i);
+        msg = qemu_xen_xs_read(xenstore, 0, path, &len);
+        if (!msg) {
+            error_setg(errp, "Failed to read %s", path);
+            goto out;
+        }
+        if (sscanf(msg, "%x:%x:%x.%x", &domain, &bus, &dev, &func) != 4) {
+            error_setg(errp, "Failed to parse %s (%s)", msg, path);
+            goto err_out;
+        }
+        free(msg);
+        d->local_domain = domain;
+        d->local_bus = bus;
+        d->local_dev = dev;
+        d->local_func = func;
+        goto out;
+    }
+
+    error_setg(errp, "Failed to find local %x:%x:%x.%x", d->domain, d->bus,
+               d->dev, d->func);
+
+err_out:
+    free(msg);
+out:
+    free(be_path);
+}
+
 static void xen_host_pci_sysfs_path(const XenHostPCIDevice *d,
                                     const char *name, char *buf, ssize_t size)
 {
     int rc;
 
     rc = snprintf(buf, size, "/sys/bus/pci/devices/%04x:%02x:%02x.%d/%s",
-                  d->domain, d->bus, d->dev, d->func, name);
+                  d->local_domain, d->local_bus, d->local_dev, d->local_func,
+                  name);
     assert(rc >= 0 && rc < size);
 }
 
@@ -342,6 +431,11 @@  void xen_host_pci_device_get(XenHostPCIDevice *d, uint16_t domain,
     d->dev = dev;
     d->func = func;
 
+    xen_host_pci_fill_local_addr(d, errp);
+    if (*errp) {
+        goto error;
+    }
+
     xen_host_pci_config_open(d, errp);
     if (*errp) {
         goto error;
diff --git a/hw/xen/xen-host-pci-device.h b/hw/xen/xen-host-pci-device.h
index 4d8d34ecb0..270dcb27f7 100644
--- a/hw/xen/xen-host-pci-device.h
+++ b/hw/xen/xen-host-pci-device.h
@@ -23,6 +23,12 @@  typedef struct XenHostPCIDevice {
     uint8_t dev;
     uint8_t func;
 
+    /* different from the above in case of stubdomain */
+    uint16_t local_domain;
+    uint8_t local_bus;
+    uint8_t local_dev;
+    uint8_t local_func;
+
     uint16_t vendor_id;
     uint16_t device_id;
     uint32_t class_code;