Message ID | 20230320000554.8219-1-jandryuk@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | xen: Fix host pci for stubdom | expand |
Am 20. März 2023 00:05:54 UTC schrieb Jason Andryuk <jandryuk@gmail.com>: >PCI passthrough for an HVM with a stubdom is PV PCI passthrough from >dom0 to the stubdom, and then QEMU passthrough of the PCI device inside >the stubdom. xen-pciback has boolean module param passthrough which >controls "how to export PCI topology to guest". If passthrough=1, the >frontend shows a PCI SBDF matching the backend host device. When >passthough=0, the frontend will get a sequentially allocated SBDF. > >libxl passes the host SBDF over QMP to QEMU. For non-stubdom or stubdom >with passthrough=1, this works fine. However, it fails for >passthrough=0 when QEMU can't find the sysfs node for the host SBDF. > >Handle all these cases. Look for the xenstore frontend nodes. If they >are missing, then default to using the QMP command provided SBDF. This >is the non-stubdom case. If xenstore nodes are found, then read the >local SBDF from the xenstore nodes. This will handle either >passthrough=0/1 case. > >Based on a stubdom-specific patch originally by Marek >Marczykowski-Górecki <marmarek@invisiblethingslab.com>, which is based >on earlier work by HW42 <hw42@ipsumj.de> > >Signed-off-by: Jason Andryuk <jandryuk@gmail.com> >--- > hw/xen/xen-host-pci-device.c | 96 +++++++++++++++++++++++++++++++++++- > hw/xen/xen-host-pci-device.h | 6 +++ > 2 files changed, 101 insertions(+), 1 deletion(-) > >diff --git a/hw/xen/xen-host-pci-device.c b/hw/xen/xen-host-pci-device.c >index 8c6e9a1716..51a72b432d 100644 >--- a/hw/xen/xen-host-pci-device.c >+++ b/hw/xen/xen-host-pci-device.c >@@ -9,6 +9,7 @@ > #include "qemu/osdep.h" > #include "qapi/error.h" > #include "qemu/cutils.h" >+#include "hw/xen/xen-legacy-backend.h" > #include "xen-host-pci-device.h" > > #define XEN_HOST_PCI_MAX_EXT_CAP \ >@@ -33,13 +34,101 @@ > #define IORESOURCE_PREFETCH 0x00001000 /* No side effects */ > #define IORESOURCE_MEM_64 0x00100000 > >+/* >+ * Non-passthrough (dom0) accesses are local PCI devices and use the given BDF >+ * Passthough (stubdom) accesses are through PV frontend PCI device. Those I'm unable to parse this sentence, which may be due to my unfamiliarity with Xen terminology. There is also an extra space before "Those". Best regards, Bernhard >+ * either have a BDF identical to the backend's BFD (xen-backend.passthrough=1) >+ * or a local virtual BDF (xen-backend.passthrough=0) >+ * >+ * We are always given the backend's BDF and need to lookup the appropriate >+ * local BDF for sysfs access. >+ */ >+static void xen_host_pci_fill_local_addr(XenHostPCIDevice *d, Error **errp) >+{ >+ unsigned int num_devs, len, i; >+ unsigned int domain, bus, dev, func; >+ char *be_path; >+ char path[80]; >+ char *msg; >+ >+ be_path = qemu_xen_xs_read(xenstore, 0, "device/pci/0/backend", &len); >+ if (!be_path) { >+ /* >+ * be_path doesn't exist, so we are dealing with a local >+ * (non-passthough) device. >+ */ >+ d->local_domain = d->domain; >+ d->local_bus = d->bus; >+ d->local_dev = d->dev; >+ d->local_func = d->func; >+ >+ return; >+ } >+ >+ snprintf(path, sizeof(path), "%s/num_devs", be_path); >+ msg = qemu_xen_xs_read(xenstore, 0, path, &len); >+ if (!msg) { >+ goto err_out; >+ } >+ >+ if (sscanf(msg, "%u", &num_devs) != 1) { >+ error_setg(errp, "Failed to parse %s (%s)", msg, path); >+ goto err_out; >+ } >+ free(msg); >+ >+ for (i = 0; i < num_devs; i++) { >+ snprintf(path, sizeof(path), "%s/dev-%u", be_path, i); >+ msg = qemu_xen_xs_read(xenstore, 0, path, &len); >+ if (!msg) { >+ error_setg(errp, "Failed to read %s", path); >+ goto err_out; >+ } >+ if (sscanf(msg, "%x:%x:%x.%x", &domain, &bus, &dev, &func) != 4) { >+ error_setg(errp, "Failed to parse %s (%s)", msg, path); >+ goto err_out; >+ } >+ free(msg); >+ if (domain != d->domain || >+ bus != d->bus || >+ dev != d->dev || >+ func != d->func) >+ continue; >+ snprintf(path, sizeof(path), "%s/vdev-%u", be_path, i); >+ msg = qemu_xen_xs_read(xenstore, 0, path, &len); >+ if (!msg) { >+ error_setg(errp, "Failed to read %s", path); >+ goto out; >+ } >+ if (sscanf(msg, "%x:%x:%x.%x", &domain, &bus, &dev, &func) != 4) { >+ error_setg(errp, "Failed to parse %s (%s)", msg, path); >+ goto err_out; >+ } >+ free(msg); >+ d->local_domain = domain; >+ d->local_bus = bus; >+ d->local_dev = dev; >+ d->local_func = func; >+ goto out; >+ } >+ >+ error_setg(errp, "Failed to find local %x:%x:%x.%x", d->domain, d->bus, >+ d->dev, d->func); >+ >+err_out: >+ free(msg); >+out: >+ free(be_path); >+} >+ > static void xen_host_pci_sysfs_path(const XenHostPCIDevice *d, > const char *name, char *buf, ssize_t size) > { > int rc; > > rc = snprintf(buf, size, "/sys/bus/pci/devices/%04x:%02x:%02x.%d/%s", >- d->domain, d->bus, d->dev, d->func, name); >+ d->local_domain, d->local_bus, d->local_dev, d->local_func, >+ name); > assert(rc >= 0 && rc < size); > } > >@@ -342,6 +431,11 @@ void xen_host_pci_device_get(XenHostPCIDevice *d, uint16_t domain, > d->dev = dev; > d->func = func; > >+ xen_host_pci_fill_local_addr(d, errp); >+ if (*errp) { >+ goto error; >+ } >+ > xen_host_pci_config_open(d, errp); > if (*errp) { > goto error; >diff --git a/hw/xen/xen-host-pci-device.h b/hw/xen/xen-host-pci-device.h >index 4d8d34ecb0..270dcb27f7 100644 >--- a/hw/xen/xen-host-pci-device.h >+++ b/hw/xen/xen-host-pci-device.h >@@ -23,6 +23,12 @@ typedef struct XenHostPCIDevice { > uint8_t dev; > uint8_t func; > >+ /* different from the above in case of stubdomain */ >+ uint16_t local_domain; >+ uint8_t local_bus; >+ uint8_t local_dev; >+ uint8_t local_func; >+ > uint16_t vendor_id; > uint16_t device_id; > uint32_t class_code;
On Mon, Mar 20, 2023 at 2:41 PM Bernhard Beschow <shentey@gmail.com> wrote: > > > > Am 20. März 2023 00:05:54 UTC schrieb Jason Andryuk <jandryuk@gmail.com>: > >PCI passthrough for an HVM with a stubdom is PV PCI passthrough from > >dom0 to the stubdom, and then QEMU passthrough of the PCI device inside > >the stubdom. xen-pciback has boolean module param passthrough which > >controls "how to export PCI topology to guest". If passthrough=1, the > >frontend shows a PCI SBDF matching the backend host device. When > >passthough=0, the frontend will get a sequentially allocated SBDF. > > > >libxl passes the host SBDF over QMP to QEMU. For non-stubdom or stubdom > >with passthrough=1, this works fine. However, it fails for > >passthrough=0 when QEMU can't find the sysfs node for the host SBDF. > > > >Handle all these cases. Look for the xenstore frontend nodes. If they > >are missing, then default to using the QMP command provided SBDF. This > >is the non-stubdom case. If xenstore nodes are found, then read the > >local SBDF from the xenstore nodes. This will handle either > >passthrough=0/1 case. > > > >Based on a stubdom-specific patch originally by Marek > >Marczykowski-Górecki <marmarek@invisiblethingslab.com>, which is based > >on earlier work by HW42 <hw42@ipsumj.de> > > > >Signed-off-by: Jason Andryuk <jandryuk@gmail.com> > >--- > > hw/xen/xen-host-pci-device.c | 96 +++++++++++++++++++++++++++++++++++- > > hw/xen/xen-host-pci-device.h | 6 +++ > > 2 files changed, 101 insertions(+), 1 deletion(-) > > > >diff --git a/hw/xen/xen-host-pci-device.c b/hw/xen/xen-host-pci-device.c > >index 8c6e9a1716..51a72b432d 100644 > >--- a/hw/xen/xen-host-pci-device.c > >+++ b/hw/xen/xen-host-pci-device.c > >@@ -9,6 +9,7 @@ > > #include "qemu/osdep.h" > > #include "qapi/error.h" > > #include "qemu/cutils.h" > >+#include "hw/xen/xen-legacy-backend.h" > > #include "xen-host-pci-device.h" > > > > #define XEN_HOST_PCI_MAX_EXT_CAP \ > >@@ -33,13 +34,101 @@ > > #define IORESOURCE_PREFETCH 0x00001000 /* No side effects */ > > #define IORESOURCE_MEM_64 0x00100000 > > > >+/* > >+ * Non-passthrough (dom0) accesses are local PCI devices and use the given BDF > >+ * Passthough (stubdom) accesses are through PV frontend PCI device. Those > > I'm unable to parse this sentence, which may be due to my unfamiliarity with Xen terminology. It's two sentences, but it's missing a period. "Non-passthrough (dom0) accesses are local PCI devices and use the given BDF." and "Passthough (stubdom) accesses are through PV frontend PCI device." > There is also an extra space before "Those". It's two spaces between sentences, which visually separates the sentences. It's a common formatting, so I think it's okay. Thanks for taking a look. > >+ * either have a BDF identical to the backend's BFD (xen-backend.passthrough=1) (And a typo here: s/BFD/BDF/) > >+ * or a local virtual BDF (xen-backend.passthrough=0) > >+ * > >+ * We are always given the backend's BDF and need to lookup the appropriate > >+ * local BDF for sysfs access. > >+ */ Regards, Jason
On Sun, Mar 19, 2023 at 08:05:54PM -0400, Jason Andryuk wrote: > diff --git a/hw/xen/xen-host-pci-device.c b/hw/xen/xen-host-pci-device.c > index 8c6e9a1716..51a72b432d 100644 > --- a/hw/xen/xen-host-pci-device.c > +++ b/hw/xen/xen-host-pci-device.c > @@ -33,13 +34,101 @@ > #define IORESOURCE_PREFETCH 0x00001000 /* No side effects */ > #define IORESOURCE_MEM_64 0x00100000 > > +/* > + * Non-passthrough (dom0) accesses are local PCI devices and use the given BDF > + * Passthough (stubdom) accesses are through PV frontend PCI device. Those > + * either have a BDF identical to the backend's BFD (xen-backend.passthrough=1) > + * or a local virtual BDF (xen-backend.passthrough=0) > + * > + * We are always given the backend's BDF and need to lookup the appropriate > + * local BDF for sysfs access. > + */ > +static void xen_host_pci_fill_local_addr(XenHostPCIDevice *d, Error **errp) > +{ > + unsigned int num_devs, len, i; > + unsigned int domain, bus, dev, func; > + char *be_path; > + char path[80]; > + char *msg; > + > + be_path = qemu_xen_xs_read(xenstore, 0, "device/pci/0/backend", &len); > + if (!be_path) { > + /* > + * be_path doesn't exist, so we are dealing with a local > + * (non-passthough) device. > + */ > + d->local_domain = d->domain; > + d->local_bus = d->bus; > + d->local_dev = d->dev; > + d->local_func = d->func; > + > + return; > + } > + > + snprintf(path, sizeof(path), "%s/num_devs", be_path); Is 80 bytes for `path` enough? What if the path is truncated due to the limit? There's xs_node_scanf() which might be useful. It does the error handling and call scanf(). But I'm not sure if it can be used here, in this file. > + msg = qemu_xen_xs_read(xenstore, 0, path, &len); > + if (!msg) { > + goto err_out; > + } > + > + if (sscanf(msg, "%u", &num_devs) != 1) { libxl writes `num_devs` as "%d". So I think qemu should read a %d. > + error_setg(errp, "Failed to parse %s (%s)", msg, path); > + goto err_out; > + } > + free(msg); > + > + for (i = 0; i < num_devs; i++) { > + snprintf(path, sizeof(path), "%s/dev-%u", be_path, i); Same here, the path is written with a %d, even if that doesn't change the result. Thanks,
On Mon, May 15, 2023 at 11:04 AM Anthony PERARD <anthony.perard@citrix.com> wrote: > > On Sun, Mar 19, 2023 at 08:05:54PM -0400, Jason Andryuk wrote: > > diff --git a/hw/xen/xen-host-pci-device.c b/hw/xen/xen-host-pci-device.c > > index 8c6e9a1716..51a72b432d 100644 > > --- a/hw/xen/xen-host-pci-device.c > > +++ b/hw/xen/xen-host-pci-device.c > > @@ -33,13 +34,101 @@ > > #define IORESOURCE_PREFETCH 0x00001000 /* No side effects */ > > #define IORESOURCE_MEM_64 0x00100000 > > > > +/* > > + * Non-passthrough (dom0) accesses are local PCI devices and use the given BDF > > + * Passthough (stubdom) accesses are through PV frontend PCI device. Those > > + * either have a BDF identical to the backend's BFD (xen-backend.passthrough=1) > > + * or a local virtual BDF (xen-backend.passthrough=0) > > + * > > + * We are always given the backend's BDF and need to lookup the appropriate > > + * local BDF for sysfs access. > > + */ > > +static void xen_host_pci_fill_local_addr(XenHostPCIDevice *d, Error **errp) > > +{ > > + unsigned int num_devs, len, i; > > + unsigned int domain, bus, dev, func; > > + char *be_path; > > + char path[80]; > > + char *msg; > > + > > + be_path = qemu_xen_xs_read(xenstore, 0, "device/pci/0/backend", &len); > > + if (!be_path) { > > + /* > > + * be_path doesn't exist, so we are dealing with a local > > + * (non-passthough) device. > > + */ > > + d->local_domain = d->domain; > > + d->local_bus = d->bus; > > + d->local_dev = d->dev; > > + d->local_func = d->func; > > + > > + return; > > + } > > + > > + snprintf(path, sizeof(path), "%s/num_devs", be_path); > > Is 80 bytes for `path` enough? > What if the path is truncated due to the limit? > > > There's xs_node_scanf() which might be useful. It does the error > handling and call scanf(). But I'm not sure if it can be used here, in > this file. Thanks for the suggestion - I'll take a look. Your other comments sound good, too. Also, for the next version, I plan to change the From: to Marek. I was thinking of doing it earlier, but failed to do so when it was time to send out the patch. Most of the code is Marek's from his Qubes stubdom repo. My modifications were to make it work with non-stubdom as well. Thanks, Jason
diff --git a/hw/xen/xen-host-pci-device.c b/hw/xen/xen-host-pci-device.c index 8c6e9a1716..51a72b432d 100644 --- a/hw/xen/xen-host-pci-device.c +++ b/hw/xen/xen-host-pci-device.c @@ -9,6 +9,7 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "qemu/cutils.h" +#include "hw/xen/xen-legacy-backend.h" #include "xen-host-pci-device.h" #define XEN_HOST_PCI_MAX_EXT_CAP \ @@ -33,13 +34,101 @@ #define IORESOURCE_PREFETCH 0x00001000 /* No side effects */ #define IORESOURCE_MEM_64 0x00100000 +/* + * Non-passthrough (dom0) accesses are local PCI devices and use the given BDF + * Passthough (stubdom) accesses are through PV frontend PCI device. Those + * either have a BDF identical to the backend's BFD (xen-backend.passthrough=1) + * or a local virtual BDF (xen-backend.passthrough=0) + * + * We are always given the backend's BDF and need to lookup the appropriate + * local BDF for sysfs access. + */ +static void xen_host_pci_fill_local_addr(XenHostPCIDevice *d, Error **errp) +{ + unsigned int num_devs, len, i; + unsigned int domain, bus, dev, func; + char *be_path; + char path[80]; + char *msg; + + be_path = qemu_xen_xs_read(xenstore, 0, "device/pci/0/backend", &len); + if (!be_path) { + /* + * be_path doesn't exist, so we are dealing with a local + * (non-passthough) device. + */ + d->local_domain = d->domain; + d->local_bus = d->bus; + d->local_dev = d->dev; + d->local_func = d->func; + + return; + } + + snprintf(path, sizeof(path), "%s/num_devs", be_path); + msg = qemu_xen_xs_read(xenstore, 0, path, &len); + if (!msg) { + goto err_out; + } + + if (sscanf(msg, "%u", &num_devs) != 1) { + error_setg(errp, "Failed to parse %s (%s)", msg, path); + goto err_out; + } + free(msg); + + for (i = 0; i < num_devs; i++) { + snprintf(path, sizeof(path), "%s/dev-%u", be_path, i); + msg = qemu_xen_xs_read(xenstore, 0, path, &len); + if (!msg) { + error_setg(errp, "Failed to read %s", path); + goto err_out; + } + if (sscanf(msg, "%x:%x:%x.%x", &domain, &bus, &dev, &func) != 4) { + error_setg(errp, "Failed to parse %s (%s)", msg, path); + goto err_out; + } + free(msg); + if (domain != d->domain || + bus != d->bus || + dev != d->dev || + func != d->func) + continue; + snprintf(path, sizeof(path), "%s/vdev-%u", be_path, i); + msg = qemu_xen_xs_read(xenstore, 0, path, &len); + if (!msg) { + error_setg(errp, "Failed to read %s", path); + goto out; + } + if (sscanf(msg, "%x:%x:%x.%x", &domain, &bus, &dev, &func) != 4) { + error_setg(errp, "Failed to parse %s (%s)", msg, path); + goto err_out; + } + free(msg); + d->local_domain = domain; + d->local_bus = bus; + d->local_dev = dev; + d->local_func = func; + goto out; + } + + error_setg(errp, "Failed to find local %x:%x:%x.%x", d->domain, d->bus, + d->dev, d->func); + +err_out: + free(msg); +out: + free(be_path); +} + static void xen_host_pci_sysfs_path(const XenHostPCIDevice *d, const char *name, char *buf, ssize_t size) { int rc; rc = snprintf(buf, size, "/sys/bus/pci/devices/%04x:%02x:%02x.%d/%s", - d->domain, d->bus, d->dev, d->func, name); + d->local_domain, d->local_bus, d->local_dev, d->local_func, + name); assert(rc >= 0 && rc < size); } @@ -342,6 +431,11 @@ void xen_host_pci_device_get(XenHostPCIDevice *d, uint16_t domain, d->dev = dev; d->func = func; + xen_host_pci_fill_local_addr(d, errp); + if (*errp) { + goto error; + } + xen_host_pci_config_open(d, errp); if (*errp) { goto error; diff --git a/hw/xen/xen-host-pci-device.h b/hw/xen/xen-host-pci-device.h index 4d8d34ecb0..270dcb27f7 100644 --- a/hw/xen/xen-host-pci-device.h +++ b/hw/xen/xen-host-pci-device.h @@ -23,6 +23,12 @@ typedef struct XenHostPCIDevice { uint8_t dev; uint8_t func; + /* different from the above in case of stubdomain */ + uint16_t local_domain; + uint8_t local_bus; + uint8_t local_dev; + uint8_t local_func; + uint16_t vendor_id; uint16_t device_id; uint32_t class_code;
PCI passthrough for an HVM with a stubdom is PV PCI passthrough from dom0 to the stubdom, and then QEMU passthrough of the PCI device inside the stubdom. xen-pciback has boolean module param passthrough which controls "how to export PCI topology to guest". If passthrough=1, the frontend shows a PCI SBDF matching the backend host device. When passthough=0, the frontend will get a sequentially allocated SBDF. libxl passes the host SBDF over QMP to QEMU. For non-stubdom or stubdom with passthrough=1, this works fine. However, it fails for passthrough=0 when QEMU can't find the sysfs node for the host SBDF. Handle all these cases. Look for the xenstore frontend nodes. If they are missing, then default to using the QMP command provided SBDF. This is the non-stubdom case. If xenstore nodes are found, then read the local SBDF from the xenstore nodes. This will handle either passthrough=0/1 case. Based on a stubdom-specific patch originally by Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com>, which is based on earlier work by HW42 <hw42@ipsumj.de> Signed-off-by: Jason Andryuk <jandryuk@gmail.com> --- hw/xen/xen-host-pci-device.c | 96 +++++++++++++++++++++++++++++++++++- hw/xen/xen-host-pci-device.h | 6 +++ 2 files changed, 101 insertions(+), 1 deletion(-)