Message ID | 1565888460-38694-1-git-send-email-haiyangz@microsoft.com (mailing list archive) |
---|---|
State | Mainlined, archived |
Commit | be700103efd1050808db1cf00e52c3a2837bf802 |
Headers | show |
Series | [v6,1/2] PCI: hv: Detect and fix Hyper-V PCI domain number collision | expand |
On Thu, Aug 15, 2019 at 05:01:37PM +0000, Haiyang Zhang wrote: > Currently in Azure cloud, for passthrough devices, the host sets the device > instance ID's bytes 8 - 15 to a value derived from the host HWID, which is > the same on all devices in a VM. So, the device instance ID's bytes 8 and 9 > provided by the host are no longer unique. This affects all Azure hosts > since July 2018, and can cause device passthrough to VMs to fail because > the bytes 8 and 9 are used as PCI domain number. Collision of domain > numbers will cause the second device with the same domain number fail to > load. > > In the cases of collision, we will detect and find another number that is > not in use. > > Suggested-by: Michael Kelley <mikelley@microsoft.com> > Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com> > Acked-by: Sasha Levin <sashal@kernel.org> I assume you will take care of backporting and sending this patch to stable kernels given that you have not applied any tag with such request. I appreciate it may not be easy to define but a Fixes: tag would help. Thanks, Lorenzo > --- > drivers/pci/controller/pci-hyperv.c | 92 +++++++++++++++++++++++++++++++------ > 1 file changed, 79 insertions(+), 13 deletions(-) > > diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c > index 40b6254..31b8fd5 100644 > --- a/drivers/pci/controller/pci-hyperv.c > +++ b/drivers/pci/controller/pci-hyperv.c > @@ -2510,6 +2510,48 @@ static void put_hvpcibus(struct hv_pcibus_device *hbus) > complete(&hbus->remove_event); > } > > +#define HVPCI_DOM_MAP_SIZE (64 * 1024) > +static DECLARE_BITMAP(hvpci_dom_map, HVPCI_DOM_MAP_SIZE); > + > +/* > + * PCI domain number 0 is used by emulated devices on Gen1 VMs, so define 0 > + * as invalid for passthrough PCI devices of this driver. > + */ > +#define HVPCI_DOM_INVALID 0 > + > +/** > + * hv_get_dom_num() - Get a valid PCI domain number > + * Check if the PCI domain number is in use, and return another number if > + * it is in use. > + * > + * @dom: Requested domain number > + * > + * return: domain number on success, HVPCI_DOM_INVALID on failure > + */ > +static u16 hv_get_dom_num(u16 dom) > +{ > + unsigned int i; > + > + if (test_and_set_bit(dom, hvpci_dom_map) == 0) > + return dom; > + > + for_each_clear_bit(i, hvpci_dom_map, HVPCI_DOM_MAP_SIZE) { > + if (test_and_set_bit(i, hvpci_dom_map) == 0) > + return i; > + } > + > + return HVPCI_DOM_INVALID; > +} > + > +/** > + * hv_put_dom_num() - Mark the PCI domain number as free > + * @dom: Domain number to be freed > + */ > +static void hv_put_dom_num(u16 dom) > +{ > + clear_bit(dom, hvpci_dom_map); > +} > + > /** > * hv_pci_probe() - New VMBus channel probe, for a root PCI bus > * @hdev: VMBus's tracking struct for this root PCI bus > @@ -2521,6 +2563,7 @@ static int hv_pci_probe(struct hv_device *hdev, > const struct hv_vmbus_device_id *dev_id) > { > struct hv_pcibus_device *hbus; > + u16 dom_req, dom; > int ret; > > /* > @@ -2535,19 +2578,34 @@ static int hv_pci_probe(struct hv_device *hdev, > hbus->state = hv_pcibus_init; > > /* > - * The PCI bus "domain" is what is called "segment" in ACPI and > - * other specs. Pull it from the instance ID, to get something > - * unique. Bytes 8 and 9 are what is used in Windows guests, so > - * do the same thing for consistency. Note that, since this code > - * only runs in a Hyper-V VM, Hyper-V can (and does) guarantee > - * that (1) the only domain in use for something that looks like > - * a physical PCI bus (which is actually emulated by the > - * hypervisor) is domain 0 and (2) there will be no overlap > - * between domains derived from these instance IDs in the same > - * VM. > + * The PCI bus "domain" is what is called "segment" in ACPI and other > + * specs. Pull it from the instance ID, to get something usually > + * unique. In rare cases of collision, we will find out another number > + * not in use. > + * > + * Note that, since this code only runs in a Hyper-V VM, Hyper-V > + * together with this guest driver can guarantee that (1) The only > + * domain used by Gen1 VMs for something that looks like a physical > + * PCI bus (which is actually emulated by the hypervisor) is domain 0. > + * (2) There will be no overlap between domains (after fixing possible > + * collisions) in the same VM. > */ > - hbus->sysdata.domain = hdev->dev_instance.b[9] | > - hdev->dev_instance.b[8] << 8; > + dom_req = hdev->dev_instance.b[8] << 8 | hdev->dev_instance.b[9]; > + dom = hv_get_dom_num(dom_req); > + > + if (dom == HVPCI_DOM_INVALID) { > + dev_err(&hdev->device, > + "Unable to use dom# 0x%hx or other numbers", dom_req); > + ret = -EINVAL; > + goto free_bus; > + } > + > + if (dom != dom_req) > + dev_info(&hdev->device, > + "PCI dom# 0x%hx has collision, using 0x%hx", > + dom_req, dom); > + > + hbus->sysdata.domain = dom; > > hbus->hdev = hdev; > refcount_set(&hbus->remove_lock, 1); > @@ -2562,7 +2620,7 @@ static int hv_pci_probe(struct hv_device *hdev, > hbus->sysdata.domain); > if (!hbus->wq) { > ret = -ENOMEM; > - goto free_bus; > + goto free_dom; > } > > ret = vmbus_open(hdev->channel, pci_ring_size, pci_ring_size, NULL, 0, > @@ -2639,6 +2697,8 @@ static int hv_pci_probe(struct hv_device *hdev, > vmbus_close(hdev->channel); > destroy_wq: > destroy_workqueue(hbus->wq); > +free_dom: > + hv_put_dom_num(hbus->sysdata.domain); > free_bus: > free_page((unsigned long)hbus); > return ret; > @@ -2720,6 +2780,9 @@ static int hv_pci_remove(struct hv_device *hdev) > put_hvpcibus(hbus); > wait_for_completion(&hbus->remove_event); > destroy_workqueue(hbus->wq); > + > + hv_put_dom_num(hbus->sysdata.domain); > + > free_page((unsigned long)hbus); > return 0; > } > @@ -2747,6 +2810,9 @@ static void __exit exit_hv_pci_drv(void) > > static int __init init_hv_pci_drv(void) > { > + /* Set the invalid domain number's bit, so it will not be used */ > + set_bit(HVPCI_DOM_INVALID, hvpci_dom_map); > + > return vmbus_driver_register(&hv_pci_drv); > } > > -- > 1.8.3.1 >
> -----Original Message----- > From: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com> > Sent: Friday, August 16, 2019 5:52 AM > To: Haiyang Zhang <haiyangz@microsoft.com> > Cc: sashal@kernel.org; bhelgaas@google.com; linux- > hyperv@vger.kernel.org; linux-pci@vger.kernel.org; KY Srinivasan > <kys@microsoft.com>; Stephen Hemminger <sthemmin@microsoft.com>; > olaf@aepfle.de; vkuznets <vkuznets@redhat.com>; linux- > kernel@vger.kernel.org > Subject: Re: [PATCH v6,1/2] PCI: hv: Detect and fix Hyper-V PCI domain > number collision > > On Thu, Aug 15, 2019 at 05:01:37PM +0000, Haiyang Zhang wrote: > > Currently in Azure cloud, for passthrough devices, the host sets the > > device instance ID's bytes 8 - 15 to a value derived from the host > > HWID, which is the same on all devices in a VM. So, the device > > instance ID's bytes 8 and 9 provided by the host are no longer unique. > > This affects all Azure hosts since July 2018, and can cause device > > passthrough to VMs to fail because the bytes 8 and 9 are used as PCI > > domain number. Collision of domain numbers will cause the second > > device with the same domain number fail to load. > > > > In the cases of collision, we will detect and find another number that > > is not in use. > > > > Suggested-by: Michael Kelley <mikelley@microsoft.com> > > Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com> > > Acked-by: Sasha Levin <sashal@kernel.org> > > I assume you will take care of backporting and sending this patch to stable > kernels given that you have not applied any tag with such request. > > I appreciate it may not be easy to define but a Fixes: tag would help. Sure, I will add a Fixes tag, and Cc stable. Usually Sasha from our team will do the stable porting in batches. Thanks, - Haiyang
On Thu, Aug 15, 2019 at 05:01:37PM +0000, Haiyang Zhang wrote: > Currently in Azure cloud, for passthrough devices, the host sets the device > instance ID's bytes 8 - 15 to a value derived from the host HWID, which is > the same on all devices in a VM. So, the device instance ID's bytes 8 and 9 > provided by the host are no longer unique. This affects all Azure hosts > since July 2018, and can cause device passthrough to VMs to fail because > the bytes 8 and 9 are used as PCI domain number. Collision of domain > numbers will cause the second device with the same domain number fail to > load. > > In the cases of collision, we will detect and find another number that is > not in use. > > Suggested-by: Michael Kelley <mikelley@microsoft.com> > Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com> > Acked-by: Sasha Levin <sashal@kernel.org> > --- > drivers/pci/controller/pci-hyperv.c | 92 +++++++++++++++++++++++++++++++------ > 1 file changed, 79 insertions(+), 13 deletions(-) I have applied both patches to pci/hv for v5.4. Thanks, Lorenzo > diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c > index 40b6254..31b8fd5 100644 > --- a/drivers/pci/controller/pci-hyperv.c > +++ b/drivers/pci/controller/pci-hyperv.c > @@ -2510,6 +2510,48 @@ static void put_hvpcibus(struct hv_pcibus_device *hbus) > complete(&hbus->remove_event); > } > > +#define HVPCI_DOM_MAP_SIZE (64 * 1024) > +static DECLARE_BITMAP(hvpci_dom_map, HVPCI_DOM_MAP_SIZE); > + > +/* > + * PCI domain number 0 is used by emulated devices on Gen1 VMs, so define 0 > + * as invalid for passthrough PCI devices of this driver. > + */ > +#define HVPCI_DOM_INVALID 0 > + > +/** > + * hv_get_dom_num() - Get a valid PCI domain number > + * Check if the PCI domain number is in use, and return another number if > + * it is in use. > + * > + * @dom: Requested domain number > + * > + * return: domain number on success, HVPCI_DOM_INVALID on failure > + */ > +static u16 hv_get_dom_num(u16 dom) > +{ > + unsigned int i; > + > + if (test_and_set_bit(dom, hvpci_dom_map) == 0) > + return dom; > + > + for_each_clear_bit(i, hvpci_dom_map, HVPCI_DOM_MAP_SIZE) { > + if (test_and_set_bit(i, hvpci_dom_map) == 0) > + return i; > + } > + > + return HVPCI_DOM_INVALID; > +} > + > +/** > + * hv_put_dom_num() - Mark the PCI domain number as free > + * @dom: Domain number to be freed > + */ > +static void hv_put_dom_num(u16 dom) > +{ > + clear_bit(dom, hvpci_dom_map); > +} > + > /** > * hv_pci_probe() - New VMBus channel probe, for a root PCI bus > * @hdev: VMBus's tracking struct for this root PCI bus > @@ -2521,6 +2563,7 @@ static int hv_pci_probe(struct hv_device *hdev, > const struct hv_vmbus_device_id *dev_id) > { > struct hv_pcibus_device *hbus; > + u16 dom_req, dom; > int ret; > > /* > @@ -2535,19 +2578,34 @@ static int hv_pci_probe(struct hv_device *hdev, > hbus->state = hv_pcibus_init; > > /* > - * The PCI bus "domain" is what is called "segment" in ACPI and > - * other specs. Pull it from the instance ID, to get something > - * unique. Bytes 8 and 9 are what is used in Windows guests, so > - * do the same thing for consistency. Note that, since this code > - * only runs in a Hyper-V VM, Hyper-V can (and does) guarantee > - * that (1) the only domain in use for something that looks like > - * a physical PCI bus (which is actually emulated by the > - * hypervisor) is domain 0 and (2) there will be no overlap > - * between domains derived from these instance IDs in the same > - * VM. > + * The PCI bus "domain" is what is called "segment" in ACPI and other > + * specs. Pull it from the instance ID, to get something usually > + * unique. In rare cases of collision, we will find out another number > + * not in use. > + * > + * Note that, since this code only runs in a Hyper-V VM, Hyper-V > + * together with this guest driver can guarantee that (1) The only > + * domain used by Gen1 VMs for something that looks like a physical > + * PCI bus (which is actually emulated by the hypervisor) is domain 0. > + * (2) There will be no overlap between domains (after fixing possible > + * collisions) in the same VM. > */ > - hbus->sysdata.domain = hdev->dev_instance.b[9] | > - hdev->dev_instance.b[8] << 8; > + dom_req = hdev->dev_instance.b[8] << 8 | hdev->dev_instance.b[9]; > + dom = hv_get_dom_num(dom_req); > + > + if (dom == HVPCI_DOM_INVALID) { > + dev_err(&hdev->device, > + "Unable to use dom# 0x%hx or other numbers", dom_req); > + ret = -EINVAL; > + goto free_bus; > + } > + > + if (dom != dom_req) > + dev_info(&hdev->device, > + "PCI dom# 0x%hx has collision, using 0x%hx", > + dom_req, dom); > + > + hbus->sysdata.domain = dom; > > hbus->hdev = hdev; > refcount_set(&hbus->remove_lock, 1); > @@ -2562,7 +2620,7 @@ static int hv_pci_probe(struct hv_device *hdev, > hbus->sysdata.domain); > if (!hbus->wq) { > ret = -ENOMEM; > - goto free_bus; > + goto free_dom; > } > > ret = vmbus_open(hdev->channel, pci_ring_size, pci_ring_size, NULL, 0, > @@ -2639,6 +2697,8 @@ static int hv_pci_probe(struct hv_device *hdev, > vmbus_close(hdev->channel); > destroy_wq: > destroy_workqueue(hbus->wq); > +free_dom: > + hv_put_dom_num(hbus->sysdata.domain); > free_bus: > free_page((unsigned long)hbus); > return ret; > @@ -2720,6 +2780,9 @@ static int hv_pci_remove(struct hv_device *hdev) > put_hvpcibus(hbus); > wait_for_completion(&hbus->remove_event); > destroy_workqueue(hbus->wq); > + > + hv_put_dom_num(hbus->sysdata.domain); > + > free_page((unsigned long)hbus); > return 0; > } > @@ -2747,6 +2810,9 @@ static void __exit exit_hv_pci_drv(void) > > static int __init init_hv_pci_drv(void) > { > + /* Set the invalid domain number's bit, so it will not be used */ > + set_bit(HVPCI_DOM_INVALID, hvpci_dom_map); > + > return vmbus_driver_register(&hv_pci_drv); > } > > -- > 1.8.3.1 >
diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index 40b6254..31b8fd5 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -2510,6 +2510,48 @@ static void put_hvpcibus(struct hv_pcibus_device *hbus) complete(&hbus->remove_event); } +#define HVPCI_DOM_MAP_SIZE (64 * 1024) +static DECLARE_BITMAP(hvpci_dom_map, HVPCI_DOM_MAP_SIZE); + +/* + * PCI domain number 0 is used by emulated devices on Gen1 VMs, so define 0 + * as invalid for passthrough PCI devices of this driver. + */ +#define HVPCI_DOM_INVALID 0 + +/** + * hv_get_dom_num() - Get a valid PCI domain number + * Check if the PCI domain number is in use, and return another number if + * it is in use. + * + * @dom: Requested domain number + * + * return: domain number on success, HVPCI_DOM_INVALID on failure + */ +static u16 hv_get_dom_num(u16 dom) +{ + unsigned int i; + + if (test_and_set_bit(dom, hvpci_dom_map) == 0) + return dom; + + for_each_clear_bit(i, hvpci_dom_map, HVPCI_DOM_MAP_SIZE) { + if (test_and_set_bit(i, hvpci_dom_map) == 0) + return i; + } + + return HVPCI_DOM_INVALID; +} + +/** + * hv_put_dom_num() - Mark the PCI domain number as free + * @dom: Domain number to be freed + */ +static void hv_put_dom_num(u16 dom) +{ + clear_bit(dom, hvpci_dom_map); +} + /** * hv_pci_probe() - New VMBus channel probe, for a root PCI bus * @hdev: VMBus's tracking struct for this root PCI bus @@ -2521,6 +2563,7 @@ static int hv_pci_probe(struct hv_device *hdev, const struct hv_vmbus_device_id *dev_id) { struct hv_pcibus_device *hbus; + u16 dom_req, dom; int ret; /* @@ -2535,19 +2578,34 @@ static int hv_pci_probe(struct hv_device *hdev, hbus->state = hv_pcibus_init; /* - * The PCI bus "domain" is what is called "segment" in ACPI and - * other specs. Pull it from the instance ID, to get something - * unique. Bytes 8 and 9 are what is used in Windows guests, so - * do the same thing for consistency. Note that, since this code - * only runs in a Hyper-V VM, Hyper-V can (and does) guarantee - * that (1) the only domain in use for something that looks like - * a physical PCI bus (which is actually emulated by the - * hypervisor) is domain 0 and (2) there will be no overlap - * between domains derived from these instance IDs in the same - * VM. + * The PCI bus "domain" is what is called "segment" in ACPI and other + * specs. Pull it from the instance ID, to get something usually + * unique. In rare cases of collision, we will find out another number + * not in use. + * + * Note that, since this code only runs in a Hyper-V VM, Hyper-V + * together with this guest driver can guarantee that (1) The only + * domain used by Gen1 VMs for something that looks like a physical + * PCI bus (which is actually emulated by the hypervisor) is domain 0. + * (2) There will be no overlap between domains (after fixing possible + * collisions) in the same VM. */ - hbus->sysdata.domain = hdev->dev_instance.b[9] | - hdev->dev_instance.b[8] << 8; + dom_req = hdev->dev_instance.b[8] << 8 | hdev->dev_instance.b[9]; + dom = hv_get_dom_num(dom_req); + + if (dom == HVPCI_DOM_INVALID) { + dev_err(&hdev->device, + "Unable to use dom# 0x%hx or other numbers", dom_req); + ret = -EINVAL; + goto free_bus; + } + + if (dom != dom_req) + dev_info(&hdev->device, + "PCI dom# 0x%hx has collision, using 0x%hx", + dom_req, dom); + + hbus->sysdata.domain = dom; hbus->hdev = hdev; refcount_set(&hbus->remove_lock, 1); @@ -2562,7 +2620,7 @@ static int hv_pci_probe(struct hv_device *hdev, hbus->sysdata.domain); if (!hbus->wq) { ret = -ENOMEM; - goto free_bus; + goto free_dom; } ret = vmbus_open(hdev->channel, pci_ring_size, pci_ring_size, NULL, 0, @@ -2639,6 +2697,8 @@ static int hv_pci_probe(struct hv_device *hdev, vmbus_close(hdev->channel); destroy_wq: destroy_workqueue(hbus->wq); +free_dom: + hv_put_dom_num(hbus->sysdata.domain); free_bus: free_page((unsigned long)hbus); return ret; @@ -2720,6 +2780,9 @@ static int hv_pci_remove(struct hv_device *hdev) put_hvpcibus(hbus); wait_for_completion(&hbus->remove_event); destroy_workqueue(hbus->wq); + + hv_put_dom_num(hbus->sysdata.domain); + free_page((unsigned long)hbus); return 0; } @@ -2747,6 +2810,9 @@ static void __exit exit_hv_pci_drv(void) static int __init init_hv_pci_drv(void) { + /* Set the invalid domain number's bit, so it will not be used */ + set_bit(HVPCI_DOM_INVALID, hvpci_dom_map); + return vmbus_driver_register(&hv_pci_drv); }