diff mbox

[v3,7/9] vfio: vote the function 0 to do host bus reset when aer occurred

Message ID 1458005749-4878-8-git-send-email-caoj.fnst@cn.fujitsu.com (mailing list archive)
State New, archived
Headers show

Commit Message

Cao jin March 15, 2016, 1:35 a.m. UTC
From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>

Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
 hw/pci/pci.c             |  2 ++
 hw/vfio/pci.c            | 14 ++++++++++++++
 hw/vfio/pci.h            |  1 +
 include/hw/pci/pci_bus.h |  2 ++
 4 files changed, 19 insertions(+)

Comments

Alex Williamson March 15, 2016, 8:38 p.m. UTC | #1
On Tue, 15 Mar 2016 09:35:47 +0800
Cao jin <caoj.fnst@cn.fujitsu.com> wrote:

> From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> 
> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> ---
>  hw/pci/pci.c             |  2 ++
>  hw/vfio/pci.c            | 14 ++++++++++++++
>  hw/vfio/pci.h            |  1 +
>  include/hw/pci/pci_bus.h |  2 ++
>  4 files changed, 19 insertions(+)
> 
> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> index e67664d..953745d 100644
> --- a/hw/pci/pci.c
> +++ b/hw/pci/pci.c
> @@ -276,6 +276,8 @@ static void pcibus_reset(BusState *qbus)
>      for (i = 0; i < bus->nirq; i++) {
>          assert(bus->irq_count[i] == 0);
>      }
> +
> +    bus->is_bus_rst = false;
>  }
>  
>  static void pci_host_bus_register(DeviceState *host)
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index 223c0ee..b944d0b 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -1901,6 +1901,8 @@ static void vfio_check_hot_bus_reset(VFIOPCIDevice *vdev, Error **errp)
>      /* List all affected devices by bus reset */
>      devices = &info->devices[0];
>  
> +    vdev->single_depend_dev = (info->count == 1);
> +
>      /* Verify that we have all the groups required */
>      for (i = 0; i < info->count; i++) {
>          PCIHostDeviceAddress host;
> @@ -2593,6 +2595,10 @@ static void vfio_err_notifier_handler(void *opaque)
>          return;
>      }
>  
> +    if ((vdev->features & VFIO_FEATURE_ENABLE_AER)) {
> +        vdev->pdev.bus->is_bus_rst = true;
> +    }
> +

So we're *assuming* that the next reset will be a bus reset because we
took an AER fault... what if that particular error got handled in
another way, maybe a device specific handler that doesn't do a bus
reset?  The asymmetry of setting a value here and clearing it in PCI
code is pretty undesirable as well.  Can we detect that the bus is in
reset on our own given the current set of configuration restrictions?
Seems pretty easy to test for an AER device with the parent bridge
having PCI_BRIDGE_CTL_BUS_RESET set and wait for function #0 to do a
hot reset.  Thanks,

Alex

>      /*
>       * TBD. Retrieve the error details and decide what action
>       * needs to be taken. One of the actions could be to pass
> @@ -3060,6 +3066,14 @@ static void vfio_pci_reset(DeviceState *dev)
>  
>      trace_vfio_pci_reset(vdev->vbasedev.name);
>  
> +    if (pdev->bus->is_bus_rst) {
> +        /* simply voting the function 0 to do hot bus reset */
> +        if (pci_get_function_0(pdev) == pdev) {
> +            vfio_pci_hot_reset(vdev, vdev->single_depend_dev);
> +        }
> +        return;
> +    }
> +
>      vfio_pci_pre_reset(vdev);
>  
>      if (vdev->resetfn && !vdev->resetfn(vdev)) {
> diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
> index aff46c2..32bd31f 100644
> --- a/hw/vfio/pci.h
> +++ b/hw/vfio/pci.h
> @@ -143,6 +143,7 @@ typedef struct VFIOPCIDevice {
>      bool no_kvm_intx;
>      bool no_kvm_msi;
>      bool no_kvm_msix;
> +    bool single_depend_dev;
>  } VFIOPCIDevice;
>  
>  uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len);
> diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h
> index 403fec6..6bcd334 100644
> --- a/include/hw/pci/pci_bus.h
> +++ b/include/hw/pci/pci_bus.h
> @@ -39,6 +39,8 @@ struct PCIBus {
>         Keep a count of the number of devices with raised IRQs.  */
>      int nirq;
>      int *irq_count;
> +
> +    bool is_bus_rst;
>  };
>  
>  typedef struct PCIBridgeWindows PCIBridgeWindows;
Chen Fan March 18, 2016, 1:15 a.m. UTC | #2
On 03/16/2016 04:38 AM, Alex Williamson wrote:
> On Tue, 15 Mar 2016 09:35:47 +0800
> Cao jin <caoj.fnst@cn.fujitsu.com> wrote:
>
>> From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
>>
>> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
>> ---
>>   hw/pci/pci.c             |  2 ++
>>   hw/vfio/pci.c            | 14 ++++++++++++++
>>   hw/vfio/pci.h            |  1 +
>>   include/hw/pci/pci_bus.h |  2 ++
>>   4 files changed, 19 insertions(+)
>>
>> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
>> index e67664d..953745d 100644
>> --- a/hw/pci/pci.c
>> +++ b/hw/pci/pci.c
>> @@ -276,6 +276,8 @@ static void pcibus_reset(BusState *qbus)
>>       for (i = 0; i < bus->nirq; i++) {
>>           assert(bus->irq_count[i] == 0);
>>       }
>> +
>> +    bus->is_bus_rst = false;
>>   }
>>   
>>   static void pci_host_bus_register(DeviceState *host)
>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>> index 223c0ee..b944d0b 100644
>> --- a/hw/vfio/pci.c
>> +++ b/hw/vfio/pci.c
>> @@ -1901,6 +1901,8 @@ static void vfio_check_hot_bus_reset(VFIOPCIDevice *vdev, Error **errp)
>>       /* List all affected devices by bus reset */
>>       devices = &info->devices[0];
>>   
>> +    vdev->single_depend_dev = (info->count == 1);
>> +
>>       /* Verify that we have all the groups required */
>>       for (i = 0; i < info->count; i++) {
>>           PCIHostDeviceAddress host;
>> @@ -2593,6 +2595,10 @@ static void vfio_err_notifier_handler(void *opaque)
>>           return;
>>       }
>>   
>> +    if ((vdev->features & VFIO_FEATURE_ENABLE_AER)) {
>> +        vdev->pdev.bus->is_bus_rst = true;
>> +    }
>> +
> So we're *assuming* that the next reset will be a bus reset because we
> took an AER fault... what if that particular error got handled in
> another way, maybe a device specific handler that doesn't do a bus
> reset?  The asymmetry of setting a value here and clearing it in PCI
> code is pretty undesirable as well.  Can we detect that the bus is in
> reset on our own given the current set of configuration restrictions?
> Seems pretty easy to test for an AER device with the parent bridge
> having PCI_BRIDGE_CTL_BUS_RESET set and wait for function #0 to do a
> hot reset.  Thanks,
Indeed. Thanks for your help.

Chen
>
> Alex
>
>>       /*
>>        * TBD. Retrieve the error details and decide what action
>>        * needs to be taken. One of the actions could be to pass
>> @@ -3060,6 +3066,14 @@ static void vfio_pci_reset(DeviceState *dev)
>>   
>>       trace_vfio_pci_reset(vdev->vbasedev.name);
>>   
>> +    if (pdev->bus->is_bus_rst) {
>> +        /* simply voting the function 0 to do hot bus reset */
>> +        if (pci_get_function_0(pdev) == pdev) {
>> +            vfio_pci_hot_reset(vdev, vdev->single_depend_dev);
>> +        }
>> +        return;
>> +    }
>> +
>>       vfio_pci_pre_reset(vdev);
>>   
>>       if (vdev->resetfn && !vdev->resetfn(vdev)) {
>> diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
>> index aff46c2..32bd31f 100644
>> --- a/hw/vfio/pci.h
>> +++ b/hw/vfio/pci.h
>> @@ -143,6 +143,7 @@ typedef struct VFIOPCIDevice {
>>       bool no_kvm_intx;
>>       bool no_kvm_msi;
>>       bool no_kvm_msix;
>> +    bool single_depend_dev;
>>   } VFIOPCIDevice;
>>   
>>   uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len);
>> diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h
>> index 403fec6..6bcd334 100644
>> --- a/include/hw/pci/pci_bus.h
>> +++ b/include/hw/pci/pci_bus.h
>> @@ -39,6 +39,8 @@ struct PCIBus {
>>          Keep a count of the number of devices with raised IRQs.  */
>>       int nirq;
>>       int *irq_count;
>> +
>> +    bool is_bus_rst;
>>   };
>>   
>>   typedef struct PCIBridgeWindows PCIBridgeWindows;
>
>
> .
>
diff mbox

Patch

diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index e67664d..953745d 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -276,6 +276,8 @@  static void pcibus_reset(BusState *qbus)
     for (i = 0; i < bus->nirq; i++) {
         assert(bus->irq_count[i] == 0);
     }
+
+    bus->is_bus_rst = false;
 }
 
 static void pci_host_bus_register(DeviceState *host)
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 223c0ee..b944d0b 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -1901,6 +1901,8 @@  static void vfio_check_hot_bus_reset(VFIOPCIDevice *vdev, Error **errp)
     /* List all affected devices by bus reset */
     devices = &info->devices[0];
 
+    vdev->single_depend_dev = (info->count == 1);
+
     /* Verify that we have all the groups required */
     for (i = 0; i < info->count; i++) {
         PCIHostDeviceAddress host;
@@ -2593,6 +2595,10 @@  static void vfio_err_notifier_handler(void *opaque)
         return;
     }
 
+    if ((vdev->features & VFIO_FEATURE_ENABLE_AER)) {
+        vdev->pdev.bus->is_bus_rst = true;
+    }
+
     /*
      * TBD. Retrieve the error details and decide what action
      * needs to be taken. One of the actions could be to pass
@@ -3060,6 +3066,14 @@  static void vfio_pci_reset(DeviceState *dev)
 
     trace_vfio_pci_reset(vdev->vbasedev.name);
 
+    if (pdev->bus->is_bus_rst) {
+        /* simply voting the function 0 to do hot bus reset */
+        if (pci_get_function_0(pdev) == pdev) {
+            vfio_pci_hot_reset(vdev, vdev->single_depend_dev);
+        }
+        return;
+    }
+
     vfio_pci_pre_reset(vdev);
 
     if (vdev->resetfn && !vdev->resetfn(vdev)) {
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index aff46c2..32bd31f 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -143,6 +143,7 @@  typedef struct VFIOPCIDevice {
     bool no_kvm_intx;
     bool no_kvm_msi;
     bool no_kvm_msix;
+    bool single_depend_dev;
 } VFIOPCIDevice;
 
 uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len);
diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h
index 403fec6..6bcd334 100644
--- a/include/hw/pci/pci_bus.h
+++ b/include/hw/pci/pci_bus.h
@@ -39,6 +39,8 @@  struct PCIBus {
        Keep a count of the number of devices with raised IRQs.  */
     int nirq;
     int *irq_count;
+
+    bool is_bus_rst;
 };
 
 typedef struct PCIBridgeWindows PCIBridgeWindows;