diff mbox series

[RFC] pcie: Defer hot unplug until migration is complete

Message ID 20200113140122.29486-1-yury-kotov@yandex-team.ru (mailing list archive)
State New, archived
Headers show
Series [RFC] pcie: Defer hot unplug until migration is complete | expand

Commit Message

Yury Kotov Jan. 13, 2020, 2:01 p.m. UTC
Devices hot-plug during migration is not allowed and disabled in
corresponding QMP-commands (device_add, device_del).
But guest still can unplug device by powering it off
(Example: echo 0 > /sys/bus/pci/slots/XXX/power).

Fix it by deferring unplugging until the migration is complete.

Signed-off-by: Yury Kotov <yury-kotov@yandex-team.ru>
---
 hw/pci-bridge/gen_pcie_root_port.c |  7 ++++
 hw/pci-bridge/ioh3420.c            |  7 ++++
 hw/pci-bridge/xio3130_downstream.c |  7 ++++
 hw/pci/pcie.c                      | 54 +++++++++++++++++++++++-------
 hw/pci/pcie_port.c                 | 47 ++++++++++++++++++++++++++
 include/hw/pci/pcie.h              |  1 +
 include/hw/pci/pcie_port.h         | 20 +++++++++++
 7 files changed, 130 insertions(+), 13 deletions(-)

Comments

Julia Suvorova Jan. 16, 2020, 10:47 a.m. UTC | #1
On Mon, Jan 13, 2020 at 3:04 PM Yury Kotov <yury-kotov@yandex-team.ru> wrote:
>
> Devices hot-plug during migration is not allowed and disabled in
> corresponding QMP-commands (device_add, device_del).
> But guest still can unplug device by powering it off
> (Example: echo 0 > /sys/bus/pci/slots/XXX/power).

You don't want to unplug device due to powering the slot off in the
first place. Instead, you can hide the device (see f3a8505656), and
make it visible again when the slot is powered on. Thus, the guest
will not be able to unplug the device and your problem will disappear.

Best regards, Julia Suvorova.

> Fix it by deferring unplugging until the migration is complete.
>
> Signed-off-by: Yury Kotov <yury-kotov@yandex-team.ru>
> ---
>  hw/pci-bridge/gen_pcie_root_port.c |  7 ++++
>  hw/pci-bridge/ioh3420.c            |  7 ++++
>  hw/pci-bridge/xio3130_downstream.c |  7 ++++
>  hw/pci/pcie.c                      | 54 +++++++++++++++++++++++-------
>  hw/pci/pcie_port.c                 | 47 ++++++++++++++++++++++++++
>  include/hw/pci/pcie.h              |  1 +
>  include/hw/pci/pcie_port.h         | 20 +++++++++++
>  7 files changed, 130 insertions(+), 13 deletions(-)
>
> diff --git a/hw/pci-bridge/gen_pcie_root_port.c b/hw/pci-bridge/gen_pcie_root_port.c
> index 9eaefebca8..5b3c202341 100644
> --- a/hw/pci-bridge/gen_pcie_root_port.c
> +++ b/hw/pci-bridge/gen_pcie_root_port.c
> @@ -100,6 +100,9 @@ static void gen_rp_realize(DeviceState *dev, Error **errp)
>      }
>  }
>
> +static const VMStateDescription vmstate_rp_deffered_unplug =
> +    VMSTATE_DEFFERED_UNPLUG("pcie-root-port");
> +
>  static const VMStateDescription vmstate_rp_dev = {
>      .name = "pcie-root-port",
>      .priority = MIG_PRI_PCI_BUS,
> @@ -114,6 +117,10 @@ static const VMStateDescription vmstate_rp_dev = {
>                            GenPCIERootPort,
>                            gen_rp_test_migrate_msix),
>          VMSTATE_END_OF_LIST()
> +    },
> +    .subsections = (const VMStateDescription * []) {
> +        &vmstate_rp_deffered_unplug,
> +        NULL
>      }
>  };
>
> diff --git a/hw/pci-bridge/ioh3420.c b/hw/pci-bridge/ioh3420.c
> index f1e16135a3..2399a9a87f 100644
> --- a/hw/pci-bridge/ioh3420.c
> +++ b/hw/pci-bridge/ioh3420.c
> @@ -82,6 +82,9 @@ static void ioh3420_interrupts_uninit(PCIDevice *d)
>      msi_uninit(d);
>  }
>
> +static const VMStateDescription vmstate_ioh3420_deffered_unplug =
> +    VMSTATE_DEFFERED_UNPLUG("ioh-3240-express-root-port");
> +
>  static const VMStateDescription vmstate_ioh3420 = {
>      .name = "ioh-3240-express-root-port",
>      .priority = MIG_PRI_PCI_BUS,
> @@ -93,6 +96,10 @@ static const VMStateDescription vmstate_ioh3420 = {
>          VMSTATE_STRUCT(parent_obj.parent_obj.parent_obj.exp.aer_log,
>                         PCIESlot, 0, vmstate_pcie_aer_log, PCIEAERLog),
>          VMSTATE_END_OF_LIST()
> +    },
> +    .subsections = (const VMStateDescription * []) {
> +        &vmstate_ioh3420_deffered_unplug,
> +        NULL
>      }
>  };
>
> diff --git a/hw/pci-bridge/xio3130_downstream.c b/hw/pci-bridge/xio3130_downstream.c
> index a9f084b863..a5b4fe08ee 100644
> --- a/hw/pci-bridge/xio3130_downstream.c
> +++ b/hw/pci-bridge/xio3130_downstream.c
> @@ -139,6 +139,9 @@ static Property xio3130_downstream_props[] = {
>      DEFINE_PROP_END_OF_LIST()
>  };
>
> +static const VMStateDescription vmstate_xio3130_downstream_deffered_unplug =
> +    VMSTATE_DEFFERED_UNPLUG("xio3130-express-downstream-port");
> +
>  static const VMStateDescription vmstate_xio3130_downstream = {
>      .name = "xio3130-express-downstream-port",
>      .priority = MIG_PRI_PCI_BUS,
> @@ -150,6 +153,10 @@ static const VMStateDescription vmstate_xio3130_downstream = {
>          VMSTATE_STRUCT(parent_obj.parent_obj.parent_obj.exp.aer_log,
>                         PCIESlot, 0, vmstate_pcie_aer_log, PCIEAERLog),
>          VMSTATE_END_OF_LIST()
> +    },
> +    .subsections = (const VMStateDescription * []) {
> +        &vmstate_xio3130_downstream_deffered_unplug,
> +        NULL
>      }
>  };
>
> diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
> index 08718188bb..29f0e5c05b 100644
> --- a/hw/pci/pcie.c
> +++ b/hw/pci/pcie.c
> @@ -28,6 +28,8 @@
>  #include "hw/pci/pcie_regs.h"
>  #include "hw/pci/pcie_port.h"
>  #include "qemu/range.h"
> +#include "sysemu/sysemu.h"
> +#include "migration/misc.h"
>
>  //#define DEBUG_PCIE
>  #ifdef DEBUG_PCIE
> @@ -575,6 +577,7 @@ void pcie_cap_slot_reset(PCIDevice *dev)
>
>      if (dev->cap_present & QEMU_PCIE_SLTCAP_PCP) {
>          /* Downstream ports enforce device number 0. */
> +        PCIESlot *slot = PCIE_SLOT(dev);
>          bool populated = pci_bridge_get_sec_bus(PCI_BRIDGE(dev))->devices[0];
>          uint16_t pic;
>
> @@ -588,6 +591,7 @@ void pcie_cap_slot_reset(PCIDevice *dev)
>
>          pic = populated ? PCI_EXP_SLTCTL_PIC_ON : PCI_EXP_SLTCTL_PIC_OFF;
>          pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTCTL, pic);
> +        slot->unplug_is_deferred = false;
>      }
>
>      pci_word_test_and_clear_mask(exp_cap + PCI_EXP_SLTSTA,
> @@ -608,13 +612,42 @@ void pcie_cap_slot_get(PCIDevice *dev, uint16_t *slt_ctl, uint16_t *slt_sta)
>      *slt_sta = pci_get_word(exp_cap + PCI_EXP_SLTSTA);
>  }
>
> +static void pcie_cap_slot_unplug(PCIDevice *dev)
> +{
> +    uint32_t pos = dev->exp.exp_cap;
> +    uint8_t *exp_cap = dev->config + pos;
> +    PCIBus *sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(dev));
> +
> +    pci_for_each_device(sec_bus, pci_bus_num(sec_bus),
> +                        pcie_unplug_device, NULL);
> +    pci_word_test_and_clear_mask(exp_cap + PCI_EXP_SLTSTA, PCI_EXP_SLTSTA_PDS);
> +    if (dev->cap_present & QEMU_PCIE_LNKSTA_DLLLA) {
> +        pci_word_test_and_clear_mask(exp_cap + PCI_EXP_LNKSTA,
> +                                     PCI_EXP_LNKSTA_DLLLA);
> +    }
> +    pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTSTA, PCI_EXP_SLTSTA_PDC);
> +    hotplug_event_notify(dev);
> +}
> +
> +void pcie_cap_slot_deferred_unplug(PCIDevice *dev)
> +{
> +    PCIESlot *slot = PCIE_SLOT(dev);
> +
> +    if (migration_is_idle() && slot->unplug_is_deferred) {
> +        pcie_cap_slot_unplug(dev);
> +        slot->unplug_is_deferred = false;
> +    }
> +}
> +
>  void pcie_cap_slot_write_config(PCIDevice *dev,
>                                  uint16_t old_slt_ctl, uint16_t old_slt_sta,
>                                  uint32_t addr, uint32_t val, int len)
>  {
> +    PCIESlot *slot = PCIE_SLOT(dev);
>      uint32_t pos = dev->exp.exp_cap;
>      uint8_t *exp_cap = dev->config + pos;
>      uint16_t sltsta = pci_get_word(exp_cap + PCI_EXP_SLTSTA);
> +    bool may_unplug;
>
>      if (ranges_overlap(addr, len, pos + PCI_EXP_SLTSTA, 2)) {
>          /*
> @@ -660,22 +693,17 @@ void pcie_cap_slot_write_config(PCIDevice *dev,
>       * this is a work around for guests that overwrite
>       * control of powered off slots before powering them on.
>       */
> -    if ((sltsta & PCI_EXP_SLTSTA_PDS) && (val & PCI_EXP_SLTCTL_PCC) &&
> -        (val & PCI_EXP_SLTCTL_PIC_OFF) == PCI_EXP_SLTCTL_PIC_OFF &&
> +    may_unplug = (val & PCI_EXP_SLTCTL_PCC) &&
> +                 (val & PCI_EXP_SLTCTL_PIC_OFF) == PCI_EXP_SLTCTL_PIC_OFF;
> +    if (may_unplug && (sltsta & PCI_EXP_SLTSTA_PDS) &&
>          (!(old_slt_ctl & PCI_EXP_SLTCTL_PCC) ||
>          (old_slt_ctl & PCI_EXP_SLTCTL_PIC_OFF) != PCI_EXP_SLTCTL_PIC_OFF)) {
> -        PCIBus *sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(dev));
> -        pci_for_each_device(sec_bus, pci_bus_num(sec_bus),
> -                            pcie_unplug_device, NULL);
> -
> -        pci_word_test_and_clear_mask(exp_cap + PCI_EXP_SLTSTA,
> -                                     PCI_EXP_SLTSTA_PDS);
> -        if (dev->cap_present & QEMU_PCIE_LNKSTA_DLLLA) {
> -            pci_word_test_and_clear_mask(exp_cap + PCI_EXP_LNKSTA,
> -                                         PCI_EXP_LNKSTA_DLLLA);
> +        slot->unplug_is_deferred = !migration_is_idle();
> +        if (!slot->unplug_is_deferred) {
> +            pcie_cap_slot_unplug(dev);
>          }
> -        pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTSTA,
> -                                       PCI_EXP_SLTSTA_PDC);
> +    } else if (!may_unplug) {
> +        slot->unplug_is_deferred = false;
>      }
>
>      hotplug_event_notify(dev);
> diff --git a/hw/pci/pcie_port.c b/hw/pci/pcie_port.c
> index c19a9be592..bd5fbf6827 100644
> --- a/hw/pci/pcie_port.c
> +++ b/hw/pci/pcie_port.c
> @@ -23,6 +23,9 @@
>  #include "hw/qdev-properties.h"
>  #include "qemu/module.h"
>  #include "hw/hotplug.h"
> +#include "sysemu/runstate.h"
> +#include "migration/migration.h"
> +#include "migration/misc.h"
>
>  void pcie_port_init_reg(PCIDevice *d)
>  {
> @@ -150,6 +153,48 @@ static Property pcie_slot_props[] = {
>      DEFINE_PROP_END_OF_LIST()
>  };
>
> +bool vmstate_deffered_unplug_needed(void *opaque)
> +{
> +    PCIESlot *slot = opaque;
> +
> +    return slot->unplug_is_deferred;
> +}
> +
> +static void pcie_slot_migration_notifier_cb(Notifier *notifier, void *data)
> +{
> +    PCIESlot *slot = container_of(notifier, PCIESlot, migration_notifier);
> +
> +    pcie_cap_slot_deferred_unplug(PCI_DEVICE(slot));
> +}
> +
> +static void pcie_slot_vm_state_change(void *opaque, int running, RunState state)
> +{
> +    PCIESlot *slot = opaque;
> +
> +    pcie_cap_slot_deferred_unplug(PCI_DEVICE(slot));
> +}
> +
> +static void pcie_slot_init(Object *obj)
> +{
> +    PCIESlot *slot = PCIE_SLOT(obj);
> +
> +    slot->unplug_is_deferred = false;
> +    slot->migration_notifier = (Notifier) {
> +        .notify = pcie_slot_migration_notifier_cb
> +    };
> +    add_migration_state_change_notifier(&slot->migration_notifier);
> +    slot->vmstate_change =
> +        qemu_add_vm_change_state_handler(pcie_slot_vm_state_change, slot);
> +}
> +
> +static void pcie_slot_finalize(Object *obj)
> +{
> +    PCIESlot *slot = PCIE_SLOT(obj);
> +
> +    remove_migration_state_change_notifier(&slot->migration_notifier);
> +    qemu_del_vm_change_state_handler(slot->vmstate_change);
> +}
> +
>  static void pcie_slot_class_init(ObjectClass *oc, void *data)
>  {
>      DeviceClass *dc = DEVICE_CLASS(oc);
> @@ -166,6 +211,8 @@ static const TypeInfo pcie_slot_type_info = {
>      .name = TYPE_PCIE_SLOT,
>      .parent = TYPE_PCIE_PORT,
>      .instance_size = sizeof(PCIESlot),
> +    .instance_init = pcie_slot_init,
> +    .instance_finalize = pcie_slot_finalize,
>      .abstract = true,
>      .class_init = pcie_slot_class_init,
>      .interfaces = (InterfaceInfo[]) {
> diff --git a/include/hw/pci/pcie.h b/include/hw/pci/pcie.h
> index 7064875835..128f26199e 100644
> --- a/include/hw/pci/pcie.h
> +++ b/include/hw/pci/pcie.h
> @@ -110,6 +110,7 @@ void pcie_cap_slot_get(PCIDevice *dev, uint16_t *slt_ctl, uint16_t *slt_sta);
>  void pcie_cap_slot_write_config(PCIDevice *dev,
>                                  uint16_t old_slt_ctl, uint16_t old_slt_sta,
>                                  uint32_t addr, uint32_t val, int len);
> +void pcie_cap_slot_deferred_unplug(PCIDevice *dev);
>  int pcie_cap_slot_post_load(void *opaque, int version_id);
>  void pcie_cap_slot_push_attention_button(PCIDevice *dev);
>
> diff --git a/include/hw/pci/pcie_port.h b/include/hw/pci/pcie_port.h
> index 7515430087..32e45f0c89 100644
> --- a/include/hw/pci/pcie_port.h
> +++ b/include/hw/pci/pcie_port.h
> @@ -23,6 +23,9 @@
>
>  #include "hw/pci/pci_bridge.h"
>  #include "hw/pci/pci_bus.h"
> +#include "qemu/notify.h"
> +#include "sysemu/sysemu.h"
> +#include "migration/vmstate.h"
>
>  #define TYPE_PCIE_PORT "pcie-port"
>  #define PCIE_PORT(obj) OBJECT_CHECK(PCIEPort, (obj), TYPE_PCIE_PORT)
> @@ -44,6 +47,10 @@ void pcie_port_init_reg(PCIDevice *d);
>  struct PCIESlot {
>      /*< private >*/
>      PCIEPort    parent_obj;
> +    bool        unplug_is_deferred;
> +    Notifier    migration_notifier;
> +    VMChangeStateEntry *vmstate_change;
> +
>      /*< public >*/
>
>      /* pci express switch port with slot */
> @@ -58,6 +65,19 @@ struct PCIESlot {
>      QLIST_ENTRY(PCIESlot) next;
>  };
>
> +bool vmstate_deffered_unplug_needed(void *opaque);
> +
> +#define VMSTATE_DEFFERED_UNPLUG(parent_section_name) {            \
> +    .name = parent_section_name "/deffered-unplug",               \
> +    .version_id = 1,                                              \
> +    .minimum_version_id = 1,                                      \
> +    .needed = vmstate_deffered_unplug_needed,                     \
> +    .fields = (VMStateField[]) {                                  \
> +        VMSTATE_BOOL(unplug_is_deferred, PCIESlot),               \
> +        VMSTATE_END_OF_LIST()                                     \
> +    }                                                             \
> +}
> +
>  void pcie_chassis_create(uint8_t chassis_number);
>  PCIESlot *pcie_chassis_find_slot(uint8_t chassis, uint16_t slot);
>  int pcie_chassis_add_slot(struct PCIESlot *slot);
> --
> 2.24.1
>
>
Dr. David Alan Gilbert Jan. 17, 2020, 4:56 p.m. UTC | #2
* Julia Suvorova (jusual@redhat.com) wrote:
> On Mon, Jan 13, 2020 at 3:04 PM Yury Kotov <yury-kotov@yandex-team.ru> wrote:
> >
> > Devices hot-plug during migration is not allowed and disabled in
> > corresponding QMP-commands (device_add, device_del).
> > But guest still can unplug device by powering it off
> > (Example: echo 0 > /sys/bus/pci/slots/XXX/power).
> 
> You don't want to unplug device due to powering the slot off in the
> first place. Instead, you can hide the device (see f3a8505656), and
> make it visible again when the slot is powered on. Thus, the guest
> will not be able to unplug the device and your problem will disappear.
> 
> Best regards, Julia Suvorova.

I don't really understand how hidden a hidden device is;
will it still get it's migration data migrated?
  - if so what state is it in? What gets migrated?
is the behaviour after reset/reboot consistent with what we want
in the existing unplug behaviour?
Say it's a disk device; will it release the lock on the disk backend?

It feels like the semantics need to be tied down a bit.

Dave

> > Fix it by deferring unplugging until the migration is complete.
> >
> > Signed-off-by: Yury Kotov <yury-kotov@yandex-team.ru>
> > ---
> >  hw/pci-bridge/gen_pcie_root_port.c |  7 ++++
> >  hw/pci-bridge/ioh3420.c            |  7 ++++
> >  hw/pci-bridge/xio3130_downstream.c |  7 ++++
> >  hw/pci/pcie.c                      | 54 +++++++++++++++++++++++-------
> >  hw/pci/pcie_port.c                 | 47 ++++++++++++++++++++++++++
> >  include/hw/pci/pcie.h              |  1 +
> >  include/hw/pci/pcie_port.h         | 20 +++++++++++
> >  7 files changed, 130 insertions(+), 13 deletions(-)
> >
> > diff --git a/hw/pci-bridge/gen_pcie_root_port.c b/hw/pci-bridge/gen_pcie_root_port.c
> > index 9eaefebca8..5b3c202341 100644
> > --- a/hw/pci-bridge/gen_pcie_root_port.c
> > +++ b/hw/pci-bridge/gen_pcie_root_port.c
> > @@ -100,6 +100,9 @@ static void gen_rp_realize(DeviceState *dev, Error **errp)
> >      }
> >  }
> >
> > +static const VMStateDescription vmstate_rp_deffered_unplug =
> > +    VMSTATE_DEFFERED_UNPLUG("pcie-root-port");
> > +
> >  static const VMStateDescription vmstate_rp_dev = {
> >      .name = "pcie-root-port",
> >      .priority = MIG_PRI_PCI_BUS,
> > @@ -114,6 +117,10 @@ static const VMStateDescription vmstate_rp_dev = {
> >                            GenPCIERootPort,
> >                            gen_rp_test_migrate_msix),
> >          VMSTATE_END_OF_LIST()
> > +    },
> > +    .subsections = (const VMStateDescription * []) {
> > +        &vmstate_rp_deffered_unplug,
> > +        NULL
> >      }
> >  };
> >
> > diff --git a/hw/pci-bridge/ioh3420.c b/hw/pci-bridge/ioh3420.c
> > index f1e16135a3..2399a9a87f 100644
> > --- a/hw/pci-bridge/ioh3420.c
> > +++ b/hw/pci-bridge/ioh3420.c
> > @@ -82,6 +82,9 @@ static void ioh3420_interrupts_uninit(PCIDevice *d)
> >      msi_uninit(d);
> >  }
> >
> > +static const VMStateDescription vmstate_ioh3420_deffered_unplug =
> > +    VMSTATE_DEFFERED_UNPLUG("ioh-3240-express-root-port");
> > +
> >  static const VMStateDescription vmstate_ioh3420 = {
> >      .name = "ioh-3240-express-root-port",
> >      .priority = MIG_PRI_PCI_BUS,
> > @@ -93,6 +96,10 @@ static const VMStateDescription vmstate_ioh3420 = {
> >          VMSTATE_STRUCT(parent_obj.parent_obj.parent_obj.exp.aer_log,
> >                         PCIESlot, 0, vmstate_pcie_aer_log, PCIEAERLog),
> >          VMSTATE_END_OF_LIST()
> > +    },
> > +    .subsections = (const VMStateDescription * []) {
> > +        &vmstate_ioh3420_deffered_unplug,
> > +        NULL
> >      }
> >  };
> >
> > diff --git a/hw/pci-bridge/xio3130_downstream.c b/hw/pci-bridge/xio3130_downstream.c
> > index a9f084b863..a5b4fe08ee 100644
> > --- a/hw/pci-bridge/xio3130_downstream.c
> > +++ b/hw/pci-bridge/xio3130_downstream.c
> > @@ -139,6 +139,9 @@ static Property xio3130_downstream_props[] = {
> >      DEFINE_PROP_END_OF_LIST()
> >  };
> >
> > +static const VMStateDescription vmstate_xio3130_downstream_deffered_unplug =
> > +    VMSTATE_DEFFERED_UNPLUG("xio3130-express-downstream-port");
> > +
> >  static const VMStateDescription vmstate_xio3130_downstream = {
> >      .name = "xio3130-express-downstream-port",
> >      .priority = MIG_PRI_PCI_BUS,
> > @@ -150,6 +153,10 @@ static const VMStateDescription vmstate_xio3130_downstream = {
> >          VMSTATE_STRUCT(parent_obj.parent_obj.parent_obj.exp.aer_log,
> >                         PCIESlot, 0, vmstate_pcie_aer_log, PCIEAERLog),
> >          VMSTATE_END_OF_LIST()
> > +    },
> > +    .subsections = (const VMStateDescription * []) {
> > +        &vmstate_xio3130_downstream_deffered_unplug,
> > +        NULL
> >      }
> >  };
> >
> > diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
> > index 08718188bb..29f0e5c05b 100644
> > --- a/hw/pci/pcie.c
> > +++ b/hw/pci/pcie.c
> > @@ -28,6 +28,8 @@
> >  #include "hw/pci/pcie_regs.h"
> >  #include "hw/pci/pcie_port.h"
> >  #include "qemu/range.h"
> > +#include "sysemu/sysemu.h"
> > +#include "migration/misc.h"
> >
> >  //#define DEBUG_PCIE
> >  #ifdef DEBUG_PCIE
> > @@ -575,6 +577,7 @@ void pcie_cap_slot_reset(PCIDevice *dev)
> >
> >      if (dev->cap_present & QEMU_PCIE_SLTCAP_PCP) {
> >          /* Downstream ports enforce device number 0. */
> > +        PCIESlot *slot = PCIE_SLOT(dev);
> >          bool populated = pci_bridge_get_sec_bus(PCI_BRIDGE(dev))->devices[0];
> >          uint16_t pic;
> >
> > @@ -588,6 +591,7 @@ void pcie_cap_slot_reset(PCIDevice *dev)
> >
> >          pic = populated ? PCI_EXP_SLTCTL_PIC_ON : PCI_EXP_SLTCTL_PIC_OFF;
> >          pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTCTL, pic);
> > +        slot->unplug_is_deferred = false;
> >      }
> >
> >      pci_word_test_and_clear_mask(exp_cap + PCI_EXP_SLTSTA,
> > @@ -608,13 +612,42 @@ void pcie_cap_slot_get(PCIDevice *dev, uint16_t *slt_ctl, uint16_t *slt_sta)
> >      *slt_sta = pci_get_word(exp_cap + PCI_EXP_SLTSTA);
> >  }
> >
> > +static void pcie_cap_slot_unplug(PCIDevice *dev)
> > +{
> > +    uint32_t pos = dev->exp.exp_cap;
> > +    uint8_t *exp_cap = dev->config + pos;
> > +    PCIBus *sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(dev));
> > +
> > +    pci_for_each_device(sec_bus, pci_bus_num(sec_bus),
> > +                        pcie_unplug_device, NULL);
> > +    pci_word_test_and_clear_mask(exp_cap + PCI_EXP_SLTSTA, PCI_EXP_SLTSTA_PDS);
> > +    if (dev->cap_present & QEMU_PCIE_LNKSTA_DLLLA) {
> > +        pci_word_test_and_clear_mask(exp_cap + PCI_EXP_LNKSTA,
> > +                                     PCI_EXP_LNKSTA_DLLLA);
> > +    }
> > +    pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTSTA, PCI_EXP_SLTSTA_PDC);
> > +    hotplug_event_notify(dev);
> > +}
> > +
> > +void pcie_cap_slot_deferred_unplug(PCIDevice *dev)
> > +{
> > +    PCIESlot *slot = PCIE_SLOT(dev);
> > +
> > +    if (migration_is_idle() && slot->unplug_is_deferred) {
> > +        pcie_cap_slot_unplug(dev);
> > +        slot->unplug_is_deferred = false;
> > +    }
> > +}
> > +
> >  void pcie_cap_slot_write_config(PCIDevice *dev,
> >                                  uint16_t old_slt_ctl, uint16_t old_slt_sta,
> >                                  uint32_t addr, uint32_t val, int len)
> >  {
> > +    PCIESlot *slot = PCIE_SLOT(dev);
> >      uint32_t pos = dev->exp.exp_cap;
> >      uint8_t *exp_cap = dev->config + pos;
> >      uint16_t sltsta = pci_get_word(exp_cap + PCI_EXP_SLTSTA);
> > +    bool may_unplug;
> >
> >      if (ranges_overlap(addr, len, pos + PCI_EXP_SLTSTA, 2)) {
> >          /*
> > @@ -660,22 +693,17 @@ void pcie_cap_slot_write_config(PCIDevice *dev,
> >       * this is a work around for guests that overwrite
> >       * control of powered off slots before powering them on.
> >       */
> > -    if ((sltsta & PCI_EXP_SLTSTA_PDS) && (val & PCI_EXP_SLTCTL_PCC) &&
> > -        (val & PCI_EXP_SLTCTL_PIC_OFF) == PCI_EXP_SLTCTL_PIC_OFF &&
> > +    may_unplug = (val & PCI_EXP_SLTCTL_PCC) &&
> > +                 (val & PCI_EXP_SLTCTL_PIC_OFF) == PCI_EXP_SLTCTL_PIC_OFF;
> > +    if (may_unplug && (sltsta & PCI_EXP_SLTSTA_PDS) &&
> >          (!(old_slt_ctl & PCI_EXP_SLTCTL_PCC) ||
> >          (old_slt_ctl & PCI_EXP_SLTCTL_PIC_OFF) != PCI_EXP_SLTCTL_PIC_OFF)) {
> > -        PCIBus *sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(dev));
> > -        pci_for_each_device(sec_bus, pci_bus_num(sec_bus),
> > -                            pcie_unplug_device, NULL);
> > -
> > -        pci_word_test_and_clear_mask(exp_cap + PCI_EXP_SLTSTA,
> > -                                     PCI_EXP_SLTSTA_PDS);
> > -        if (dev->cap_present & QEMU_PCIE_LNKSTA_DLLLA) {
> > -            pci_word_test_and_clear_mask(exp_cap + PCI_EXP_LNKSTA,
> > -                                         PCI_EXP_LNKSTA_DLLLA);
> > +        slot->unplug_is_deferred = !migration_is_idle();
> > +        if (!slot->unplug_is_deferred) {
> > +            pcie_cap_slot_unplug(dev);
> >          }
> > -        pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTSTA,
> > -                                       PCI_EXP_SLTSTA_PDC);
> > +    } else if (!may_unplug) {
> > +        slot->unplug_is_deferred = false;
> >      }
> >
> >      hotplug_event_notify(dev);
> > diff --git a/hw/pci/pcie_port.c b/hw/pci/pcie_port.c
> > index c19a9be592..bd5fbf6827 100644
> > --- a/hw/pci/pcie_port.c
> > +++ b/hw/pci/pcie_port.c
> > @@ -23,6 +23,9 @@
> >  #include "hw/qdev-properties.h"
> >  #include "qemu/module.h"
> >  #include "hw/hotplug.h"
> > +#include "sysemu/runstate.h"
> > +#include "migration/migration.h"
> > +#include "migration/misc.h"
> >
> >  void pcie_port_init_reg(PCIDevice *d)
> >  {
> > @@ -150,6 +153,48 @@ static Property pcie_slot_props[] = {
> >      DEFINE_PROP_END_OF_LIST()
> >  };
> >
> > +bool vmstate_deffered_unplug_needed(void *opaque)
> > +{
> > +    PCIESlot *slot = opaque;
> > +
> > +    return slot->unplug_is_deferred;
> > +}
> > +
> > +static void pcie_slot_migration_notifier_cb(Notifier *notifier, void *data)
> > +{
> > +    PCIESlot *slot = container_of(notifier, PCIESlot, migration_notifier);
> > +
> > +    pcie_cap_slot_deferred_unplug(PCI_DEVICE(slot));
> > +}
> > +
> > +static void pcie_slot_vm_state_change(void *opaque, int running, RunState state)
> > +{
> > +    PCIESlot *slot = opaque;
> > +
> > +    pcie_cap_slot_deferred_unplug(PCI_DEVICE(slot));
> > +}
> > +
> > +static void pcie_slot_init(Object *obj)
> > +{
> > +    PCIESlot *slot = PCIE_SLOT(obj);
> > +
> > +    slot->unplug_is_deferred = false;
> > +    slot->migration_notifier = (Notifier) {
> > +        .notify = pcie_slot_migration_notifier_cb
> > +    };
> > +    add_migration_state_change_notifier(&slot->migration_notifier);
> > +    slot->vmstate_change =
> > +        qemu_add_vm_change_state_handler(pcie_slot_vm_state_change, slot);
> > +}
> > +
> > +static void pcie_slot_finalize(Object *obj)
> > +{
> > +    PCIESlot *slot = PCIE_SLOT(obj);
> > +
> > +    remove_migration_state_change_notifier(&slot->migration_notifier);
> > +    qemu_del_vm_change_state_handler(slot->vmstate_change);
> > +}
> > +
> >  static void pcie_slot_class_init(ObjectClass *oc, void *data)
> >  {
> >      DeviceClass *dc = DEVICE_CLASS(oc);
> > @@ -166,6 +211,8 @@ static const TypeInfo pcie_slot_type_info = {
> >      .name = TYPE_PCIE_SLOT,
> >      .parent = TYPE_PCIE_PORT,
> >      .instance_size = sizeof(PCIESlot),
> > +    .instance_init = pcie_slot_init,
> > +    .instance_finalize = pcie_slot_finalize,
> >      .abstract = true,
> >      .class_init = pcie_slot_class_init,
> >      .interfaces = (InterfaceInfo[]) {
> > diff --git a/include/hw/pci/pcie.h b/include/hw/pci/pcie.h
> > index 7064875835..128f26199e 100644
> > --- a/include/hw/pci/pcie.h
> > +++ b/include/hw/pci/pcie.h
> > @@ -110,6 +110,7 @@ void pcie_cap_slot_get(PCIDevice *dev, uint16_t *slt_ctl, uint16_t *slt_sta);
> >  void pcie_cap_slot_write_config(PCIDevice *dev,
> >                                  uint16_t old_slt_ctl, uint16_t old_slt_sta,
> >                                  uint32_t addr, uint32_t val, int len);
> > +void pcie_cap_slot_deferred_unplug(PCIDevice *dev);
> >  int pcie_cap_slot_post_load(void *opaque, int version_id);
> >  void pcie_cap_slot_push_attention_button(PCIDevice *dev);
> >
> > diff --git a/include/hw/pci/pcie_port.h b/include/hw/pci/pcie_port.h
> > index 7515430087..32e45f0c89 100644
> > --- a/include/hw/pci/pcie_port.h
> > +++ b/include/hw/pci/pcie_port.h
> > @@ -23,6 +23,9 @@
> >
> >  #include "hw/pci/pci_bridge.h"
> >  #include "hw/pci/pci_bus.h"
> > +#include "qemu/notify.h"
> > +#include "sysemu/sysemu.h"
> > +#include "migration/vmstate.h"
> >
> >  #define TYPE_PCIE_PORT "pcie-port"
> >  #define PCIE_PORT(obj) OBJECT_CHECK(PCIEPort, (obj), TYPE_PCIE_PORT)
> > @@ -44,6 +47,10 @@ void pcie_port_init_reg(PCIDevice *d);
> >  struct PCIESlot {
> >      /*< private >*/
> >      PCIEPort    parent_obj;
> > +    bool        unplug_is_deferred;
> > +    Notifier    migration_notifier;
> > +    VMChangeStateEntry *vmstate_change;
> > +
> >      /*< public >*/
> >
> >      /* pci express switch port with slot */
> > @@ -58,6 +65,19 @@ struct PCIESlot {
> >      QLIST_ENTRY(PCIESlot) next;
> >  };
> >
> > +bool vmstate_deffered_unplug_needed(void *opaque);
> > +
> > +#define VMSTATE_DEFFERED_UNPLUG(parent_section_name) {            \
> > +    .name = parent_section_name "/deffered-unplug",               \
> > +    .version_id = 1,                                              \
> > +    .minimum_version_id = 1,                                      \
> > +    .needed = vmstate_deffered_unplug_needed,                     \
> > +    .fields = (VMStateField[]) {                                  \
> > +        VMSTATE_BOOL(unplug_is_deferred, PCIESlot),               \
> > +        VMSTATE_END_OF_LIST()                                     \
> > +    }                                                             \
> > +}
> > +
> >  void pcie_chassis_create(uint8_t chassis_number);
> >  PCIESlot *pcie_chassis_find_slot(uint8_t chassis, uint16_t slot);
> >  int pcie_chassis_add_slot(struct PCIESlot *slot);
> > --
> > 2.24.1
> >
> >
> 
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Julia Suvorova Jan. 27, 2020, 3:12 p.m. UTC | #3
On Fri, Jan 17, 2020 at 5:56 PM Dr. David Alan Gilbert
<dgilbert@redhat.com> wrote:
>
> * Julia Suvorova (jusual@redhat.com) wrote:
> > On Mon, Jan 13, 2020 at 3:04 PM Yury Kotov <yury-kotov@yandex-team.ru> wrote:
> > >
> > > Devices hot-plug during migration is not allowed and disabled in
> > > corresponding QMP-commands (device_add, device_del).
> > > But guest still can unplug device by powering it off
> > > (Example: echo 0 > /sys/bus/pci/slots/XXX/power).
> >
> > You don't want to unplug device due to powering the slot off in the
> > first place. Instead, you can hide the device (see f3a8505656), and
> > make it visible again when the slot is powered on. Thus, the guest
> > will not be able to unplug the device and your problem will disappear.
> >
> > Best regards, Julia Suvorova.
>
> I don't really understand how hidden a hidden device is;
> will it still get it's migration data migrated?
>   - if so what state is it in? What gets migrated?
> is the behaviour after reset/reboot consistent with what we want
> in the existing unplug behaviour?
> Say it's a disk device; will it release the lock on the disk backend?
>
> It feels like the semantics need to be tied down a bit.

Yes. the concept is still raw, and patches presented don't provide
functionality we need. I'm working on it now.

Best regards, Julia Suvorova.

> > > Fix it by deferring unplugging until the migration is complete.
> > >
> > > Signed-off-by: Yury Kotov <yury-kotov@yandex-team.ru>
> > > ---
> > >  hw/pci-bridge/gen_pcie_root_port.c |  7 ++++
> > >  hw/pci-bridge/ioh3420.c            |  7 ++++
> > >  hw/pci-bridge/xio3130_downstream.c |  7 ++++
> > >  hw/pci/pcie.c                      | 54 +++++++++++++++++++++++-------
> > >  hw/pci/pcie_port.c                 | 47 ++++++++++++++++++++++++++
> > >  include/hw/pci/pcie.h              |  1 +
> > >  include/hw/pci/pcie_port.h         | 20 +++++++++++
> > >  7 files changed, 130 insertions(+), 13 deletions(-)
> > >
> > > diff --git a/hw/pci-bridge/gen_pcie_root_port.c b/hw/pci-bridge/gen_pcie_root_port.c
> > > index 9eaefebca8..5b3c202341 100644
> > > --- a/hw/pci-bridge/gen_pcie_root_port.c
> > > +++ b/hw/pci-bridge/gen_pcie_root_port.c
> > > @@ -100,6 +100,9 @@ static void gen_rp_realize(DeviceState *dev, Error **errp)
> > >      }
> > >  }
> > >
> > > +static const VMStateDescription vmstate_rp_deffered_unplug =
> > > +    VMSTATE_DEFFERED_UNPLUG("pcie-root-port");
> > > +
> > >  static const VMStateDescription vmstate_rp_dev = {
> > >      .name = "pcie-root-port",
> > >      .priority = MIG_PRI_PCI_BUS,
> > > @@ -114,6 +117,10 @@ static const VMStateDescription vmstate_rp_dev = {
> > >                            GenPCIERootPort,
> > >                            gen_rp_test_migrate_msix),
> > >          VMSTATE_END_OF_LIST()
> > > +    },
> > > +    .subsections = (const VMStateDescription * []) {
> > > +        &vmstate_rp_deffered_unplug,
> > > +        NULL
> > >      }
> > >  };
> > >
> > > diff --git a/hw/pci-bridge/ioh3420.c b/hw/pci-bridge/ioh3420.c
> > > index f1e16135a3..2399a9a87f 100644
> > > --- a/hw/pci-bridge/ioh3420.c
> > > +++ b/hw/pci-bridge/ioh3420.c
> > > @@ -82,6 +82,9 @@ static void ioh3420_interrupts_uninit(PCIDevice *d)
> > >      msi_uninit(d);
> > >  }
> > >
> > > +static const VMStateDescription vmstate_ioh3420_deffered_unplug =
> > > +    VMSTATE_DEFFERED_UNPLUG("ioh-3240-express-root-port");
> > > +
> > >  static const VMStateDescription vmstate_ioh3420 = {
> > >      .name = "ioh-3240-express-root-port",
> > >      .priority = MIG_PRI_PCI_BUS,
> > > @@ -93,6 +96,10 @@ static const VMStateDescription vmstate_ioh3420 = {
> > >          VMSTATE_STRUCT(parent_obj.parent_obj.parent_obj.exp.aer_log,
> > >                         PCIESlot, 0, vmstate_pcie_aer_log, PCIEAERLog),
> > >          VMSTATE_END_OF_LIST()
> > > +    },
> > > +    .subsections = (const VMStateDescription * []) {
> > > +        &vmstate_ioh3420_deffered_unplug,
> > > +        NULL
> > >      }
> > >  };
> > >
> > > diff --git a/hw/pci-bridge/xio3130_downstream.c b/hw/pci-bridge/xio3130_downstream.c
> > > index a9f084b863..a5b4fe08ee 100644
> > > --- a/hw/pci-bridge/xio3130_downstream.c
> > > +++ b/hw/pci-bridge/xio3130_downstream.c
> > > @@ -139,6 +139,9 @@ static Property xio3130_downstream_props[] = {
> > >      DEFINE_PROP_END_OF_LIST()
> > >  };
> > >
> > > +static const VMStateDescription vmstate_xio3130_downstream_deffered_unplug =
> > > +    VMSTATE_DEFFERED_UNPLUG("xio3130-express-downstream-port");
> > > +
> > >  static const VMStateDescription vmstate_xio3130_downstream = {
> > >      .name = "xio3130-express-downstream-port",
> > >      .priority = MIG_PRI_PCI_BUS,
> > > @@ -150,6 +153,10 @@ static const VMStateDescription vmstate_xio3130_downstream = {
> > >          VMSTATE_STRUCT(parent_obj.parent_obj.parent_obj.exp.aer_log,
> > >                         PCIESlot, 0, vmstate_pcie_aer_log, PCIEAERLog),
> > >          VMSTATE_END_OF_LIST()
> > > +    },
> > > +    .subsections = (const VMStateDescription * []) {
> > > +        &vmstate_xio3130_downstream_deffered_unplug,
> > > +        NULL
> > >      }
> > >  };
> > >
> > > diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
> > > index 08718188bb..29f0e5c05b 100644
> > > --- a/hw/pci/pcie.c
> > > +++ b/hw/pci/pcie.c
> > > @@ -28,6 +28,8 @@
> > >  #include "hw/pci/pcie_regs.h"
> > >  #include "hw/pci/pcie_port.h"
> > >  #include "qemu/range.h"
> > > +#include "sysemu/sysemu.h"
> > > +#include "migration/misc.h"
> > >
> > >  //#define DEBUG_PCIE
> > >  #ifdef DEBUG_PCIE
> > > @@ -575,6 +577,7 @@ void pcie_cap_slot_reset(PCIDevice *dev)
> > >
> > >      if (dev->cap_present & QEMU_PCIE_SLTCAP_PCP) {
> > >          /* Downstream ports enforce device number 0. */
> > > +        PCIESlot *slot = PCIE_SLOT(dev);
> > >          bool populated = pci_bridge_get_sec_bus(PCI_BRIDGE(dev))->devices[0];
> > >          uint16_t pic;
> > >
> > > @@ -588,6 +591,7 @@ void pcie_cap_slot_reset(PCIDevice *dev)
> > >
> > >          pic = populated ? PCI_EXP_SLTCTL_PIC_ON : PCI_EXP_SLTCTL_PIC_OFF;
> > >          pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTCTL, pic);
> > > +        slot->unplug_is_deferred = false;
> > >      }
> > >
> > >      pci_word_test_and_clear_mask(exp_cap + PCI_EXP_SLTSTA,
> > > @@ -608,13 +612,42 @@ void pcie_cap_slot_get(PCIDevice *dev, uint16_t *slt_ctl, uint16_t *slt_sta)
> > >      *slt_sta = pci_get_word(exp_cap + PCI_EXP_SLTSTA);
> > >  }
> > >
> > > +static void pcie_cap_slot_unplug(PCIDevice *dev)
> > > +{
> > > +    uint32_t pos = dev->exp.exp_cap;
> > > +    uint8_t *exp_cap = dev->config + pos;
> > > +    PCIBus *sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(dev));
> > > +
> > > +    pci_for_each_device(sec_bus, pci_bus_num(sec_bus),
> > > +                        pcie_unplug_device, NULL);
> > > +    pci_word_test_and_clear_mask(exp_cap + PCI_EXP_SLTSTA, PCI_EXP_SLTSTA_PDS);
> > > +    if (dev->cap_present & QEMU_PCIE_LNKSTA_DLLLA) {
> > > +        pci_word_test_and_clear_mask(exp_cap + PCI_EXP_LNKSTA,
> > > +                                     PCI_EXP_LNKSTA_DLLLA);
> > > +    }
> > > +    pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTSTA, PCI_EXP_SLTSTA_PDC);
> > > +    hotplug_event_notify(dev);
> > > +}
> > > +
> > > +void pcie_cap_slot_deferred_unplug(PCIDevice *dev)
> > > +{
> > > +    PCIESlot *slot = PCIE_SLOT(dev);
> > > +
> > > +    if (migration_is_idle() && slot->unplug_is_deferred) {
> > > +        pcie_cap_slot_unplug(dev);
> > > +        slot->unplug_is_deferred = false;
> > > +    }
> > > +}
> > > +
> > >  void pcie_cap_slot_write_config(PCIDevice *dev,
> > >                                  uint16_t old_slt_ctl, uint16_t old_slt_sta,
> > >                                  uint32_t addr, uint32_t val, int len)
> > >  {
> > > +    PCIESlot *slot = PCIE_SLOT(dev);
> > >      uint32_t pos = dev->exp.exp_cap;
> > >      uint8_t *exp_cap = dev->config + pos;
> > >      uint16_t sltsta = pci_get_word(exp_cap + PCI_EXP_SLTSTA);
> > > +    bool may_unplug;
> > >
> > >      if (ranges_overlap(addr, len, pos + PCI_EXP_SLTSTA, 2)) {
> > >          /*
> > > @@ -660,22 +693,17 @@ void pcie_cap_slot_write_config(PCIDevice *dev,
> > >       * this is a work around for guests that overwrite
> > >       * control of powered off slots before powering them on.
> > >       */
> > > -    if ((sltsta & PCI_EXP_SLTSTA_PDS) && (val & PCI_EXP_SLTCTL_PCC) &&
> > > -        (val & PCI_EXP_SLTCTL_PIC_OFF) == PCI_EXP_SLTCTL_PIC_OFF &&
> > > +    may_unplug = (val & PCI_EXP_SLTCTL_PCC) &&
> > > +                 (val & PCI_EXP_SLTCTL_PIC_OFF) == PCI_EXP_SLTCTL_PIC_OFF;
> > > +    if (may_unplug && (sltsta & PCI_EXP_SLTSTA_PDS) &&
> > >          (!(old_slt_ctl & PCI_EXP_SLTCTL_PCC) ||
> > >          (old_slt_ctl & PCI_EXP_SLTCTL_PIC_OFF) != PCI_EXP_SLTCTL_PIC_OFF)) {
> > > -        PCIBus *sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(dev));
> > > -        pci_for_each_device(sec_bus, pci_bus_num(sec_bus),
> > > -                            pcie_unplug_device, NULL);
> > > -
> > > -        pci_word_test_and_clear_mask(exp_cap + PCI_EXP_SLTSTA,
> > > -                                     PCI_EXP_SLTSTA_PDS);
> > > -        if (dev->cap_present & QEMU_PCIE_LNKSTA_DLLLA) {
> > > -            pci_word_test_and_clear_mask(exp_cap + PCI_EXP_LNKSTA,
> > > -                                         PCI_EXP_LNKSTA_DLLLA);
> > > +        slot->unplug_is_deferred = !migration_is_idle();
> > > +        if (!slot->unplug_is_deferred) {
> > > +            pcie_cap_slot_unplug(dev);
> > >          }
> > > -        pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTSTA,
> > > -                                       PCI_EXP_SLTSTA_PDC);
> > > +    } else if (!may_unplug) {
> > > +        slot->unplug_is_deferred = false;
> > >      }
> > >
> > >      hotplug_event_notify(dev);
> > > diff --git a/hw/pci/pcie_port.c b/hw/pci/pcie_port.c
> > > index c19a9be592..bd5fbf6827 100644
> > > --- a/hw/pci/pcie_port.c
> > > +++ b/hw/pci/pcie_port.c
> > > @@ -23,6 +23,9 @@
> > >  #include "hw/qdev-properties.h"
> > >  #include "qemu/module.h"
> > >  #include "hw/hotplug.h"
> > > +#include "sysemu/runstate.h"
> > > +#include "migration/migration.h"
> > > +#include "migration/misc.h"
> > >
> > >  void pcie_port_init_reg(PCIDevice *d)
> > >  {
> > > @@ -150,6 +153,48 @@ static Property pcie_slot_props[] = {
> > >      DEFINE_PROP_END_OF_LIST()
> > >  };
> > >
> > > +bool vmstate_deffered_unplug_needed(void *opaque)
> > > +{
> > > +    PCIESlot *slot = opaque;
> > > +
> > > +    return slot->unplug_is_deferred;
> > > +}
> > > +
> > > +static void pcie_slot_migration_notifier_cb(Notifier *notifier, void *data)
> > > +{
> > > +    PCIESlot *slot = container_of(notifier, PCIESlot, migration_notifier);
> > > +
> > > +    pcie_cap_slot_deferred_unplug(PCI_DEVICE(slot));
> > > +}
> > > +
> > > +static void pcie_slot_vm_state_change(void *opaque, int running, RunState state)
> > > +{
> > > +    PCIESlot *slot = opaque;
> > > +
> > > +    pcie_cap_slot_deferred_unplug(PCI_DEVICE(slot));
> > > +}
> > > +
> > > +static void pcie_slot_init(Object *obj)
> > > +{
> > > +    PCIESlot *slot = PCIE_SLOT(obj);
> > > +
> > > +    slot->unplug_is_deferred = false;
> > > +    slot->migration_notifier = (Notifier) {
> > > +        .notify = pcie_slot_migration_notifier_cb
> > > +    };
> > > +    add_migration_state_change_notifier(&slot->migration_notifier);
> > > +    slot->vmstate_change =
> > > +        qemu_add_vm_change_state_handler(pcie_slot_vm_state_change, slot);
> > > +}
> > > +
> > > +static void pcie_slot_finalize(Object *obj)
> > > +{
> > > +    PCIESlot *slot = PCIE_SLOT(obj);
> > > +
> > > +    remove_migration_state_change_notifier(&slot->migration_notifier);
> > > +    qemu_del_vm_change_state_handler(slot->vmstate_change);
> > > +}
> > > +
> > >  static void pcie_slot_class_init(ObjectClass *oc, void *data)
> > >  {
> > >      DeviceClass *dc = DEVICE_CLASS(oc);
> > > @@ -166,6 +211,8 @@ static const TypeInfo pcie_slot_type_info = {
> > >      .name = TYPE_PCIE_SLOT,
> > >      .parent = TYPE_PCIE_PORT,
> > >      .instance_size = sizeof(PCIESlot),
> > > +    .instance_init = pcie_slot_init,
> > > +    .instance_finalize = pcie_slot_finalize,
> > >      .abstract = true,
> > >      .class_init = pcie_slot_class_init,
> > >      .interfaces = (InterfaceInfo[]) {
> > > diff --git a/include/hw/pci/pcie.h b/include/hw/pci/pcie.h
> > > index 7064875835..128f26199e 100644
> > > --- a/include/hw/pci/pcie.h
> > > +++ b/include/hw/pci/pcie.h
> > > @@ -110,6 +110,7 @@ void pcie_cap_slot_get(PCIDevice *dev, uint16_t *slt_ctl, uint16_t *slt_sta);
> > >  void pcie_cap_slot_write_config(PCIDevice *dev,
> > >                                  uint16_t old_slt_ctl, uint16_t old_slt_sta,
> > >                                  uint32_t addr, uint32_t val, int len);
> > > +void pcie_cap_slot_deferred_unplug(PCIDevice *dev);
> > >  int pcie_cap_slot_post_load(void *opaque, int version_id);
> > >  void pcie_cap_slot_push_attention_button(PCIDevice *dev);
> > >
> > > diff --git a/include/hw/pci/pcie_port.h b/include/hw/pci/pcie_port.h
> > > index 7515430087..32e45f0c89 100644
> > > --- a/include/hw/pci/pcie_port.h
> > > +++ b/include/hw/pci/pcie_port.h
> > > @@ -23,6 +23,9 @@
> > >
> > >  #include "hw/pci/pci_bridge.h"
> > >  #include "hw/pci/pci_bus.h"
> > > +#include "qemu/notify.h"
> > > +#include "sysemu/sysemu.h"
> > > +#include "migration/vmstate.h"
> > >
> > >  #define TYPE_PCIE_PORT "pcie-port"
> > >  #define PCIE_PORT(obj) OBJECT_CHECK(PCIEPort, (obj), TYPE_PCIE_PORT)
> > > @@ -44,6 +47,10 @@ void pcie_port_init_reg(PCIDevice *d);
> > >  struct PCIESlot {
> > >      /*< private >*/
> > >      PCIEPort    parent_obj;
> > > +    bool        unplug_is_deferred;
> > > +    Notifier    migration_notifier;
> > > +    VMChangeStateEntry *vmstate_change;
> > > +
> > >      /*< public >*/
> > >
> > >      /* pci express switch port with slot */
> > > @@ -58,6 +65,19 @@ struct PCIESlot {
> > >      QLIST_ENTRY(PCIESlot) next;
> > >  };
> > >
> > > +bool vmstate_deffered_unplug_needed(void *opaque);
> > > +
> > > +#define VMSTATE_DEFFERED_UNPLUG(parent_section_name) {            \
> > > +    .name = parent_section_name "/deffered-unplug",               \
> > > +    .version_id = 1,                                              \
> > > +    .minimum_version_id = 1,                                      \
> > > +    .needed = vmstate_deffered_unplug_needed,                     \
> > > +    .fields = (VMStateField[]) {                                  \
> > > +        VMSTATE_BOOL(unplug_is_deferred, PCIESlot),               \
> > > +        VMSTATE_END_OF_LIST()                                     \
> > > +    }                                                             \
> > > +}
> > > +
> > >  void pcie_chassis_create(uint8_t chassis_number);
> > >  PCIESlot *pcie_chassis_find_slot(uint8_t chassis, uint16_t slot);
> > >  int pcie_chassis_add_slot(struct PCIESlot *slot);
> > > --
> > > 2.24.1
> > >
> > >
> >
> --
> Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
>
diff mbox series

Patch

diff --git a/hw/pci-bridge/gen_pcie_root_port.c b/hw/pci-bridge/gen_pcie_root_port.c
index 9eaefebca8..5b3c202341 100644
--- a/hw/pci-bridge/gen_pcie_root_port.c
+++ b/hw/pci-bridge/gen_pcie_root_port.c
@@ -100,6 +100,9 @@  static void gen_rp_realize(DeviceState *dev, Error **errp)
     }
 }
 
+static const VMStateDescription vmstate_rp_deffered_unplug =
+    VMSTATE_DEFFERED_UNPLUG("pcie-root-port");
+
 static const VMStateDescription vmstate_rp_dev = {
     .name = "pcie-root-port",
     .priority = MIG_PRI_PCI_BUS,
@@ -114,6 +117,10 @@  static const VMStateDescription vmstate_rp_dev = {
                           GenPCIERootPort,
                           gen_rp_test_migrate_msix),
         VMSTATE_END_OF_LIST()
+    },
+    .subsections = (const VMStateDescription * []) {
+        &vmstate_rp_deffered_unplug,
+        NULL
     }
 };
 
diff --git a/hw/pci-bridge/ioh3420.c b/hw/pci-bridge/ioh3420.c
index f1e16135a3..2399a9a87f 100644
--- a/hw/pci-bridge/ioh3420.c
+++ b/hw/pci-bridge/ioh3420.c
@@ -82,6 +82,9 @@  static void ioh3420_interrupts_uninit(PCIDevice *d)
     msi_uninit(d);
 }
 
+static const VMStateDescription vmstate_ioh3420_deffered_unplug =
+    VMSTATE_DEFFERED_UNPLUG("ioh-3240-express-root-port");
+
 static const VMStateDescription vmstate_ioh3420 = {
     .name = "ioh-3240-express-root-port",
     .priority = MIG_PRI_PCI_BUS,
@@ -93,6 +96,10 @@  static const VMStateDescription vmstate_ioh3420 = {
         VMSTATE_STRUCT(parent_obj.parent_obj.parent_obj.exp.aer_log,
                        PCIESlot, 0, vmstate_pcie_aer_log, PCIEAERLog),
         VMSTATE_END_OF_LIST()
+    },
+    .subsections = (const VMStateDescription * []) {
+        &vmstate_ioh3420_deffered_unplug,
+        NULL
     }
 };
 
diff --git a/hw/pci-bridge/xio3130_downstream.c b/hw/pci-bridge/xio3130_downstream.c
index a9f084b863..a5b4fe08ee 100644
--- a/hw/pci-bridge/xio3130_downstream.c
+++ b/hw/pci-bridge/xio3130_downstream.c
@@ -139,6 +139,9 @@  static Property xio3130_downstream_props[] = {
     DEFINE_PROP_END_OF_LIST()
 };
 
+static const VMStateDescription vmstate_xio3130_downstream_deffered_unplug =
+    VMSTATE_DEFFERED_UNPLUG("xio3130-express-downstream-port");
+
 static const VMStateDescription vmstate_xio3130_downstream = {
     .name = "xio3130-express-downstream-port",
     .priority = MIG_PRI_PCI_BUS,
@@ -150,6 +153,10 @@  static const VMStateDescription vmstate_xio3130_downstream = {
         VMSTATE_STRUCT(parent_obj.parent_obj.parent_obj.exp.aer_log,
                        PCIESlot, 0, vmstate_pcie_aer_log, PCIEAERLog),
         VMSTATE_END_OF_LIST()
+    },
+    .subsections = (const VMStateDescription * []) {
+        &vmstate_xio3130_downstream_deffered_unplug,
+        NULL
     }
 };
 
diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
index 08718188bb..29f0e5c05b 100644
--- a/hw/pci/pcie.c
+++ b/hw/pci/pcie.c
@@ -28,6 +28,8 @@ 
 #include "hw/pci/pcie_regs.h"
 #include "hw/pci/pcie_port.h"
 #include "qemu/range.h"
+#include "sysemu/sysemu.h"
+#include "migration/misc.h"
 
 //#define DEBUG_PCIE
 #ifdef DEBUG_PCIE
@@ -575,6 +577,7 @@  void pcie_cap_slot_reset(PCIDevice *dev)
 
     if (dev->cap_present & QEMU_PCIE_SLTCAP_PCP) {
         /* Downstream ports enforce device number 0. */
+        PCIESlot *slot = PCIE_SLOT(dev);
         bool populated = pci_bridge_get_sec_bus(PCI_BRIDGE(dev))->devices[0];
         uint16_t pic;
 
@@ -588,6 +591,7 @@  void pcie_cap_slot_reset(PCIDevice *dev)
 
         pic = populated ? PCI_EXP_SLTCTL_PIC_ON : PCI_EXP_SLTCTL_PIC_OFF;
         pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTCTL, pic);
+        slot->unplug_is_deferred = false;
     }
 
     pci_word_test_and_clear_mask(exp_cap + PCI_EXP_SLTSTA,
@@ -608,13 +612,42 @@  void pcie_cap_slot_get(PCIDevice *dev, uint16_t *slt_ctl, uint16_t *slt_sta)
     *slt_sta = pci_get_word(exp_cap + PCI_EXP_SLTSTA);
 }
 
+static void pcie_cap_slot_unplug(PCIDevice *dev)
+{
+    uint32_t pos = dev->exp.exp_cap;
+    uint8_t *exp_cap = dev->config + pos;
+    PCIBus *sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(dev));
+
+    pci_for_each_device(sec_bus, pci_bus_num(sec_bus),
+                        pcie_unplug_device, NULL);
+    pci_word_test_and_clear_mask(exp_cap + PCI_EXP_SLTSTA, PCI_EXP_SLTSTA_PDS);
+    if (dev->cap_present & QEMU_PCIE_LNKSTA_DLLLA) {
+        pci_word_test_and_clear_mask(exp_cap + PCI_EXP_LNKSTA,
+                                     PCI_EXP_LNKSTA_DLLLA);
+    }
+    pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTSTA, PCI_EXP_SLTSTA_PDC);
+    hotplug_event_notify(dev);
+}
+
+void pcie_cap_slot_deferred_unplug(PCIDevice *dev)
+{
+    PCIESlot *slot = PCIE_SLOT(dev);
+
+    if (migration_is_idle() && slot->unplug_is_deferred) {
+        pcie_cap_slot_unplug(dev);
+        slot->unplug_is_deferred = false;
+    }
+}
+
 void pcie_cap_slot_write_config(PCIDevice *dev,
                                 uint16_t old_slt_ctl, uint16_t old_slt_sta,
                                 uint32_t addr, uint32_t val, int len)
 {
+    PCIESlot *slot = PCIE_SLOT(dev);
     uint32_t pos = dev->exp.exp_cap;
     uint8_t *exp_cap = dev->config + pos;
     uint16_t sltsta = pci_get_word(exp_cap + PCI_EXP_SLTSTA);
+    bool may_unplug;
 
     if (ranges_overlap(addr, len, pos + PCI_EXP_SLTSTA, 2)) {
         /*
@@ -660,22 +693,17 @@  void pcie_cap_slot_write_config(PCIDevice *dev,
      * this is a work around for guests that overwrite
      * control of powered off slots before powering them on.
      */
-    if ((sltsta & PCI_EXP_SLTSTA_PDS) && (val & PCI_EXP_SLTCTL_PCC) &&
-        (val & PCI_EXP_SLTCTL_PIC_OFF) == PCI_EXP_SLTCTL_PIC_OFF &&
+    may_unplug = (val & PCI_EXP_SLTCTL_PCC) &&
+                 (val & PCI_EXP_SLTCTL_PIC_OFF) == PCI_EXP_SLTCTL_PIC_OFF;
+    if (may_unplug && (sltsta & PCI_EXP_SLTSTA_PDS) &&
         (!(old_slt_ctl & PCI_EXP_SLTCTL_PCC) ||
         (old_slt_ctl & PCI_EXP_SLTCTL_PIC_OFF) != PCI_EXP_SLTCTL_PIC_OFF)) {
-        PCIBus *sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(dev));
-        pci_for_each_device(sec_bus, pci_bus_num(sec_bus),
-                            pcie_unplug_device, NULL);
-
-        pci_word_test_and_clear_mask(exp_cap + PCI_EXP_SLTSTA,
-                                     PCI_EXP_SLTSTA_PDS);
-        if (dev->cap_present & QEMU_PCIE_LNKSTA_DLLLA) {
-            pci_word_test_and_clear_mask(exp_cap + PCI_EXP_LNKSTA,
-                                         PCI_EXP_LNKSTA_DLLLA);
+        slot->unplug_is_deferred = !migration_is_idle();
+        if (!slot->unplug_is_deferred) {
+            pcie_cap_slot_unplug(dev);
         }
-        pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTSTA,
-                                       PCI_EXP_SLTSTA_PDC);
+    } else if (!may_unplug) {
+        slot->unplug_is_deferred = false;
     }
 
     hotplug_event_notify(dev);
diff --git a/hw/pci/pcie_port.c b/hw/pci/pcie_port.c
index c19a9be592..bd5fbf6827 100644
--- a/hw/pci/pcie_port.c
+++ b/hw/pci/pcie_port.c
@@ -23,6 +23,9 @@ 
 #include "hw/qdev-properties.h"
 #include "qemu/module.h"
 #include "hw/hotplug.h"
+#include "sysemu/runstate.h"
+#include "migration/migration.h"
+#include "migration/misc.h"
 
 void pcie_port_init_reg(PCIDevice *d)
 {
@@ -150,6 +153,48 @@  static Property pcie_slot_props[] = {
     DEFINE_PROP_END_OF_LIST()
 };
 
+bool vmstate_deffered_unplug_needed(void *opaque)
+{
+    PCIESlot *slot = opaque;
+
+    return slot->unplug_is_deferred;
+}
+
+static void pcie_slot_migration_notifier_cb(Notifier *notifier, void *data)
+{
+    PCIESlot *slot = container_of(notifier, PCIESlot, migration_notifier);
+
+    pcie_cap_slot_deferred_unplug(PCI_DEVICE(slot));
+}
+
+static void pcie_slot_vm_state_change(void *opaque, int running, RunState state)
+{
+    PCIESlot *slot = opaque;
+
+    pcie_cap_slot_deferred_unplug(PCI_DEVICE(slot));
+}
+
+static void pcie_slot_init(Object *obj)
+{
+    PCIESlot *slot = PCIE_SLOT(obj);
+
+    slot->unplug_is_deferred = false;
+    slot->migration_notifier = (Notifier) {
+        .notify = pcie_slot_migration_notifier_cb
+    };
+    add_migration_state_change_notifier(&slot->migration_notifier);
+    slot->vmstate_change =
+        qemu_add_vm_change_state_handler(pcie_slot_vm_state_change, slot);
+}
+
+static void pcie_slot_finalize(Object *obj)
+{
+    PCIESlot *slot = PCIE_SLOT(obj);
+
+    remove_migration_state_change_notifier(&slot->migration_notifier);
+    qemu_del_vm_change_state_handler(slot->vmstate_change);
+}
+
 static void pcie_slot_class_init(ObjectClass *oc, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(oc);
@@ -166,6 +211,8 @@  static const TypeInfo pcie_slot_type_info = {
     .name = TYPE_PCIE_SLOT,
     .parent = TYPE_PCIE_PORT,
     .instance_size = sizeof(PCIESlot),
+    .instance_init = pcie_slot_init,
+    .instance_finalize = pcie_slot_finalize,
     .abstract = true,
     .class_init = pcie_slot_class_init,
     .interfaces = (InterfaceInfo[]) {
diff --git a/include/hw/pci/pcie.h b/include/hw/pci/pcie.h
index 7064875835..128f26199e 100644
--- a/include/hw/pci/pcie.h
+++ b/include/hw/pci/pcie.h
@@ -110,6 +110,7 @@  void pcie_cap_slot_get(PCIDevice *dev, uint16_t *slt_ctl, uint16_t *slt_sta);
 void pcie_cap_slot_write_config(PCIDevice *dev,
                                 uint16_t old_slt_ctl, uint16_t old_slt_sta,
                                 uint32_t addr, uint32_t val, int len);
+void pcie_cap_slot_deferred_unplug(PCIDevice *dev);
 int pcie_cap_slot_post_load(void *opaque, int version_id);
 void pcie_cap_slot_push_attention_button(PCIDevice *dev);
 
diff --git a/include/hw/pci/pcie_port.h b/include/hw/pci/pcie_port.h
index 7515430087..32e45f0c89 100644
--- a/include/hw/pci/pcie_port.h
+++ b/include/hw/pci/pcie_port.h
@@ -23,6 +23,9 @@ 
 
 #include "hw/pci/pci_bridge.h"
 #include "hw/pci/pci_bus.h"
+#include "qemu/notify.h"
+#include "sysemu/sysemu.h"
+#include "migration/vmstate.h"
 
 #define TYPE_PCIE_PORT "pcie-port"
 #define PCIE_PORT(obj) OBJECT_CHECK(PCIEPort, (obj), TYPE_PCIE_PORT)
@@ -44,6 +47,10 @@  void pcie_port_init_reg(PCIDevice *d);
 struct PCIESlot {
     /*< private >*/
     PCIEPort    parent_obj;
+    bool        unplug_is_deferred;
+    Notifier    migration_notifier;
+    VMChangeStateEntry *vmstate_change;
+
     /*< public >*/
 
     /* pci express switch port with slot */
@@ -58,6 +65,19 @@  struct PCIESlot {
     QLIST_ENTRY(PCIESlot) next;
 };
 
+bool vmstate_deffered_unplug_needed(void *opaque);
+
+#define VMSTATE_DEFFERED_UNPLUG(parent_section_name) {            \
+    .name = parent_section_name "/deffered-unplug",               \
+    .version_id = 1,                                              \
+    .minimum_version_id = 1,                                      \
+    .needed = vmstate_deffered_unplug_needed,                     \
+    .fields = (VMStateField[]) {                                  \
+        VMSTATE_BOOL(unplug_is_deferred, PCIESlot),               \
+        VMSTATE_END_OF_LIST()                                     \
+    }                                                             \
+}
+
 void pcie_chassis_create(uint8_t chassis_number);
 PCIESlot *pcie_chassis_find_slot(uint8_t chassis, uint16_t slot);
 int pcie_chassis_add_slot(struct PCIESlot *slot);