@@ -435,6 +435,74 @@ static void pci_msi_trigger(PCIDevice *dev, MSIMessage msg)
attrs, NULL);
}
+int pci_pm_init(PCIDevice *d, uint8_t offset, Error **errp)
+{
+ int cap = pci_add_capability(d, PCI_CAP_ID_PM, offset, PCI_PM_SIZEOF, errp);
+
+ if (cap < 0) {
+ return cap;
+ }
+
+ d->pm_cap = cap;
+ d->cap_present |= QEMU_PCI_CAP_PM;
+ pci_set_word(d->wmask + cap + PCI_PM_CTRL, PCI_PM_CTRL_STATE_MASK);
+
+ return cap;
+}
+
+static uint8_t pci_pm_state(PCIDevice *d)
+{
+ uint16_t pmcsr;
+
+ if (!(d->cap_present & QEMU_PCI_CAP_PM)) {
+ return 0;
+ }
+
+ pmcsr = pci_get_word(d->config + d->pm_cap + PCI_PM_CTRL);
+
+ return pmcsr & PCI_PM_CTRL_STATE_MASK;
+}
+
+static uint8_t pci_pm_update(PCIDevice *d, uint32_t addr, int l, uint8_t old)
+{
+ uint16_t pmc;
+ uint8_t new;
+
+ if (!(d->cap_present & QEMU_PCI_CAP_PM) ||
+ !range_covers_byte(addr, l, d->pm_cap + PCI_PM_CTRL)) {
+ return old;
+ }
+
+ new = pci_pm_state(d);
+ if (new == old) {
+ return old;
+ }
+
+ pmc = pci_get_word(d->config + d->pm_cap + PCI_PM_PMC);
+
+ /*
+ * Transitions to D1 & D2 are only allowed if supported. Devices may
+ * only transition to higher D-states or to D0. The write is dropped
+ * for rejected transitions by restoring the old state.
+ */
+ if ((!(pmc & PCI_PM_CAP_D1) && new == 1) ||
+ (!(pmc & PCI_PM_CAP_D2) && new == 2) ||
+ (old && new && new < old)) {
+ pci_word_test_and_clear_mask(d->config + d->pm_cap + PCI_PM_CTRL,
+ PCI_PM_CTRL_STATE_MASK);
+ pci_word_test_and_set_mask(d->config + d->pm_cap + PCI_PM_CTRL,
+ old);
+ trace_pci_pm_bad_transition(d->name, pci_dev_bus_num(d),
+ PCI_SLOT(d->devfn), PCI_FUNC(d->devfn),
+ old, new);
+ return old;
+ }
+
+ trace_pci_pm_transition(d->name, pci_dev_bus_num(d), PCI_SLOT(d->devfn),
+ PCI_FUNC(d->devfn), old, new);
+ return new;
+}
+
static void pci_reset_regions(PCIDevice *dev)
{
int r;
@@ -474,6 +542,11 @@ static void pci_do_device_reset(PCIDevice *dev)
pci_get_word(dev->wmask + PCI_INTERRUPT_LINE) |
pci_get_word(dev->w1cmask + PCI_INTERRUPT_LINE));
dev->config[PCI_CACHE_LINE_SIZE] = 0x0;
+ /* Default PM state is D0 */
+ if (dev->cap_present & QEMU_PCI_CAP_PM) {
+ pci_word_test_and_clear_mask(dev->config + dev->pm_cap + PCI_PM_CTRL,
+ PCI_PM_CTRL_STATE_MASK);
+ }
pci_reset_regions(dev);
pci_update_mappings(dev);
@@ -1598,7 +1671,7 @@ static void pci_update_mappings(PCIDevice *d)
continue;
new_addr = pci_bar_address(d, i, r->type, r->size);
- if (!d->enabled) {
+ if (!d->enabled || pci_pm_state(d)) {
new_addr = PCI_BAR_UNMAPPED;
}
@@ -1664,6 +1737,7 @@ uint32_t pci_default_read_config(PCIDevice *d,
void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val_in, int l)
{
+ uint8_t new_pm_state, old_pm_state = pci_pm_state(d);
int i, was_irq_disabled = pci_irq_disabled(d);
uint32_t val = val_in;
@@ -1676,11 +1750,16 @@ void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val_in, int
d->config[addr + i] = (d->config[addr + i] & ~wmask) | (val & wmask);
d->config[addr + i] &= ~(val & w1cmask); /* W1C: Write 1 to Clear */
}
+
+ new_pm_state = pci_pm_update(d, addr, l, old_pm_state);
+
if (ranges_overlap(addr, l, PCI_BASE_ADDRESS_0, 24) ||
ranges_overlap(addr, l, PCI_ROM_ADDRESS, 4) ||
ranges_overlap(addr, l, PCI_ROM_ADDRESS1, 4) ||
- range_covers_byte(addr, l, PCI_COMMAND))
+ range_covers_byte(addr, l, PCI_COMMAND) ||
+ !!new_pm_state != !!old_pm_state) {
pci_update_mappings(d);
+ }
if (ranges_overlap(addr, l, PCI_COMMAND, 2)) {
pci_update_irq_disabled(d, was_irq_disabled);
@@ -1,6 +1,8 @@
# See docs/devel/tracing.rst for syntax documentation.
# pci.c
+pci_pm_bad_transition(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, uint8_t old, uint8_t new) "%s %02x:%02x.%x bad PM transition D%d->D%d"
+pci_pm_transition(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, uint8_t old, uint8_t new) "%s %02x:%02x.%x PM transition D%d->D%d"
pci_update_mappings_del(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, int bar, uint64_t addr, uint64_t size) "%s %02x:%02x.%x %d,0x%"PRIx64"+0x%"PRIx64
pci_update_mappings_add(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, int bar, uint64_t addr, uint64_t size) "%s %02x:%02x.%x %d,0x%"PRIx64"+0x%"PRIx64
pci_route_irq(int dev_irq, const char *dev_path, int parent_irq, const char *parent_path) "IRQ %d @%s -> IRQ %d @%s"
@@ -216,6 +216,8 @@ enum {
QEMU_PCIE_ARI_NEXTFN_1 = (1 << QEMU_PCIE_ARI_NEXTFN_1_BITNR),
#define QEMU_PCIE_EXT_TAG_BITNR 13
QEMU_PCIE_EXT_TAG = (1 << QEMU_PCIE_EXT_TAG_BITNR),
+#define QEMU_PCI_CAP_PM_BITNR 14
+ QEMU_PCI_CAP_PM = (1 << QEMU_PCI_CAP_PM_BITNR),
};
typedef struct PCIINTxRoute {
@@ -676,5 +678,6 @@ static inline void pci_irq_deassert(PCIDevice *pci_dev)
MSIMessage pci_get_msi_message(PCIDevice *dev, int vector);
void pci_set_enabled(PCIDevice *pci_dev, bool state);
void pci_set_power(PCIDevice *pci_dev, bool state);
+int pci_pm_init(PCIDevice *pci_dev, uint8_t offset, Error **errp);
#endif
@@ -105,6 +105,9 @@ struct PCIDevice {
/* Capability bits */
uint32_t cap_present;
+ /* Offset of PM capability in config space */
+ uint8_t pm_cap;
+
/* Offset of MSI-X capability in config space */
uint8_t msix_cap;
The memory and IO BARs for devices are only accessible in the D0 power state. In other power states the PCI spec defines that the device should respond to TLPs and messages with an Unsupported Request response. The closest we can come to emulating this behavior is to consider the BARs as unmapped, removing them from the address space. Introduce an interface to register the PM capability such that the device power state is considered relative to the BAR mapping state. Cc: Michael S. Tsirkin <mst@redhat.com> Cc: Marcel Apfelbaum <marcel.apfelbaum@gmail.com> Signed-off-by: Alex Williamson <alex.williamson@redhat.com> --- hw/pci/pci.c | 83 ++++++++++++++++++++++++++++++++++++- hw/pci/trace-events | 2 + include/hw/pci/pci.h | 3 ++ include/hw/pci/pci_device.h | 3 ++ 4 files changed, 89 insertions(+), 2 deletions(-)