@@ -889,10 +889,16 @@ void vpci_msix_arch_init_entry(struct vpci_msix_entry *entry)
entry->arch.pirq = INVALID_PIRQ;
}
-int vpci_msix_arch_print(const struct vpci_msix *msix)
+int vpci_msix_arch_print(const struct domain *d, const struct vpci_msix *msix)
{
unsigned int i;
+ /*
+ * FIXME: this check is not precise, as the lock may be held by a
+ * different CPU. But this is better than nothing.
+ */
+ ASSERT(pcidevs_read_locked());
+
for ( i = 0; i < msix->max_entries; i++ )
{
const struct vpci_msix_entry *entry = &msix->entries[i];
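The FIXME above stems from rw_is_locked() observing only global lock state, so the assertion is also satisfied when a different CPU holds the read lock. A precise check would need per-CPU bookkeeping; a minimal illustrative sketch (not part of this patch) of how the wrappers added below could be extended:

    static DEFINE_PER_CPU(unsigned int, pcidevs_read_cnt);

    void pcidevs_read_lock(void)
    {
        read_lock(&_pcidevs_rwlock);
        this_cpu(pcidevs_read_cnt)++;    /* hypothetical bookkeeping */
    }

    void pcidevs_read_unlock(void)
    {
        this_cpu(pcidevs_read_cnt)--;
        read_unlock(&_pcidevs_rwlock);
    }

    bool pcidevs_read_locked(void)
    {
        /* True only when the *current* CPU holds the read lock. */
        return this_cpu(pcidevs_read_cnt);
    }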
@@ -909,11 +915,23 @@ int vpci_msix_arch_print(const struct vpci_msix *msix)
if ( i && !(i % 64) )
{
struct pci_dev *pdev = msix->pdev;
+ pci_sbdf_t sbdf = pdev->sbdf;
spin_unlock(&msix->pdev->vpci->lock);
+ pcidevs_read_unlock();
+
+ /* NB: we still hold rcu_read_lock(&domlist_read_lock) here. */
process_pending_softirqs();
- /* NB: we assume that pdev cannot go away for an alive domain. */
- if ( !pdev->vpci || !spin_trylock(&pdev->vpci->lock) )
+
+ if ( !pcidevs_read_trylock() )
+ return -EBUSY;
+ pdev = pci_get_pdev_by_domain(d, sbdf.seg, sbdf.bus, sbdf.devfn);
+ /*
+ * FIXME: we may find a re-allocated pdev here, possibly even one
+ * occupying the same address as before; the checks below are best effort.
+ */
+ if ( !pdev || (pdev != msix->pdev) || !pdev->vpci ||
+ !spin_trylock(&pdev->vpci->lock) )
+ {
+ /* Keep the -EBUSY contract: drop every lock before returning. */
+ pcidevs_read_unlock();
return -EBUSY;
+ }
if ( pdev->vpci->msix != msix )
{
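For reference, the caller-side contract this reshuffle establishes, as a hedged sketch (mirroring vpci_dump_msi() further down; on -EBUSY the callee has already dropped both locks):

    static void dump_msix_sketch(const struct domain *d, struct pci_dev *pdev)
    {
        pcidevs_read_lock();
        spin_lock(&pdev->vpci->lock);
        if ( vpci_msix_arch_print(d, pdev->vpci->msix) )
            /* -EBUSY: both locks were already dropped by the callee. */
            process_pending_softirqs();
        else
        {
            spin_unlock(&pdev->vpci->lock);
            pcidevs_read_unlock();
        }
    }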
@@ -70,6 +70,26 @@ bool_t pcidevs_locked(void)
return !!spin_is_locked(&_pcidevs_lock) || pcidevs_write_locked();
}
+void pcidevs_read_lock(void)
+{
+ read_lock(&_pcidevs_rwlock);
+}
+
+int pcidevs_read_trylock(void)
+{
+ return read_trylock(&_pcidevs_rwlock);
+}
+
+void pcidevs_read_unlock(void)
+{
+ read_unlock(&_pcidevs_rwlock);
+}
+
+bool pcidevs_read_locked(void)
+{
+ return !!rw_is_locked(&_pcidevs_rwlock);
+}
+
void pcidevs_write_lock(void)
{
write_lock(&_pcidevs_rwlock);
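A typical use of the new read-side helpers, sketched here under the same assumptions the vpci_read()/vpci_write() hunks below rely on (read_cfg_sketch() is a hypothetical name): the read lock is held across the pdev lookup and every dereference of the result.

    static uint32_t read_cfg_sketch(const struct domain *d, pci_sbdf_t sbdf)
    {
        const struct pci_dev *pdev;
        uint32_t val = ~0;

        pcidevs_read_lock();
        pdev = pci_get_pdev_by_domain(d, sbdf.seg, sbdf.bus, sbdf.devfn);
        if ( pdev && pdev->vpci )
        {
            /* pdev cannot be removed while the read lock is held. */
            spin_lock(&pdev->vpci->lock);
            /* ... read the emulated register here ... */
            spin_unlock(&pdev->vpci->lock);
        }
        pcidevs_read_unlock();

        return val;
    }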
@@ -142,16 +142,19 @@ bool vpci_process_pending(struct vcpu *v)
if ( rc == -ERESTART )
return true;
+ pcidevs_read_lock();
spin_lock(&v->vpci.pdev->vpci->lock);
/* Disable memory decoding unconditionally on failure. */
modify_decoding(v->vpci.pdev,
rc ? v->vpci.cmd & ~PCI_COMMAND_MEMORY : v->vpci.cmd,
!rc && v->vpci.rom_only);
spin_unlock(&v->vpci.pdev->vpci->lock);
+ pcidevs_read_unlock();
rangeset_destroy(v->vpci.mem);
v->vpci.mem = NULL;
if ( rc )
+ {
/*
* FIXME: in case of failure remove the device from the domain.
* Note that there might still be leftover mappings. While this is
@@ -159,7 +162,10 @@ bool vpci_process_pending(struct vcpu *v)
* killed in order to avoid leaking stale p2m mappings on
* failure.
*/
+ pcidevs_write_lock();
vpci_remove_device(v->vpci.pdev);
+ pcidevs_write_unlock();
+ }
}
return false;
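The hunk above also encodes the lock ordering: the pcidevs lock (read or write) is always taken outside pdev->vpci->lock, and tear-down of pdev->vpci requires the write side. A condensed sketch of the two sides:

    static void reader_side_sketch(struct pci_dev *pdev)
    {
        pcidevs_read_lock();               /* outer lock first */
        spin_lock(&pdev->vpci->lock);      /* inner lock second */
        /* ... consume pdev->vpci ... */
        spin_unlock(&pdev->vpci->lock);
        pcidevs_read_unlock();
    }

    static void writer_side_sketch(struct pci_dev *pdev)
    {
        pcidevs_write_lock();              /* excludes all readers above */
        vpci_remove_device(pdev);          /* frees pdev->vpci */
        pcidevs_write_unlock();
    }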
@@ -172,7 +178,16 @@ static int __init apply_map(struct domain *d, const struct pci_dev *pdev,
int rc;
while ( (rc = rangeset_consume_ranges(mem, map_range, &data)) == -ERESTART )
+ {
+ /*
+ * It's safe to drop and re-acquire the lock in this context
+ * without risking pdev disappearing because devices cannot be
+ * removed until the initial domain has been started.
+ */
+ pcidevs_write_unlock();
process_pending_softirqs();
+ pcidevs_write_lock();
+ }
rangeset_destroy(mem);
if ( !rc )
modify_decoding(pdev, cmd, false);
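The pattern above generalizes: never hold the pcidevs lock across process_pending_softirqs(). A sketch of the shape, with do_chunk() as a hypothetical stand-in for any restartable operation:

    static int restartable_sketch(struct rangeset *mem)
    {
        int rc;

        while ( (rc = do_chunk(mem)) == -ERESTART )
        {
            pcidevs_write_unlock();      /* never hold it across softirqs */
            process_pending_softirqs();
            pcidevs_write_lock();        /* safe: no device removal this early */
        }

        return rc;
    }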
@@ -450,10 +465,15 @@ static int init_bars(struct pci_dev *pdev)
uint16_t cmd;
uint64_t addr, size;
unsigned int i, num_bars, rom_reg;
- struct vpci_header *header = &pdev->vpci->header;
- struct vpci_bar *bars = header->bars;
+ struct vpci_header *header;
+ struct vpci_bar *bars;
int rc;
+ ASSERT(pcidevs_write_locked());
+
+ header = &pdev->vpci->header;
+ bars = header->bars;
+
switch ( pci_conf_read8(pdev->sbdf, PCI_HEADER_TYPE) & 0x7f )
{
case PCI_HEADER_TYPE_NORMAL:
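The new assertion holds because the init hooks are only reached from vpci_add_handlers(), which (per the hunk further down) asserts the write lock itself; the expected caller context is therefore, as a sketch:

    pcidevs_write_lock();
    rc = vpci_add_handlers(pdev);    /* runs init_bars(), init_msi(), ... */
    pcidevs_write_unlock();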
@@ -184,12 +184,17 @@ static void mask_write(const struct pci_dev *pdev, unsigned int reg,
static int init_msi(struct pci_dev *pdev)
{
- uint8_t slot = PCI_SLOT(pdev->devfn), func = PCI_FUNC(pdev->devfn);
- unsigned int pos = pci_find_cap_offset(pdev->seg, pdev->bus, slot, func,
- PCI_CAP_ID_MSI);
+ uint8_t slot, func;
+ unsigned int pos;
uint16_t control;
int ret;
+ ASSERT(pcidevs_write_locked());
+
+ slot = PCI_SLOT(pdev->devfn);
+ func = PCI_FUNC(pdev->devfn);
+ pos = pci_find_cap_offset(pdev->seg, pdev->bus, slot, func, PCI_CAP_ID_MSI);
+
if ( !pos )
return 0;
@@ -277,6 +282,9 @@ void vpci_dump_msi(void)
printk("vPCI MSI/MSI-X d%d\n", d->domain_id);
+ if ( !pcidevs_read_trylock() )
+ continue;
+
for_each_pdev ( d, pdev )
{
const struct vpci_msi *msi;
@@ -310,7 +318,7 @@ void vpci_dump_msi(void)
printk(" entries: %u maskall: %d enabled: %d\n",
msix->max_entries, msix->masked, msix->enabled);
- rc = vpci_msix_arch_print(msix);
+ rc = vpci_msix_arch_print(d, msix);
if ( rc )
{
/*
@@ -318,12 +326,13 @@ void vpci_dump_msi(void)
* holding the lock.
*/
printk("unable to print all MSI-X entries: %d\n", rc);
- process_pending_softirqs();
- continue;
+ goto pdev_done;
}
}
spin_unlock(&pdev->vpci->lock);
+ pcidevs_read_unlock();
+ /* The -EBUSY path arrives here with all locks already dropped. */
+ pdev_done:
process_pending_softirqs();
}
}
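The trylock here follows the usual keyhandler rule: dump paths must not block, so contention is skipped rather than waited out. Sketch of the pattern:

    for_each_domain ( d )
    {
        if ( !pcidevs_read_trylock() )
            continue;                /* never block in a keyhandler */
        /* ... dump the domain's vPCI state ... */
        pcidevs_read_unlock();
        process_pending_softirqs();  /* keep the console responsive */
    }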
@@ -144,9 +144,13 @@ static struct vpci_msix *msix_find(const struct domain *d, unsigned long addr)
list_for_each_entry ( msix, &d->arch.hvm.msix_tables, next )
{
- const struct vpci_bar *bars = msix->pdev->vpci->header.bars;
+ const struct vpci_bar *bars;
unsigned int i;
+ if ( !msix->pdev->vpci )
+ continue;
+
+ bars = msix->pdev->vpci->header.bars;
for ( i = 0; i < ARRAY_SIZE(msix->tables); i++ )
if ( bars[msix->tables[i] & PCI_MSIX_BIRMASK].enabled &&
VMSIX_ADDR_IN_RANGE(addr, msix->pdev->vpci, i) )
@@ -158,7 +162,13 @@ static struct vpci_msix *msix_find(const struct domain *d, unsigned long addr)
static int msix_accept(struct vcpu *v, unsigned long addr)
{
- return !!msix_find(v->domain, addr);
+ int rc;
+
+ pcidevs_read_lock();
+ rc = !!msix_find(v->domain, addr);
+ pcidevs_read_unlock();
+
+ return rc;
}
static bool access_allowed(const struct pci_dev *pdev, unsigned long addr,
@@ -186,17 +196,26 @@ static int msix_read(struct vcpu *v, unsigned long addr, unsigned int len,
unsigned long *data)
{
const struct domain *d = v->domain;
- struct vpci_msix *msix = msix_find(d, addr);
+ struct vpci_msix *msix;
const struct vpci_msix_entry *entry;
unsigned int offset;
*data = ~0ul;
+ pcidevs_read_lock();
+
+ msix = msix_find(d, addr);
if ( !msix )
+ {
+ pcidevs_read_unlock();
return X86EMUL_RETRY;
+ }
if ( !access_allowed(msix->pdev, addr, len) )
+ {
+ pcidevs_read_unlock();
return X86EMUL_OKAY;
+ }
if ( VMSIX_ADDR_IN_RANGE(addr, msix->pdev->vpci, VPCI_MSIX_PBA) )
{
@@ -255,6 +274,7 @@ static int msix_read(struct vcpu *v, unsigned long addr, unsigned int len,
break;
}
spin_unlock(&msix->pdev->vpci->lock);
+ pcidevs_read_unlock();
return X86EMUL_OKAY;
}
@@ -263,15 +283,24 @@ static int msix_write(struct vcpu *v, unsigned long addr, unsigned int len,
unsigned long data)
{
const struct domain *d = v->domain;
- struct vpci_msix *msix = msix_find(d, addr);
+ struct vpci_msix *msix;
struct vpci_msix_entry *entry;
unsigned int offset;
+ pcidevs_read_lock();
+
+ msix = msix_find(d, addr);
if ( !msix )
+ {
+ pcidevs_read_unlock();
return X86EMUL_RETRY;
+ }
if ( !access_allowed(msix->pdev, addr, len) )
+ {
+ pcidevs_read_unlock();
return X86EMUL_OKAY;
+ }
if ( VMSIX_ADDR_IN_RANGE(addr, msix->pdev->vpci, VPCI_MSIX_PBA) )
{
@@ -294,6 +323,7 @@ static int msix_write(struct vcpu *v, unsigned long addr, unsigned int len,
}
}
+ pcidevs_read_unlock();
return X86EMUL_OKAY;
}
@@ -371,6 +401,7 @@ static int msix_write(struct vcpu *v, unsigned long addr, unsigned int len,
break;
}
spin_unlock(&msix->pdev->vpci->lock);
+ pcidevs_read_unlock();
return X86EMUL_OKAY;
}
@@ -383,9 +414,13 @@ static const struct hvm_mmio_ops vpci_msix_table_ops = {
int vpci_make_msix_hole(const struct pci_dev *pdev)
{
- struct domain *d = pdev->domain;
+ struct domain *d;
unsigned int i;
+ ASSERT(pcidevs_read_locked());
+
+ d = pdev->domain;
+
if ( !pdev->vpci->msix )
return 0;
@@ -430,13 +465,19 @@ int vpci_make_msix_hole(const struct pci_dev *pdev)
static int init_msix(struct pci_dev *pdev)
{
- struct domain *d = pdev->domain;
- uint8_t slot = PCI_SLOT(pdev->devfn), func = PCI_FUNC(pdev->devfn);
+ struct domain *d;
+ uint8_t slot, func;
unsigned int msix_offset, i, max_entries;
uint16_t control;
struct vpci_msix *msix;
int rc;
+ ASSERT(pcidevs_write_locked());
+
+ d = pdev->domain;
+ slot = PCI_SLOT(pdev->devfn);
+ func = PCI_FUNC(pdev->devfn);
+
msix_offset = pci_find_cap_offset(pdev->seg, pdev->bus, slot, func,
PCI_CAP_ID_MSIX);
if ( !msix_offset )
@@ -37,7 +37,9 @@ extern vpci_register_init_t *const __end_vpci_array[];
void vpci_remove_device(struct pci_dev *pdev)
{
- if ( !has_vpci(pdev->domain) )
+ ASSERT(pcidevs_write_locked());
+
+ if ( !has_vpci(pdev->domain) || !pdev->vpci )
return;
spin_lock(&pdev->vpci->lock);
@@ -62,6 +64,8 @@ int vpci_add_handlers(struct pci_dev *pdev)
unsigned int i;
int rc = 0;
+ ASSERT(pcidevs_write_locked());
+
if ( !has_vpci(pdev->domain) )
return 0;
@@ -136,6 +140,8 @@ int vpci_add_register(struct vpci *vpci, vpci_read_t *read_handler,
struct list_head *prev;
struct vpci_register *r;
+ ASSERT(pcidevs_write_locked());
+
/* Some sanity checks. */
if ( (size != 1 && size != 2 && size != 4) ||
offset >= PCI_CFG_SPACE_EXP_SIZE || (offset & (size - 1)) ||
@@ -183,6 +189,8 @@ int vpci_remove_register(struct vpci *vpci, unsigned int offset,
const struct vpci_register r = { .offset = offset, .size = size };
struct vpci_register *rm;
+ ASSERT(pcidevs_write_locked());
+
spin_lock(&vpci->lock);
list_for_each_entry ( rm, &vpci->handlers, node )
{
@@ -330,10 +338,14 @@ uint32_t vpci_read(pci_sbdf_t sbdf, unsigned int reg, unsigned int size)
return data;
}
+ pcidevs_read_lock();
/* Find the PCI dev matching the address. */
pdev = pci_get_pdev_by_domain(d, sbdf.seg, sbdf.bus, sbdf.devfn);
- if ( !pdev )
+ if ( !pdev || !pdev->vpci )
+ {
+ pcidevs_read_unlock();
return vpci_read_hw(sbdf, reg, size);
+ }
spin_lock(&pdev->vpci->lock);
@@ -379,6 +391,7 @@ uint32_t vpci_read(pci_sbdf_t sbdf, unsigned int reg, unsigned int size)
ASSERT(data_offset < size);
}
spin_unlock(&pdev->vpci->lock);
+ pcidevs_read_unlock();
if ( data_offset < size )
{
@@ -441,9 +454,11 @@ void vpci_write(pci_sbdf_t sbdf, unsigned int reg, unsigned int size,
* Find the PCI dev matching the address.
* Passthrough everything that's not trapped.
*/
+ pcidevs_read_lock();
pdev = pci_get_pdev_by_domain(d, sbdf.seg, sbdf.bus, sbdf.devfn);
- if ( !pdev )
+ if ( !pdev || !pdev->vpci )
{
+ pcidevs_read_unlock();
vpci_write_hw(sbdf, reg, size, data);
return;
}
@@ -484,6 +499,7 @@ void vpci_write(pci_sbdf_t sbdf, unsigned int reg, unsigned int size,
ASSERT(data_offset < size);
}
spin_unlock(&pdev->vpci->lock);
+ pcidevs_read_unlock();
if ( data_offset < size )
/* Tailing gap, write the remaining. */
@@ -152,6 +152,11 @@ void pcidevs_lock(void);
void pcidevs_unlock(void);
bool_t __must_check pcidevs_locked(void);
+void pcidevs_read_lock(void);
+int pcidevs_read_trylock(void);
+void pcidevs_read_unlock(void);
+bool __must_check pcidevs_read_locked(void);
+
void pcidevs_write_lock(void);
void pcidevs_write_unlock(void);
bool __must_check pcidevs_write_locked(void);
@@ -173,7 +173,7 @@ int __must_check vpci_msix_arch_enable_entry(struct vpci_msix_entry *entry,
int __must_check vpci_msix_arch_disable_entry(struct vpci_msix_entry *entry,
const struct pci_dev *pdev);
void vpci_msix_arch_init_entry(struct vpci_msix_entry *entry);
-int vpci_msix_arch_print(const struct vpci_msix *msix);
+int vpci_msix_arch_print(const struct domain *d, const struct vpci_msix *msix);
/*
* Helper functions to fetch MSIX related data. They are used by both the