@@ -161,63 +161,107 @@ static void modify_decoding(const struct pci_dev *pdev, uint16_t cmd,
bool vpci_process_pending(struct vcpu *v)
{
- if ( v->vpci.mem )
+ struct pci_dev *pdev = v->vpci.pdev;
+ struct map_data data = {
+ .d = v->domain,
+ .map = v->vpci.cmd & PCI_COMMAND_MEMORY,
+ };
+ struct vpci_header *header = NULL;
+ unsigned int i;
+
+ if ( !pdev )
+ return false;
+
+ read_lock(&v->domain->pci_lock);
+
+ if ( !pdev->vpci || (v->domain != pdev->domain) )
+ {
+ v->vpci.pdev = NULL;
+ read_unlock(&v->domain->pci_lock);
+ return false;
+ }
+
+ header = &pdev->vpci->header;
+ for ( i = 0; i < ARRAY_SIZE(header->bars); i++ )
{
- struct map_data data = {
- .d = v->domain,
- .map = v->vpci.cmd & PCI_COMMAND_MEMORY,
- };
- int rc = rangeset_consume_ranges(v->vpci.mem, map_range, &data);
+ struct vpci_bar *bar = &header->bars[i];
+ int rc;
+
+ if ( rangeset_is_empty(bar->mem) )
+ continue;
+
+ rc = rangeset_consume_ranges(bar->mem, map_range, &data);
if ( rc == -ERESTART )
+ {
+ read_unlock(&v->domain->pci_lock);
return true;
+ }
- write_lock(&v->domain->pci_lock);
- spin_lock(&v->vpci.pdev->vpci->lock);
- /* Disable memory decoding unconditionally on failure. */
- modify_decoding(v->vpci.pdev,
- rc ? v->vpci.cmd & ~PCI_COMMAND_MEMORY : v->vpci.cmd,
- !rc && v->vpci.rom_only);
- spin_unlock(&v->vpci.pdev->vpci->lock);
-
- rangeset_destroy(v->vpci.mem);
- v->vpci.mem = NULL;
if ( rc )
- /*
- * FIXME: in case of failure remove the device from the domain.
- * Note that there might still be leftover mappings. While this is
- * safe for Dom0, for DomUs the domain will likely need to be
- * killed in order to avoid leaking stale p2m mappings on
- * failure.
- */
- vpci_deassign_device(v->vpci.pdev);
- write_unlock(&v->domain->pci_lock);
+ {
+ spin_lock(&pdev->vpci->lock);
+ /* Disable memory decoding unconditionally on failure. */
+ modify_decoding(pdev, v->vpci.cmd & ~PCI_COMMAND_MEMORY,
+ false);
+ spin_unlock(&pdev->vpci->lock);
+
+ /* Clean all the rangesets */
+ for ( i = 0; i < ARRAY_SIZE(header->bars); i++ )
+ if ( !rangeset_is_empty(header->bars[i].mem) )
+ rangeset_empty(header->bars[i].mem);
+
+ v->vpci.pdev = NULL;
+
+ read_unlock(&v->domain->pci_lock);
+
+ if ( !is_hardware_domain(v->domain) )
+ domain_crash(v->domain);
+
+ return false;
+ }
}
+ v->vpci.pdev = NULL;
+
+ spin_lock(&pdev->vpci->lock);
+ modify_decoding(pdev, v->vpci.cmd, v->vpci.rom_only);
+ spin_unlock(&pdev->vpci->lock);
+
+ read_unlock(&v->domain->pci_lock);
return false;
}
static int __init apply_map(struct domain *d, const struct pci_dev *pdev,
- struct rangeset *mem, uint16_t cmd)
+ uint16_t cmd)
{
struct map_data data = { .d = d, .map = true };
- int rc;
+ struct vpci_header *header = &pdev->vpci->header;
+ int rc = 0;
+ unsigned int i;
ASSERT(rw_is_write_locked(&d->pci_lock));
- while ( (rc = rangeset_consume_ranges(mem, map_range, &data)) == -ERESTART )
+ for ( i = 0; i < ARRAY_SIZE(header->bars); i++ )
{
- /*
- * It's safe to drop and reacquire the lock in this context
- * without risking pdev disappearing because devices cannot be
- * removed until the initial domain has been started.
- */
- write_unlock(&d->pci_lock);
- process_pending_softirqs();
- write_lock(&d->pci_lock);
- }
+ struct vpci_bar *bar = &header->bars[i];
+
+ if ( rangeset_is_empty(bar->mem) )
+ continue;
- rangeset_destroy(mem);
+ while ( (rc = rangeset_consume_ranges(bar->mem, map_range,
+ &data)) == -ERESTART )
+ {
+ /*
+ * It's safe to drop and reacquire the lock in this context
+ * without risking pdev disappearing because devices cannot be
+ * removed until the initial domain has been started.
+ */
+ write_unlock(&d->pci_lock);
+ process_pending_softirqs();
+ write_lock(&d->pci_lock);
+ }
+ }
if ( !rc )
modify_decoding(pdev, cmd, false);
@@ -225,7 +269,7 @@ static int __init apply_map(struct domain *d, const struct pci_dev *pdev,
}
static void defer_map(struct domain *d, struct pci_dev *pdev,
- struct rangeset *mem, uint16_t cmd, bool rom_only)
+ uint16_t cmd, bool rom_only)
{
struct vcpu *curr = current;
@@ -236,7 +280,6 @@ static void defer_map(struct domain *d, struct pci_dev *pdev,
* started for the same device if the domain is not well-behaved.
*/
curr->vpci.pdev = pdev;
- curr->vpci.mem = mem;
curr->vpci.cmd = cmd;
curr->vpci.rom_only = rom_only;
/*
@@ -250,33 +293,33 @@ static void defer_map(struct domain *d, struct pci_dev *pdev,
static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only)
{
struct vpci_header *header = &pdev->vpci->header;
- struct rangeset *mem = rangeset_new(NULL, NULL, 0);
struct pci_dev *tmp, *dev = NULL;
const struct domain *d;
const struct vpci_msix *msix = pdev->vpci->msix;
- unsigned int i;
+ unsigned int i, j;
int rc;
ASSERT(rw_is_write_locked(&pdev->domain->pci_lock));
- if ( !mem )
- return -ENOMEM;
-
/*
- * Create a rangeset that represents the current device BARs memory region
- * and compare it against all the currently active BAR memory regions. If
- * an overlap is found, subtract it from the region to be mapped/unmapped.
+ * Create a rangeset per BAR that represents the current device memory
+ * region and compare it against all the currently active BAR memory
+ * regions. If an overlap is found, subtract it from the region to be
+ * mapped/unmapped.
*
- * First fill the rangeset with all the BARs of this device or with the ROM
+ * First fill the rangesets with the BAR of this device or with the ROM
* BAR only, depending on whether the guest is toggling the memory decode
* bit of the command register, or the enable bit of the ROM BAR register.
*/
for ( i = 0; i < ARRAY_SIZE(header->bars); i++ )
{
- const struct vpci_bar *bar = &header->bars[i];
+ struct vpci_bar *bar = &header->bars[i];
unsigned long start = PFN_DOWN(bar->addr);
unsigned long end = PFN_DOWN(bar->addr + bar->size - 1);
+ if ( !bar->mem )
+ continue;
+
if ( !MAPPABLE_BAR(bar) ||
(rom_only ? bar->type != VPCI_BAR_ROM
: (bar->type == VPCI_BAR_ROM && !header->rom_enabled)) ||
@@ -292,14 +335,31 @@ static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only)
continue;
}
- rc = rangeset_add_range(mem, start, end);
+ rc = rangeset_add_range(bar->mem, start, end);
if ( rc )
{
printk(XENLOG_G_WARNING "Failed to add [%lx, %lx]: %d\n",
start, end, rc);
- rangeset_destroy(mem);
return rc;
}
+
+ /* Check for overlap with the already setup BAR ranges. */
+ for ( j = 0; j < i; j++ )
+ {
+ struct vpci_bar *prev_bar = &header->bars[j];
+
+ if ( rangeset_is_empty(prev_bar->mem) )
+ continue;
+
+ rc = rangeset_remove_range(prev_bar->mem, start, end);
+ if ( rc )
+ {
+ gprintk(XENLOG_WARNING,
+ "%pp: failed to remove overlapping range [%lx, %lx]: %d\n",
+ &pdev->sbdf, start, end, rc);
+ return rc;
+ }
+ }
}
/* Remove any MSIX regions if present. */
@@ -309,14 +369,21 @@ static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only)
unsigned long end = PFN_DOWN(vmsix_table_addr(pdev->vpci, i) +
vmsix_table_size(pdev->vpci, i) - 1);
- rc = rangeset_remove_range(mem, start, end);
- if ( rc )
+ for ( j = 0; j < ARRAY_SIZE(header->bars); j++ )
{
- printk(XENLOG_G_WARNING
- "Failed to remove MSIX table [%lx, %lx]: %d\n",
- start, end, rc);
- rangeset_destroy(mem);
- return rc;
+ const struct vpci_bar *bar = &header->bars[j];
+
+ if ( rangeset_is_empty(bar->mem) )
+ continue;
+
+ rc = rangeset_remove_range(bar->mem, start, end);
+ if ( rc )
+ {
+ gprintk(XENLOG_WARNING,
+ "%pp: failed to remove MSIX table [%lx, %lx]: %d\n",
+ &pdev->sbdf, start, end, rc);
+ return rc;
+ }
}
}
@@ -356,27 +423,37 @@ static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only)
for ( i = 0; i < ARRAY_SIZE(tmp->vpci->header.bars); i++ )
{
- const struct vpci_bar *bar = &tmp->vpci->header.bars[i];
- unsigned long start = PFN_DOWN(bar->addr);
- unsigned long end = PFN_DOWN(bar->addr + bar->size - 1);
-
- if ( !bar->enabled ||
- !rangeset_overlaps_range(mem, start, end) ||
- /*
- * If only the ROM enable bit is toggled check against
- * other BARs in the same device for overlaps, but not
- * against the same ROM BAR.
- */
- (rom_only && tmp == pdev && bar->type == VPCI_BAR_ROM) )
+ const struct vpci_bar *remote_bar = &tmp->vpci->header.bars[i];
+ unsigned long start = PFN_DOWN(remote_bar->addr);
+ unsigned long end = PFN_DOWN(remote_bar->addr +
+ remote_bar->size - 1);
+
+ if ( !remote_bar->enabled )
continue;
- rc = rangeset_remove_range(mem, start, end);
- if ( rc )
+ for ( j = 0; j < ARRAY_SIZE(header->bars); j++)
{
- printk(XENLOG_G_WARNING "Failed to remove [%lx, %lx]: %d\n",
- start, end, rc);
- rangeset_destroy(mem);
- return rc;
+ const struct vpci_bar *bar = &header->bars[j];
+
+ if ( !rangeset_overlaps_range(bar->mem, start, end) ||
+ /*
+ * If only the ROM enable bit is toggled check against
+ * other BARs in the same device for overlaps, but not
+ * against the same ROM BAR.
+ */
+ (rom_only &&
+ tmp == pdev &&
+ bar->type == VPCI_BAR_ROM) )
+ continue;
+
+ rc = rangeset_remove_range(bar->mem, start, end);
+ if ( rc )
+ {
+ gprintk(XENLOG_WARNING,
+ "%pp: failed to remove [%lx, %lx]: %d\n",
+ &pdev->sbdf, start, end, rc);
+ return rc;
+ }
}
}
}
@@ -400,10 +477,10 @@ static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only)
* will always be to establish mappings and process all the BARs.
*/
ASSERT((cmd & PCI_COMMAND_MEMORY) && !rom_only);
- return apply_map(pdev->domain, pdev, mem, cmd);
+ return apply_map(pdev->domain, pdev, cmd);
}
- defer_map(dev->domain, dev, mem, cmd, rom_only);
+ defer_map(dev->domain, dev, cmd, rom_only);
return 0;
}
@@ -598,6 +675,18 @@ static void cf_check rom_write(
rom->addr = val & PCI_ROM_ADDRESS_MASK;
}
+static int bar_add_rangeset(const struct pci_dev *pdev, struct vpci_bar *bar,
+ unsigned int i)
+{
+ char str[32];
+
+ snprintf(str, sizeof(str), "%pp:BAR%u", &pdev->sbdf, i);
+
+ bar->mem = rangeset_new(pdev->domain, str, RANGESETF_no_print);
+
+ return !bar->mem ? -ENOMEM : 0;
+}
+
static int cf_check init_bars(struct pci_dev *pdev)
{
uint16_t cmd;
@@ -679,6 +768,10 @@ static int cf_check init_bars(struct pci_dev *pdev)
else
bars[i].type = VPCI_BAR_MEM32;
+ rc = bar_add_rangeset(pdev, &bars[i], i);
+ if ( rc )
+ goto fail;
+
rc = pci_size_mem_bar(pdev->sbdf, reg, &addr, &size,
(i == num_bars - 1) ? PCI_BAR_LAST : 0);
if ( rc < 0 )
@@ -728,6 +821,12 @@ static int cf_check init_bars(struct pci_dev *pdev)
rom_reg, 4, rom);
if ( rc )
rom->type = VPCI_BAR_EMPTY;
+ else
+ {
+ rc = bar_add_rangeset(pdev, rom, i);
+ if ( rc )
+ goto fail;
+ }
}
}
else
@@ -38,6 +38,8 @@ extern vpci_register_init_t *const __end_vpci_array[];
void vpci_deassign_device(struct pci_dev *pdev)
{
+ unsigned int i;
+
ASSERT(rw_is_write_locked(&pdev->domain->pci_lock));
if ( !has_vpci(pdev->domain) || !pdev->vpci )
@@ -63,6 +65,10 @@ void vpci_deassign_device(struct pci_dev *pdev)
if ( pdev->vpci->msix->table[i] )
iounmap(pdev->vpci->msix->table[i]);
}
+
+ for ( i = 0; i < ARRAY_SIZE(pdev->vpci->header.bars); i++ )
+ rangeset_destroy(pdev->vpci->header.bars[i].mem);
+
xfree(pdev->vpci->msix);
xfree(pdev->vpci->msi);
xfree(pdev->vpci);
@@ -72,6 +72,7 @@ struct vpci {
/* Guest address. */
uint64_t guest_addr;
uint64_t size;
+ struct rangeset *mem;
enum {
VPCI_BAR_EMPTY,
VPCI_BAR_IO,
@@ -156,7 +157,6 @@ struct vpci {
struct vpci_vcpu {
/* Per-vcpu structure to store state while {un}mapping of PCI BARs. */
- struct rangeset *mem;
struct pci_dev *pdev;
uint16_t cmd;
bool rom_only : 1;