@@ -56,6 +56,7 @@ typedef struct VFIORegion {
uint32_t nr_mmaps;
VFIOMmap *mmaps;
uint8_t nr; /* cache the region number for debug */
+ int remfd; /* fd if exported from remote process */
} VFIORegion;
typedef struct VFIOMigration {
@@ -1571,6 +1571,16 @@ bool vfio_get_info_dma_avail(struct vfio_iommu_type1_info *info,
return true;
}
+static int vfio_get_region_info_remfd(VFIODevice *vbasedev, int index)
+{
+ struct vfio_region_info *info;
+
+ if (vbasedev->regions == NULL || vbasedev->regions[index] == NULL) {
+ vfio_get_region_info(vbasedev, index, &info);
+ }
+ return vbasedev->regfds != NULL ? vbasedev->regfds[index] : -1;
+}
+
static int vfio_setup_region_sparse_mmaps(VFIORegion *region,
struct vfio_region_info *info)
{
@@ -1624,6 +1634,7 @@ int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion *region,
region->size = info->size;
region->fd_offset = info->offset;
region->nr = index;
+ region->remfd = vfio_get_region_info_remfd(vbasedev, index);
if (region->size) {
region->mem = g_new0(MemoryRegion, 1);
@@ -1667,6 +1678,7 @@ int vfio_region_mmap(VFIORegion *region)
{
int i, prot = 0;
char *name;
+ int fd;
if (!region->mem) {
return 0;
@@ -1675,9 +1687,11 @@ int vfio_region_mmap(VFIORegion *region)
prot |= region->flags & VFIO_REGION_INFO_FLAG_READ ? PROT_READ : 0;
prot |= region->flags & VFIO_REGION_INFO_FLAG_WRITE ? PROT_WRITE : 0;
+ fd = region->remfd != -1 ? region->remfd : region->vbasedev->fd;
+
for (i = 0; i < region->nr_mmaps; i++) {
region->mmaps[i].mmap = mmap(NULL, region->mmaps[i].size, prot,
- MAP_SHARED, region->vbasedev->fd,
+ MAP_SHARED, fd,
region->fd_offset +
region->mmaps[i].offset);
if (region->mmaps[i].mmap == MAP_FAILED) {
@@ -2524,6 +2538,23 @@ int vfio_get_device(VFIOGroup *group, const char *name,
void vfio_put_base_device(VFIODevice *vbasedev)
{
+ if (vbasedev->regions != NULL) {
+ int i;
+
+ for (i = 0; i < vbasedev->num_regions; i++) {
+ if (vbasedev->regfds != NULL && vbasedev->regfds[i] != -1) {
+ close(vbasedev->regfds[i]);
+ }
+ g_free(vbasedev->regions[i]);
+ }
+ g_free(vbasedev->regions);
+ vbasedev->regions = NULL;
+ if (vbasedev->regfds != NULL) {
+ g_free(vbasedev->regfds);
+ vbasedev->regfds = NULL;
+ }
+ }
+
if (!vbasedev->group) {
return;
}
@@ -256,11 +256,16 @@ static void vfio_irqchip_change(Notifier *notify, void *data)
static int vfio_intx_enable(VFIOPCIDevice *vdev, Error **errp)
{
- uint8_t pin = vfio_pci_read_config(&vdev->pdev, PCI_INTERRUPT_PIN, 1);
+ uint8_t pin;
Error *err = NULL;
int32_t fd;
int ret;
+ if (vdev->vbasedev.proxy != NULL) {
+ pin = vdev->pdev.config[PCI_INTERRUPT_PIN];
+ } else {
+ pin = vfio_pci_read_config(&vdev->pdev, PCI_INTERRUPT_PIN, 1);
+ }
if (!pin) {
return 0;
@@ -1258,10 +1263,15 @@ static int vfio_msi_setup(VFIOPCIDevice *vdev, int pos, Error **errp)
int ret, entries;
Error *err = NULL;
- if (pread(vdev->vbasedev.fd, &ctrl, sizeof(ctrl),
- vdev->config_offset + pos + PCI_CAP_FLAGS) != sizeof(ctrl)) {
- error_setg_errno(errp, errno, "failed reading MSI PCI_CAP_FLAGS");
- return -errno;
+ if (vdev->vbasedev.proxy != NULL) {
+ /* during setup, config space was initialized from remote */
+ memcpy(&ctrl, vdev->pdev.config + pos + PCI_CAP_FLAGS, sizeof(ctrl));
+ } else {
+ if (pread(vdev->vbasedev.fd, &ctrl, sizeof(ctrl),
+ vdev->config_offset + pos + PCI_CAP_FLAGS) != sizeof(ctrl)) {
+ error_setg_errno(errp, errno, "failed reading MSI PCI_CAP_FLAGS");
+ return -errno;
+ }
}
ctrl = le16_to_cpu(ctrl);
@@ -3562,9 +3572,50 @@ static void vfio_user_pci_realize(PCIDevice *pdev, Error **errp)
goto error;
}
+ vfio_bars_register(vdev);
+
+ ret = vfio_add_capabilities(vdev, errp);
+ if (ret) {
+ goto out_teardown;
+ }
+
+ /* QEMU emulates all of MSI & MSIX */
+ if (pdev->cap_present & QEMU_PCI_CAP_MSIX) {
+ memset(vdev->emulated_config_bits + pdev->msix_cap, 0xff,
+ MSIX_CAP_LENGTH);
+ }
+
+ if (pdev->cap_present & QEMU_PCI_CAP_MSI) {
+ memset(vdev->emulated_config_bits + pdev->msi_cap, 0xff,
+ vdev->msi_cap_size);
+ }
+
+ if (vdev->pdev.config[PCI_INTERRUPT_PIN] != 0) {
+ vdev->intx.mmap_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
+ vfio_intx_mmap_enable, vdev);
+ pci_device_set_intx_routing_notifier(&vdev->pdev,
+ vfio_intx_routing_notifier);
+ vdev->irqchip_change_notifier.notify = vfio_irqchip_change;
+ kvm_irqchip_add_change_notifier(&vdev->irqchip_change_notifier);
+ ret = vfio_intx_enable(vdev, errp);
+ if (ret) {
+ goto out_deregister;
+ }
+ }
+
+ vfio_register_err_notifier(vdev);
+ vfio_register_req_notifier(vdev);
+
return;
+out_deregister:
+ pci_device_set_intx_routing_notifier(&vdev->pdev, NULL);
+ kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier);
+out_teardown:
+ vfio_teardown_msi(vdev);
+ vfio_bars_exit(vdev);
error:
+ vfio_user_disconnect(proxy);
error_prepend(errp, VFIO_MSG_PREFIX, vdev->vbasedev.name);
}
@@ -574,6 +574,16 @@ VFIOProxy *vfio_user_connect_dev(char *sockname, Error **errp)
return proxy;
}
+static void vfio_user_cb(void *opaque)
+{
+ VFIOProxy *proxy = opaque;
+
+ qemu_mutex_lock(&proxy->lock);
+ proxy->state = CLOSED;
+ qemu_mutex_unlock(&proxy->lock);
+ qemu_cond_signal(&proxy->close_cv);
+}
+
void vfio_user_disconnect(VFIOProxy *proxy)
{
VFIOUserReply *r1, *r2;
@@ -601,6 +611,16 @@ void vfio_user_disconnect(VFIOProxy *proxy)
g_free(r1);
}
+ /*
+ * Make sure the iothread isn't blocking anywhere
+ * with a ref to this proxy by waiting for a BH
+ * handler to run after the proxy fd handlers were
+ * deleted above.
+ */
+ proxy->close_wait = 1;
+ aio_bh_schedule_oneshot(iothread_get_aio_context(vfio_user_iothread),
+ vfio_user_cb, proxy);
+
/* drop locks so the iothread can make progress */
qemu_mutex_unlock_iothread();
qemu_cond_wait(&proxy->close_cv, &proxy->lock);