@@ -83,8 +83,11 @@ struct vfio_pci_device {
struct vfio_region {
struct vfio_region_info info;
+ struct vfio_device *vdev;
u64 guest_phys_addr;
void *host_addr;
+ u32 port_base;
+ int is_ioport :1;
};
struct vfio_device {
@@ -1,5 +1,6 @@
#include "kvm/kvm.h"
#include "kvm/vfio.h"
+#include "kvm/ioport.h"
#include <linux/list.h>
@@ -80,6 +81,141 @@ out_free_buf:
return ret;
}
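+/*
+ * Trapped I/O port read: forward the access to the device by reading from the
+ * corresponding VFIO region at the offset of the accessed port.
+ */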
+static bool vfio_ioport_in(struct ioport *ioport, struct kvm_cpu *vcpu,
+ u16 port, void *data, int len)
+{
+ u32 val;
+ ssize_t nr;
+ struct vfio_region *region = ioport->priv;
+ struct vfio_device *vdev = region->vdev;
+
+ u32 offset = port - region->port_base;
+
+ if (!(region->info.flags & VFIO_REGION_INFO_FLAG_READ))
+ return false;
+
+ nr = pread(vdev->fd, &val, len, region->info.offset + offset);
+ if (nr != len) {
+ vfio_dev_err(vdev, "could not read %d bytes from I/O port 0x%x\n",
+ len, port);
+ return false;
+ }
+
+ switch (len) {
+ case 1:
+ ioport__write8(data, val);
+ break;
+ case 2:
+ ioport__write16(data, val);
+ break;
+ case 4:
+ ioport__write32(data, val);
+ break;
+ default:
+ return false;
+ }
+
+ return true;
+}
+
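+/*
+ * Trapped I/O port write: forward the value written by the guest to the device
+ * through the VFIO region file descriptor.
+ */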
+static bool vfio_ioport_out(struct ioport *ioport, struct kvm_cpu *vcpu,
+ u16 port, void *data, int len)
+{
+ u32 val;
+ ssize_t nr;
+ struct vfio_region *region = ioport->priv;
+ struct vfio_device *vdev = region->vdev;
+
+ u32 offset = port - region->port_base;
+
+ if (!(region->info.flags & VFIO_REGION_INFO_FLAG_WRITE))
+ return false;
+
+ switch (len) {
+ case 1:
+ val = ioport__read8(data);
+ break;
+ case 2:
+ val = ioport__read16(data);
+ break;
+ case 4:
+ val = ioport__read32(data);
+ break;
+ default:
+ return false;
+ }
+
+ nr = pwrite(vdev->fd, &val, len, region->info.offset + offset);
+ if (nr != len)
+ vfio_dev_err(vdev, "could not write %d bytes to I/O port 0x%x",
+ len, port);
+
+ return nr == len;
+}
+
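+/* Handlers for trapped accesses to I/O port BARs. */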
+static struct ioport_operations vfio_ioport_ops = {
+ .io_in = vfio_ioport_in,
+ .io_out = vfio_ioport_out,
+};
+
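+/*
+ * Trapped MMIO access to a region that could not be mmap'd: forward reads and
+ * writes to the device with pread()/pwrite() on the VFIO file descriptor.
+ */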
+static void vfio_mmio_access(struct kvm_cpu *vcpu, u64 addr, u8 *data, u32 len,
+ u8 is_write, void *ptr)
+{
+ u64 val;
+ ssize_t nr;
+ struct vfio_region *region = ptr;
+ struct vfio_device *vdev = region->vdev;
+
+ u32 offset = addr - region->guest_phys_addr;
+
+ if (len < 1 || len > 8)
+ goto err_report;
+
+ if (is_write) {
+ if (!(region->info.flags & VFIO_REGION_INFO_FLAG_WRITE))
+ goto err_report;
+
+ memcpy(&val, data, len);
+
+ nr = pwrite(vdev->fd, &val, len, region->info.offset + offset);
+ if ((u32)nr != len)
+ goto err_report;
+ } else {
+ if (!(region->info.flags & VFIO_REGION_INFO_FLAG_READ))
+ goto err_report;
+
+ nr = pread(vdev->fd, &val, len, region->info.offset + offset);
+ if ((u32)nr != len)
+ goto err_report;
+
+ memcpy(data, &val, len);
+ }
+
+ return;
+
+err_report:
+ vfio_dev_err(vdev, "could not %s %u bytes at 0x%x (0x%llx)", is_write ?
+ "write" : "read", len, offset, addr);
+}
+
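+/*
+ * Emulate a region that cannot be mapped into the guest: register an I/O port
+ * or MMIO handler so that guest accesses are trapped and forwarded to the
+ * device.
+ */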
+static int vfio_setup_trap_region(struct kvm *kvm, struct vfio_device *vdev,
+ struct vfio_region *region)
+{
+ if (region->is_ioport) {
+ int port = ioport__register(kvm, IOPORT_EMPTY, &vfio_ioport_ops,
+ region->info.size, region);
+ if (port < 0)
+ return port;
+
+ region->port_base = port;
+ return 0;
+ }
+
+ return kvm__register_mmio(kvm, region->guest_phys_addr,
+ region->info.size, false, vfio_mmio_access,
+ region);
+}
+
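+/*
+ * Map the region into the guest address space when possible, falling back to
+ * trapping accesses otherwise.
+ */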
int vfio_map_region(struct kvm *kvm, struct vfio_device *vdev,
struct vfio_region *region)
{
@@ -88,17 +224,8 @@ int vfio_map_region(struct kvm *kvm, struct vfio_device *vdev,
/* KVM needs page-aligned regions */
u64 map_size = ALIGN(region->info.size, PAGE_SIZE);
- /*
- * We don't want to mess about trapping config accesses, so require that
- * they can be mmap'd. Note that for PCI, this precludes the use of I/O
- * BARs in the guest (we will hide them from Configuration Space, which
- * is trapped).
- */
- if (!(region->info.flags & VFIO_REGION_INFO_FLAG_MMAP)) {
- vfio_dev_info(vdev, "ignoring region %u, as it can't be mmap'd",
- region->info.index);
- return 0;
- }
+ if (!(region->info.flags & VFIO_REGION_INFO_FLAG_MMAP))
+ return vfio_setup_trap_region(kvm, vdev, region);
if (region->info.flags & VFIO_REGION_INFO_FLAG_READ)
prot |= PROT_READ;
@@ -108,10 +235,10 @@ int vfio_map_region(struct kvm *kvm, struct vfio_device *vdev,
base = mmap(NULL, region->info.size, prot, MAP_SHARED, vdev->fd,
region->info.offset);
if (base == MAP_FAILED) {
- ret = -errno;
- vfio_dev_err(vdev, "failed to mmap region %u (0x%llx bytes)",
- region->info.index, region->info.size);
- return ret;
+ /* TODO: support sparse mmap */
+ vfio_dev_warn(vdev, "failed to mmap region %u (0x%llx bytes), falling back to trapping",
+ region->info.index, region->info.size);
+ return vfio_setup_trap_region(kvm, vdev, region);
}
region->host_addr = base;
@@ -127,7 +254,13 @@ int vfio_map_region(struct kvm *kvm, struct vfio_device *vdev,
void vfio_unmap_region(struct kvm *kvm, struct vfio_region *region)
{
- munmap(region->host_addr, region->info.size);
+ if (region->host_addr) {
+ munmap(region->host_addr, region->info.size);
+ } else if (region->is_ioport) {
+ ioport__unregister(kvm, region->port_base);
+ } else {
+ kvm__deregister_mmio(kvm, region->guest_phys_addr);
+ }
}
static int vfio_configure_device(struct kvm *kvm, struct vfio_device *vdev)
@@ -640,24 +640,27 @@ static int vfio_pci_fixup_cfg_space(struct vfio_device *vdev)
struct vfio_region_info *info;
struct vfio_pci_device *pdev = &vdev->pci;
- /* Enable exclusively MMIO and bus mastering */
- pdev->hdr.command &= ~PCI_COMMAND_IO;
- pdev->hdr.command |= PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER;
-
/* Initialise the BARs */
for (i = VFIO_PCI_BAR0_REGION_INDEX; i <= VFIO_PCI_BAR5_REGION_INDEX; ++i) {
+ u64 base;
struct vfio_region *region = &vdev->regions[i];
- u64 base = region->guest_phys_addr;
+
+ /* Construct a fake reg to match what we've mapped. */
+ if (region->is_ioport) {
+ base = (region->port_base & PCI_BASE_ADDRESS_IO_MASK) |
+ PCI_BASE_ADDRESS_SPACE_IO;
+ } else {
+ base = (region->guest_phys_addr &
+ PCI_BASE_ADDRESS_MEM_MASK) |
+ PCI_BASE_ADDRESS_SPACE_MEMORY;
+ }
+
+ pdev->hdr.bar[i] = base;
if (!base)
continue;
pdev->hdr.bar_size[i] = region->info.size;
-
- /* Construct a fake reg to match what we've mapped. */
- pdev->hdr.bar[i] = (base & PCI_BASE_ADDRESS_MEM_MASK) |
- PCI_BASE_ADDRESS_SPACE_MEMORY |
- PCI_BASE_ADDRESS_MEM_TYPE_32;
}
/* I really can't be bothered to support cardbus. */
@@ -794,6 +797,7 @@ static int vfio_pci_configure_bar(struct kvm *kvm, struct vfio_device *vdev,
size_t nr)
{
int ret;
+ u32 bar;
size_t map_size;
struct vfio_pci_device *pdev = &vdev->pci;
struct vfio_region *region = &vdev->regions[nr];
@@ -801,6 +805,10 @@ static int vfio_pci_configure_bar(struct kvm *kvm, struct vfio_device *vdev,
if (nr >= vdev->info.num_regions)
return 0;
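+ /* Check the space bit of the BAR to tell I/O port BARs from memory BARs. */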
+ bar = pdev->hdr.bar[nr];
+
+ region->vdev = vdev;
+ region->is_ioport = !!(bar & PCI_BASE_ADDRESS_SPACE_IO);
region->info = (struct vfio_region_info) {
.argsz = sizeof(region->info),
.index = nr,
@@ -828,14 +836,13 @@ static int vfio_pci_configure_bar(struct kvm *kvm, struct vfio_device *vdev,
}
}
- /* Grab some MMIO space in the guest */
- map_size = ALIGN(region->info.size, PAGE_SIZE);
- region->guest_phys_addr = pci_get_io_space_block(map_size);
+ if (!region->is_ioport) {
+ /* Grab some MMIO space in the guest */
+ map_size = ALIGN(region->info.size, PAGE_SIZE);
+ region->guest_phys_addr = pci_get_io_space_block(map_size);
+ }
- /*
- * Map the BARs into the guest. We'll later need to update
- * configuration space to reflect our allocation.
- */
+ /* Map the BARs into the guest or set up a trap region. */
ret = vfio_map_region(kvm, vdev, region);
if (ret)
return ret;