@@ -712,6 +712,7 @@ void ram_discard_manager_unregister_listener(RamDiscardManager *rdm,
typedef struct CoalescedMemoryRange CoalescedMemoryRange;
typedef struct MemoryRegionIoeventfd MemoryRegionIoeventfd;
+typedef struct MemoryRegionIoregionfd MemoryRegionIoregionfd;
/** MemoryRegion:
*
@@ -756,6 +757,8 @@ struct MemoryRegion {
const char *name;
unsigned ioeventfd_nb;
MemoryRegionIoeventfd *ioeventfds;
+ unsigned ioregionfd_nb;
+ MemoryRegionIoregionfd *ioregionfds;
RamDiscardManager *rdm; /* Only for RAM */
};
@@ -974,6 +977,38 @@ struct MemoryListener {
*/
void (*eventfd_del)(MemoryListener *listener, MemoryRegionSection *section,
bool match_data, uint64_t data, EventNotifier *e);
+ /**
+ * @ioregionfd_add:
+ *
+ * Called during an address space update transaction,
+ * for a section of the address space that has had a new ioregionfd
+ * registration since the last transaction.
+ *
+ * @listener: The #MemoryListener.
+ * @section: The new #MemoryRegionSection.
+ * @data: The @data parameter for the new ioregionfd.
+ * @fd: The file descriptor parameter for the new ioregionfd.
+ */
+ void (*ioregionfd_add)(MemoryListener *listener,
+ MemoryRegionSection *section,
+ uint64_t data, int fd);
+
+ /**
+ * @ioregionfd_del:
+ *
+ * Called during an address space update transaction,
+ * for a section of the address space that has dropped an ioregionfd
+ * registration since the last transaction.
+ *
+ * @listener: The #MemoryListener.
+ * @section: The #MemoryRegionSection that dropped the ioregionfd.
+ * @data: The @data parameter for the dropped ioregionfd.
+ * @fd: The file descriptor parameter for the dropped ioregionfd.
+ */
+ void (*ioregionfd_del)(MemoryListener *listener,
+ MemoryRegionSection *section,
+ uint64_t data, int fd);
+
/**
* @coalesced_io_add:
@@ -1041,6 +1076,8 @@ struct AddressSpace {
int ioeventfd_nb;
struct MemoryRegionIoeventfd *ioeventfds;
+ int ioregionfd_nb;
+ struct MemoryRegionIoregionfd *ioregionfds;
QTAILQ_HEAD(, MemoryListener) listeners;
QTAILQ_ENTRY(AddressSpace) address_spaces_link;
};
@@ -2175,6 +2212,19 @@ void memory_region_del_eventfd(MemoryRegion *mr,
uint64_t data,
EventNotifier *e);
+void memory_region_add_ioregionfd(MemoryRegion *mr,
+ hwaddr addr,
+ unsigned size,
+ uint64_t data,
+ int fd,
+ bool pio);
+/* NOTE(review): no @pio here, unlike the add variant - confirm. */
+void memory_region_del_ioregionfd(MemoryRegion *mr,
+ hwaddr addr,
+ unsigned size,
+ uint64_t data,
+ int fd);
+
/**
* memory_region_add_subregion: Add a subregion to a container.
*
@@ -46,6 +46,7 @@ extern bool kvm_readonly_mem_allowed;
extern bool kvm_direct_msi_allowed;
extern bool kvm_ioeventfd_any_length_allowed;
extern bool kvm_msi_use_devid;
+extern bool kvm_ioregionfds_allowed;
#define kvm_enabled() (kvm_allowed)
/**
@@ -167,6 +168,15 @@ extern bool kvm_msi_use_devid;
*/
#define kvm_msi_devid_required() (kvm_msi_use_devid)
+/**
+ * kvm_ioregionfds_enabled:
+ *
+ * Returns: true if we can use ioregionfd to receive the MMIO/PIO
+ * dispatches from KVM (ie the kernel supports ioregionfd and we are running
+ * with a configuration where it is meaningful to use them).
+ */
+#define kvm_ioregionfds_enabled() (kvm_ioregionfds_allowed)
+
#else
#define kvm_enabled() (0)
@@ -184,12 +194,14 @@ extern bool kvm_msi_use_devid;
#define kvm_direct_msi_enabled() (false)
#define kvm_ioeventfd_any_length_enabled() (false)
#define kvm_msi_devid_required() (false)
+#define kvm_ioregionfds_enabled() (false)
#endif /* CONFIG_KVM_IS_POSSIBLE */
struct kvm_run;
struct kvm_lapic_state;
struct kvm_irq_routing_entry;
+struct kvm_ioregion;
typedef struct KVMCapabilityInfo {
const char *name;
@@ -548,4 +560,7 @@ bool kvm_cpu_check_are_resettable(void);
bool kvm_arch_cpu_check_are_resettable(void);
bool kvm_dirty_ring_enabled(void);
+
+int kvm_set_ioregionfd(struct kvm_ioregion *ioregionfd);
+
#endif
@@ -776,6 +776,29 @@ struct kvm_ioeventfd {
__u8 pad[36];
};
+enum {
+ kvm_ioregion_flag_nr_pio,
+ kvm_ioregion_flag_nr_posted_writes,
+ kvm_ioregion_flag_nr_deassign,
+ kvm_ioregion_flag_nr_max,
+};
+
+#define KVM_IOREGION_PIO (1 << kvm_ioregion_flag_nr_pio)
+#define KVM_IOREGION_POSTED_WRITES (1 << kvm_ioregion_flag_nr_posted_writes)
+#define KVM_IOREGION_DEASSIGN (1 << kvm_ioregion_flag_nr_deassign)
+
+#define KVM_IOREGION_VALID_FLAG_MASK ((1 << kvm_ioregion_flag_nr_max) - 1)
+
+struct kvm_ioregion {
+ __u64 guest_paddr; /* guest physical address */
+ __u64 memory_size; /* bytes */
+ __u64 user_data;
+ __s32 read_fd;
+ __s32 write_fd;
+ __u32 flags;
+ __u8 pad[28];
+};
+
#define KVM_X86_DISABLE_EXITS_MWAIT (1 << 0)
#define KVM_X86_DISABLE_EXITS_HLT (1 << 1)
#define KVM_X86_DISABLE_EXITS_PAUSE (1 << 2)
@@ -933,6 +956,7 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_PIT_STATE2 35
#endif
#define KVM_CAP_IOEVENTFD 36
+#define KVM_CAP_IOREGIONFD 206
#define KVM_CAP_SET_IDENTITY_MAP_ADDR 37
#ifdef __KVM_HAVE_XEN_HVM
#define KVM_CAP_XEN_HVM 38
@@ -1372,6 +1396,7 @@ struct kvm_vfio_spapr_tce {
struct kvm_userspace_memory_region)
#define KVM_SET_TSS_ADDR _IO(KVMIO, 0x47)
#define KVM_SET_IDENTITY_MAP_ADDR _IOW(KVMIO, 0x48, __u64)
+#define KVM_SET_IOREGION _IOW(KVMIO, 0x49, struct kvm_ioregion)
/* enable ucontrol for s390 */
struct kvm_s390_ucas_mapping {
@@ -168,6 +168,7 @@ bool kvm_vm_attributes_allowed;
bool kvm_direct_msi_allowed;
bool kvm_ioeventfd_any_length_allowed;
bool kvm_msi_use_devid;
+bool kvm_ioregionfds_allowed;
static bool kvm_immediate_exit;
static hwaddr kvm_max_slot_size = ~0;
@@ -384,6 +385,18 @@ err:
return ret;
}
+/* Pass @ioregionfd to KVM_SET_IOREGION via kvm_vm_ioctl(); log failures. */
+int kvm_set_ioregionfd(struct kvm_ioregion *ioregionfd)
+{
+    int rc = kvm_vm_ioctl(kvm_state, KVM_SET_IOREGION, ioregionfd);
+
+    if (rc >= 0) {
+        return rc;
+    }
+    error_report("Failed SET_IOREGION syscall ret is %d", rc);
+    return rc;
+}
+
static int do_kvm_destroy_vcpu(CPUState *cpu)
{
KVMState *s = kvm_state;
@@ -1635,6 +1648,104 @@ static void kvm_io_ioeventfd_del(MemoryListener *listener,
}
}
+/* MemoryListener hook: register a memory (non-PIO) ioregionfd with KVM. */
+static void kvm_mem_ioregionfd_add(MemoryListener *listener,
+                                   MemoryRegionSection *section,
+                                   uint64_t data,
+                                   int fd)
+{
+    /* Designated initializer zeroes @pad along with every unnamed member. */
+    struct kvm_ioregion ioregionfd = {
+        .guest_paddr = section->offset_within_address_space,
+        .memory_size = int128_get64(section->size),
+        .user_data = data,
+        .read_fd = fd,    /* the same descriptor serves reads and writes */
+        .write_fd = fd,
+        .flags = 0,
+    };
+    int r = kvm_set_ioregionfd(&ioregionfd);
+
+    if (r < 0) {
+        /* Fatal: this void callback has no way to report the failure. */
+        fprintf(stderr, "%s: error adding ioregionfd: %s (%d)\n",
+                __func__, strerror(-r), -r);
+        abort();
+    }
+}
+
+/* MemoryListener hook: deassign a memory (non-PIO) ioregionfd from KVM. */
+static void kvm_mem_ioregionfd_del(MemoryListener *listener,
+                                   MemoryRegionSection *section,
+                                   uint64_t data,
+                                   int fd)
+{
+    /* Designated initializer zeroes @pad along with every unnamed member. */
+    struct kvm_ioregion ioregionfd = {
+        .guest_paddr = section->offset_within_address_space,
+        .memory_size = int128_get64(section->size),
+        .user_data = data,
+        .read_fd = fd,    /* the same descriptor serves reads and writes */
+        .write_fd = fd,
+        .flags = KVM_IOREGION_DEASSIGN,
+    };
+    int r = kvm_set_ioregionfd(&ioregionfd);
+
+    if (r < 0) {
+        /* Fatal: this void callback has no way to report the failure. */
+        fprintf(stderr, "%s: error deleting ioregionfd: %s (%d)\n",
+                __func__, strerror(-r), -r);
+        abort();
+    }
+}
+
+/* MemoryListener hook: register a port-I/O (PIO) ioregionfd with KVM. */
+static void kvm_io_ioregionfd_add(MemoryListener *listener,
+                                  MemoryRegionSection *section,
+                                  uint64_t data,
+                                  int fd)
+{
+    struct kvm_ioregion ioregionfd = {
+        .guest_paddr = section->offset_within_address_space,
+        .memory_size = int128_get64(section->size),
+        .user_data = data,
+        .read_fd = fd,    /* the same descriptor serves reads and writes */
+        .write_fd = fd,
+        .flags = KVM_IOREGION_PIO,
+    };    /* unnamed @pad is zero-initialized */
+    int r = kvm_set_ioregionfd(&ioregionfd);
+
+    if (r < 0) {
+        /* Fatal: this void callback has no way to report the failure. */
+        fprintf(stderr, "%s: error adding pio ioregionfd: %s (%d)\n",
+                __func__, strerror(-r), -r);
+        abort();
+    }
+}
+
+/* MemoryListener hook: deassign a port-I/O (PIO) ioregionfd from KVM. */
+static void kvm_io_ioregionfd_del(MemoryListener *listener,
+                                  MemoryRegionSection *section,
+                                  uint64_t data,
+                                  int fd)
+{
+    struct kvm_ioregion ioregionfd = {
+        .guest_paddr = section->offset_within_address_space,
+        .memory_size = int128_get64(section->size),
+        .user_data = data,
+        .read_fd = fd,    /* the same descriptor serves reads and writes */
+        .write_fd = fd,
+        .flags = KVM_IOREGION_DEASSIGN | KVM_IOREGION_PIO,
+    };    /* unnamed @pad is zero-initialized */
+    int r = kvm_set_ioregionfd(&ioregionfd);
+
+    if (r < 0) {
+        /* Fatal: this void callback has no way to report the failure. */
+        fprintf(stderr, "%s: error deleting pio ioregionfd: %s (%d)\n",
+                __func__, strerror(-r), -r);
+        abort();
+    }
+}
+
void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml,
AddressSpace *as, int as_id, const char *name)
{
@@ -1679,6 +1790,12 @@ static MemoryListener kvm_io_listener = {
.priority = 10,
};
+static MemoryListener kvm_ioregion_listener = {
+    .priority = 10,
+    .ioregionfd_del = kvm_io_ioregionfd_del,    /* PIO deassign hook */
+    .ioregionfd_add = kvm_io_ioregionfd_add,    /* PIO assign hook */
+};
+
int kvm_set_irq(KVMState *s, int irq, int level)
{
struct kvm_irq_level event;
@@ -2564,6 +2681,9 @@ static int kvm_init(MachineState *ms)
kvm_ioeventfd_any_length_allowed =
(kvm_check_extension(s, KVM_CAP_IOEVENTFD_ANY_LENGTH) > 0);
+ kvm_ioregionfds_allowed =
+ (kvm_check_extension(s, KVM_CAP_IOREGIONFD) > 0);
+
kvm_state = s;
ret = kvm_arch_init(ms, s);
@@ -2585,6 +2705,12 @@ static int kvm_init(MachineState *ms)
s->memory_listener.listener.eventfd_add = kvm_mem_ioeventfd_add;
s->memory_listener.listener.eventfd_del = kvm_mem_ioeventfd_del;
}
+
+ if (kvm_ioregionfds_allowed) {
+ s->memory_listener.listener.ioregionfd_add = kvm_mem_ioregionfd_add;
+ s->memory_listener.listener.ioregionfd_del = kvm_mem_ioregionfd_del;
+ }
+
s->memory_listener.listener.coalesced_io_add = kvm_coalesce_mmio_region;
s->memory_listener.listener.coalesced_io_del = kvm_uncoalesce_mmio_region;
@@ -2594,6 +2720,12 @@ static int kvm_init(MachineState *ms)
memory_listener_register(&kvm_io_listener,
&address_space_io);
}
+
+ if (kvm_ioregionfds_allowed) {
+ memory_listener_register(&kvm_ioregion_listener,
+ &address_space_io);
+ }
+
memory_listener_register(&kvm_coalesced_pio_listener,
&address_space_io);
@@ -29,6 +29,7 @@ bool kvm_gsi_direct_mapping;
bool kvm_allowed;
bool kvm_readonly_mem_allowed;
bool kvm_ioeventfd_any_length_allowed;
+bool kvm_ioregionfds_allowed;
bool kvm_msi_use_devid;
void kvm_flush_coalesced_mmio_buffer(void)
Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com> --- include/exec/memory.h | 50 +++++++++++++++ include/sysemu/kvm.h | 15 +++++ linux-headers/linux/kvm.h | 25 ++++++++ accel/kvm/kvm-all.c | 132 ++++++++++++++++++++++++++++++++++++++ accel/stubs/kvm-stub.c | 1 + 5 files changed, 223 insertions(+)