diff mbox series

[RFC,1/8] ioregionfd: introduce a syscall and memory API

Message ID 6001ed71ebe40c88e9d903bf0983884f522b2dea.1644302411.git.elena.ufimtseva@oracle.com (mailing list archive)
State New, archived
Headers show
Series ioregionfd introduction | expand

Commit Message

Elena Ufimtseva Feb. 8, 2022, 7:22 a.m. UTC
Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
---
 include/exec/memory.h     |  50 +++++++++++++++
 include/sysemu/kvm.h      |  15 +++++
 linux-headers/linux/kvm.h |  25 ++++++++
 accel/kvm/kvm-all.c       | 132 ++++++++++++++++++++++++++++++++++++++
 accel/stubs/kvm-stub.c    |   1 +
 5 files changed, 223 insertions(+)

Comments

David Hildenbrand Feb. 16, 2022, 12:19 p.m. UTC | #1
Looks straightforward to me.

[...]

>  
> +int kvm_set_ioregionfd(struct kvm_ioregion *ioregionfd)
> +{
> +    KVMState *s = kvm_state;
> +    int ret = -1;
> +
> +    ret = kvm_vm_ioctl(s, KVM_SET_IOREGION, ioregionfd);
> +    if (ret < 0) {
> +        error_report("Failed SET_IOREGION syscall ret is %d", ret);

Maybe print the textual representation via strerror(-ret).

> +    }
> +    return ret;
> +}
> +
>  static int do_kvm_destroy_vcpu(CPUState *cpu)
>  {
>      KVMState *s = kvm_state;
> @@ -1635,6 +1648,104 @@ static void kvm_io_ioeventfd_del(MemoryListener *listener,
>      }
>  }
>  
> +static void kvm_mem_ioregionfd_add(MemoryListener *listener,
> +                                   MemoryRegionSection *section,
> +                                   uint64_t data,
> +                                   int fd)
> +{
> +
> +    struct kvm_ioregion ioregionfd;
> +    int r = -1;
> +
> +    ioregionfd.guest_paddr = section->offset_within_address_space;
> +    ioregionfd.memory_size = int128_get64(section->size);
> +    ioregionfd.user_data = data;
> +    ioregionfd.read_fd = fd;
> +    ioregionfd.write_fd = fd;
> +    ioregionfd.flags = 0;
> +    memset(&ioregionfd.pad, 0, sizeof(ioregionfd.pad));
> +
> +    r = kvm_set_ioregionfd(&ioregionfd);
> +    if (r < 0) {
> +        fprintf(stderr, "%s: error adding ioregionfd: %s (%d)\n,",
> +                __func__, strerror(-r), -r);

Oh, you're actually printing the error again? Why use error_report() above
but fprintf() here?

[...]

>  void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml,
>                                    AddressSpace *as, int as_id, const char *name)
>  {
> @@ -1679,6 +1790,12 @@ static MemoryListener kvm_io_listener = {
>      .priority = 10,
>  };
>  
> +static MemoryListener kvm_ioregion_listener = {
> +    .ioregionfd_add = kvm_io_ioregionfd_add,
> +    .ioregionfd_del = kvm_io_ioregionfd_del,
> +    .priority = 10,
> +};
> +
>  int kvm_set_irq(KVMState *s, int irq, int level)
>  {
>      struct kvm_irq_level event;
> @@ -2564,6 +2681,9 @@ static int kvm_init(MachineState *ms)
>      kvm_ioeventfd_any_length_allowed =
>          (kvm_check_extension(s, KVM_CAP_IOEVENTFD_ANY_LENGTH) > 0);
>  
> +    kvm_ioregionfds_allowed =
> +        (kvm_check_extension(s, KVM_CAP_IOREGIONFD) > 0);
> +
>      kvm_state = s;
>  
>      ret = kvm_arch_init(ms, s);
> @@ -2585,6 +2705,12 @@ static int kvm_init(MachineState *ms)
>          s->memory_listener.listener.eventfd_add = kvm_mem_ioeventfd_add;
>          s->memory_listener.listener.eventfd_del = kvm_mem_ioeventfd_del;
>      }
> +
> +    if (kvm_ioregionfds_allowed) {
> +        s->memory_listener.listener.ioregionfd_add = kvm_mem_ioregionfd_add;
> +        s->memory_listener.listener.ioregionfd_del = kvm_mem_ioregionfd_del;
> +    }
> +
>      s->memory_listener.listener.coalesced_io_add = kvm_coalesce_mmio_region;
>      s->memory_listener.listener.coalesced_io_del = kvm_uncoalesce_mmio_region;
>  
> @@ -2594,6 +2720,12 @@ static int kvm_init(MachineState *ms)
>          memory_listener_register(&kvm_io_listener,
>                                   &address_space_io);
>      }
> +
> +    if (kvm_ioregionfds_allowed) {
> +        memory_listener_register(&kvm_ioregion_listener,
> +                                 &address_space_io);
> +    }
> +
>      memory_listener_register(&kvm_coalesced_pio_listener,
>                               &address_space_io);
>  

Why are we using a single memory listener for address_space_memory but
individual listeners for address_space_io?

IOW, why don't we have &s->io_listener?
diff mbox series

Patch

diff --git a/include/exec/memory.h b/include/exec/memory.h
index 20f1b27377..2ce7f35cc2 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -712,6 +712,7 @@  void ram_discard_manager_unregister_listener(RamDiscardManager *rdm,
 
 typedef struct CoalescedMemoryRange CoalescedMemoryRange;
 typedef struct MemoryRegionIoeventfd MemoryRegionIoeventfd;
+typedef struct MemoryRegionIoregionfd MemoryRegionIoregionfd;
 
 /** MemoryRegion:
  *
@@ -756,6 +757,8 @@  struct MemoryRegion {
     const char *name;
     unsigned ioeventfd_nb;
     MemoryRegionIoeventfd *ioeventfds;
+    unsigned ioregionfd_nb;
+    MemoryRegionIoregionfd *ioregionfds;
     RamDiscardManager *rdm; /* Only for RAM */
 };
 
@@ -974,6 +977,38 @@  struct MemoryListener {
      */
     void (*eventfd_del)(MemoryListener *listener, MemoryRegionSection *section,
                         bool match_data, uint64_t data, EventNotifier *e);
+    /**
+     * @ioregionfd_add:
+     *
+     * Called during an address space update transaction,
+     * for a section of the address space that has had a new ioregionfd
+     * registration since the last transaction.
+     *
+     * @listener: The #MemoryListener.
+     * @section: The new #MemoryRegionSection.
+     * @data: The @data parameter for the new ioregionfd.
+     * @fd: The file descriptor parameter for the new ioregionfd.
+     */
+    void (*ioregionfd_add)(MemoryListener *listener,
+                           MemoryRegionSection *section,
+                           uint64_t data, int fd);
+
+    /**
+     * @ioregionfd_del:
+     *
+     * Called during an address space update transaction,
+     * for a section of the address space that has dropped an ioregionfd
+     * registration since the last transaction.
+     *
+     * @listener: The #MemoryListener.
+     * @section: The new #MemoryRegionSection.
+     * @data: The @data parameter for the dropped ioregionfd.
+     * @fd: The file descriptor parameter for the dropped ioregionfd.
+     */
+    void (*ioregionfd_del)(MemoryListener *listener,
+                           MemoryRegionSection *section,
+                           uint64_t data, int fd);
+
 
     /**
      * @coalesced_io_add:
@@ -1041,6 +1076,8 @@  struct AddressSpace {
 
     int ioeventfd_nb;
     struct MemoryRegionIoeventfd *ioeventfds;
+    int ioregionfd_nb;
+    struct MemoryRegionIoregionfd *ioregionfds;
     QTAILQ_HEAD(, MemoryListener) listeners;
     QTAILQ_ENTRY(AddressSpace) address_spaces_link;
 };
@@ -2175,6 +2212,19 @@  void memory_region_del_eventfd(MemoryRegion *mr,
                                uint64_t data,
                                EventNotifier *e);
 
+void memory_region_add_ioregionfd(MemoryRegion *mr,
+                                  hwaddr addr,
+                                  unsigned size,
+                                  uint64_t data,
+                                  int fd,
+                                  bool pio);
+
+void memory_region_del_ioregionfd(MemoryRegion *mr,
+                                  hwaddr addr,
+                                  unsigned size,
+                                  uint64_t data,
+                                  int fd);
+
 /**
  * memory_region_add_subregion: Add a subregion to a container.
  *
diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
index 7b22aeb6ae..fea77b5185 100644
--- a/include/sysemu/kvm.h
+++ b/include/sysemu/kvm.h
@@ -46,6 +46,7 @@  extern bool kvm_readonly_mem_allowed;
 extern bool kvm_direct_msi_allowed;
 extern bool kvm_ioeventfd_any_length_allowed;
 extern bool kvm_msi_use_devid;
+extern bool kvm_ioregionfds_allowed;
 
 #define kvm_enabled()           (kvm_allowed)
 /**
@@ -167,6 +168,15 @@  extern bool kvm_msi_use_devid;
  */
 #define kvm_msi_devid_required() (kvm_msi_use_devid)
 
+/**
+ * kvm_ioregionfds_enabled:
+ *
+ * Returns: true if we can use ioregionfd to receive the MMIO/PIO
+ * dispatches from KVM (ie the kernel supports ioregionfd and we are running
+ * with a configuration where it is meaningful to use them).
+ */
+#define kvm_ioregionfds_enabled() (kvm_ioregionfds_allowed)
+
 #else
 
 #define kvm_enabled()           (0)
@@ -184,12 +194,14 @@  extern bool kvm_msi_use_devid;
 #define kvm_direct_msi_enabled() (false)
 #define kvm_ioeventfd_any_length_enabled() (false)
 #define kvm_msi_devid_required() (false)
+#define kvm_ioregionfds_enabled() (false)
 
 #endif  /* CONFIG_KVM_IS_POSSIBLE */
 
 struct kvm_run;
 struct kvm_lapic_state;
 struct kvm_irq_routing_entry;
+struct kvm_ioregion;
 
 typedef struct KVMCapabilityInfo {
     const char *name;
@@ -548,4 +560,7 @@  bool kvm_cpu_check_are_resettable(void);
 bool kvm_arch_cpu_check_are_resettable(void);
 
 bool kvm_dirty_ring_enabled(void);
+
+int kvm_set_ioregionfd(struct kvm_ioregion *ioregionfd);
+
 #endif
diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h
index bcaf66cc4d..1ad444a74e 100644
--- a/linux-headers/linux/kvm.h
+++ b/linux-headers/linux/kvm.h
@@ -776,6 +776,29 @@  struct kvm_ioeventfd {
 	__u8  pad[36];
 };
 
+enum {
+        kvm_ioregion_flag_nr_pio,            /* bit number of KVM_IOREGION_PIO */
+        kvm_ioregion_flag_nr_posted_writes,  /* ... of KVM_IOREGION_POSTED_WRITES */
+        kvm_ioregion_flag_nr_deassign,       /* ... of KVM_IOREGION_DEASSIGN */
+        kvm_ioregion_flag_nr_max,            /* count of flag bits defined above */
+};
+
+#define KVM_IOREGION_PIO (1 << kvm_ioregion_flag_nr_pio)
+#define KVM_IOREGION_POSTED_WRITES (1 << kvm_ioregion_flag_nr_posted_writes)
+#define KVM_IOREGION_DEASSIGN (1 << kvm_ioregion_flag_nr_deassign)
+
+#define KVM_IOREGION_VALID_FLAG_MASK ((1 << kvm_ioregion_flag_nr_max) - 1) /* all defined bits */
+
+struct kvm_ioregion {
+        __u64 guest_paddr; /* guest physical address */
+        __u64 memory_size; /* bytes */
+        __u64 user_data;   /* caller-supplied value */
+        __s32 read_fd;     /* presumably the fd serving guest reads */
+        __s32 write_fd;    /* presumably the fd serving guest writes */
+        __u32 flags;       /* KVM_IOREGION_* bits */
+        __u8  pad[28];     /* brings the struct to 64 bytes */
+};
+
 #define KVM_X86_DISABLE_EXITS_MWAIT          (1 << 0)
 #define KVM_X86_DISABLE_EXITS_HLT            (1 << 1)
 #define KVM_X86_DISABLE_EXITS_PAUSE          (1 << 2)
@@ -933,6 +956,7 @@  struct kvm_ppc_resize_hpt {
 #define KVM_CAP_PIT_STATE2 35
 #endif
 #define KVM_CAP_IOEVENTFD 36
+#define KVM_CAP_IOREGIONFD 206
 #define KVM_CAP_SET_IDENTITY_MAP_ADDR 37
 #ifdef __KVM_HAVE_XEN_HVM
 #define KVM_CAP_XEN_HVM 38
@@ -1372,6 +1396,7 @@  struct kvm_vfio_spapr_tce {
 					struct kvm_userspace_memory_region)
 #define KVM_SET_TSS_ADDR          _IO(KVMIO,   0x47)
 #define KVM_SET_IDENTITY_MAP_ADDR _IOW(KVMIO,  0x48, __u64)
+#define KVM_SET_IOREGION          _IOW(KVMIO,  0x49, struct kvm_ioregion)
 
 /* enable ucontrol for s390 */
 struct kvm_s390_ucas_mapping {
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index eecd8031cf..dda04a0ae1 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -168,6 +168,7 @@  bool kvm_vm_attributes_allowed;
 bool kvm_direct_msi_allowed;
 bool kvm_ioeventfd_any_length_allowed;
 bool kvm_msi_use_devid;
+bool kvm_ioregionfds_allowed;
 static bool kvm_immediate_exit;
 static hwaddr kvm_max_slot_size = ~0;
 
@@ -384,6 +385,18 @@  err:
     return ret;
 }
 
+/* Issue the KVM_SET_IOREGION vm ioctl for @ioregionfd.  Returns the ioctl
+ * result; a negative result is treated as -errno and reported. */
+int kvm_set_ioregionfd(struct kvm_ioregion *ioregionfd)
+{
+    int ret = kvm_vm_ioctl(kvm_state, KVM_SET_IOREGION, ioregionfd);
+
+    if (ret < 0) {
+        error_report("KVM_SET_IOREGION failed: %s (%d)", strerror(-ret), -ret);
+    }
+    return ret;
+}
+
 static int do_kvm_destroy_vcpu(CPUState *cpu)
 {
     KVMState *s = kvm_state;
@@ -1635,6 +1648,104 @@  static void kvm_io_ioeventfd_del(MemoryListener *listener,
     }
 }
 
+/* Non-PIO ioregionfd_add hook: hand @section's guest range to KVM with @fd
+ * as both read and write fd; a failed registration aborts. */
+static void kvm_mem_ioregionfd_add(MemoryListener *listener,
+                                   MemoryRegionSection *section,
+                                   uint64_t data,
+                                   int fd)
+{
+    /* Members not named below (pad) are zero-initialized. */
+    struct kvm_ioregion ioregionfd = {
+        .guest_paddr = section->offset_within_address_space,
+        .memory_size = int128_get64(section->size),
+        .user_data = data,
+        .read_fd = fd,
+        .write_fd = fd,
+        .flags = 0,
+    };
+    int r = kvm_set_ioregionfd(&ioregionfd);
+
+    if (r < 0) {
+        fprintf(stderr, "%s: error adding ioregionfd: %s (%d)\n",
+                __func__, strerror(-r), -r);
+        abort();
+    }
+}
+
+/* Non-PIO ioregionfd_del hook: issue a KVM_IOREGION_DEASSIGN request built
+ * from @section, @data and @fd; a failed request aborts. */
+static void kvm_mem_ioregionfd_del(MemoryListener *listener,
+                                   MemoryRegionSection *section,
+                                   uint64_t data,
+                                   int fd)
+{
+    /* Members not named below (pad) are zero-initialized. */
+    struct kvm_ioregion ioregionfd = {
+        .guest_paddr = section->offset_within_address_space,
+        .memory_size = int128_get64(section->size),
+        .user_data = data,
+        .read_fd = fd,
+        .write_fd = fd,
+        .flags = KVM_IOREGION_DEASSIGN,
+    };
+    int r = kvm_set_ioregionfd(&ioregionfd);
+
+    if (r < 0) {
+        fprintf(stderr, "%s: error deleting ioregionfd: %s (%d)\n",
+                __func__, strerror(-r), -r);
+        abort();
+    }
+}
+
+/* PIO ioregionfd_add hook: register @fd with KVM_IOREGION_PIO set. */
+static void kvm_io_ioregionfd_add(MemoryListener *listener,
+                                  MemoryRegionSection *section,
+                                  uint64_t data,
+                                  int fd)
+{
+    /* Members not named below (pad) are zero-initialized. */
+    struct kvm_ioregion ioregionfd = {
+        .guest_paddr = section->offset_within_address_space,
+        .memory_size = int128_get64(section->size),
+        .user_data = data,
+        .read_fd = fd,
+        .write_fd = fd,
+        .flags = KVM_IOREGION_PIO,
+    };
+    int r = kvm_set_ioregionfd(&ioregionfd);
+
+    if (r < 0) {
+        fprintf(stderr, "%s: error adding pio ioregionfd: %s (%d)\n",
+                __func__, strerror(-r), -r);
+        abort();
+    }
+}
+
+/* PIO ioregionfd_del hook: deassign request with KVM_IOREGION_PIO set. */
+static void kvm_io_ioregionfd_del(MemoryListener *listener,
+                                  MemoryRegionSection *section,
+                                  uint64_t data,
+                                  int fd)
+{
+    /* Members not named below (pad) are zero-initialized. */
+    struct kvm_ioregion ioregionfd = {
+        .guest_paddr = section->offset_within_address_space,
+        .memory_size = int128_get64(section->size),
+        .user_data = data,
+        .read_fd = fd,
+        .write_fd = fd,
+        .flags = KVM_IOREGION_DEASSIGN | KVM_IOREGION_PIO,
+    };
+    int r = kvm_set_ioregionfd(&ioregionfd);
+
+    if (r < 0) {
+        fprintf(stderr, "%s: error deleting pio ioregionfd: %s (%d)\n",
+                __func__, strerror(-r), -r);
+        abort();
+    }
+}
+
 void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml,
                                   AddressSpace *as, int as_id, const char *name)
 {
@@ -1679,6 +1790,12 @@  static MemoryListener kvm_io_listener = {
     .priority = 10,
 };
 
+static MemoryListener kvm_ioregion_listener = { /* PIO ioregionfd hooks */
+    .priority = 10,
+    .ioregionfd_del = kvm_io_ioregionfd_del,
+    .ioregionfd_add = kvm_io_ioregionfd_add,
+};
+
 int kvm_set_irq(KVMState *s, int irq, int level)
 {
     struct kvm_irq_level event;
@@ -2564,6 +2681,9 @@  static int kvm_init(MachineState *ms)
     kvm_ioeventfd_any_length_allowed =
         (kvm_check_extension(s, KVM_CAP_IOEVENTFD_ANY_LENGTH) > 0);
 
+    kvm_ioregionfds_allowed =
+        (kvm_check_extension(s, KVM_CAP_IOREGIONFD) > 0);
+
     kvm_state = s;
 
     ret = kvm_arch_init(ms, s);
@@ -2585,6 +2705,12 @@  static int kvm_init(MachineState *ms)
         s->memory_listener.listener.eventfd_add = kvm_mem_ioeventfd_add;
         s->memory_listener.listener.eventfd_del = kvm_mem_ioeventfd_del;
     }
+
+    if (kvm_ioregionfds_allowed) {
+        s->memory_listener.listener.ioregionfd_add = kvm_mem_ioregionfd_add;
+        s->memory_listener.listener.ioregionfd_del = kvm_mem_ioregionfd_del;
+    }
+
     s->memory_listener.listener.coalesced_io_add = kvm_coalesce_mmio_region;
     s->memory_listener.listener.coalesced_io_del = kvm_uncoalesce_mmio_region;
 
@@ -2594,6 +2720,12 @@  static int kvm_init(MachineState *ms)
         memory_listener_register(&kvm_io_listener,
                                  &address_space_io);
     }
+
+    if (kvm_ioregionfds_allowed) {
+        memory_listener_register(&kvm_ioregion_listener,
+                                 &address_space_io);
+    }
+
     memory_listener_register(&kvm_coalesced_pio_listener,
                              &address_space_io);
 
diff --git a/accel/stubs/kvm-stub.c b/accel/stubs/kvm-stub.c
index 5319573e00..d6caea8174 100644
--- a/accel/stubs/kvm-stub.c
+++ b/accel/stubs/kvm-stub.c
@@ -29,6 +29,7 @@  bool kvm_gsi_direct_mapping;
 bool kvm_allowed;
 bool kvm_readonly_mem_allowed;
 bool kvm_ioeventfd_any_length_allowed;
+bool kvm_ioregionfds_allowed;
 bool kvm_msi_use_devid;
 
 void kvm_flush_coalesced_mmio_buffer(void)