Message ID | 20090512182701.26131.66801.stgit@dev.haskins.net (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Gregory Haskins wrote: > iofd is a mechanism to register PIO/MMIO regions to trigger an eventfd > signal when written to. Userspace can register any arbitrary address > with a corresponding eventfd. > Ugg..this patch header sucks, especially given all the talk around how we need to do them better lately :) I will add this text as well for future versions: -------------- Traditional MMIO/PIO exit paths are expensive because they are done within the same context as the VCPU thread and therefore cause a VMX/SVM "heavy-weight" exit, a transition back to userspace, and overhead with the qemu processing of the operation. An eventfd mechanism, on the other hand, allows the VCPU to take a very brief lightweight exit only long enough to trigger the eventfd_signal. This means that clients of the eventfd (supporting both userspace or kernel end-points) can potentially get notified much more efficiently than if we were to register through the traditional mechanism via qemu MMIO/PIO notification. ----------- > Signed-off-by: Gregory Haskins <ghaskins@novell.com> > --- > > include/linux/kvm.h | 12 +++++ > include/linux/kvm_host.h | 2 + > virt/kvm/eventfd.c | 107 ++++++++++++++++++++++++++++++++++++++++++++++ > virt/kvm/kvm_main.c | 13 ++++++ > 4 files changed, 134 insertions(+), 0 deletions(-) > > diff --git a/include/linux/kvm.h b/include/linux/kvm.h > index dfc4bcc..99b6e45 100644 > --- a/include/linux/kvm.h > +++ b/include/linux/kvm.h > @@ -292,6 +292,17 @@ struct kvm_guest_debug { > struct kvm_guest_debug_arch arch; > }; > > +#define KVM_IOFD_FLAG_DEASSIGN (1 << 0) > +#define KVM_IOFD_FLAG_PIO (1 << 1) > + > +struct kvm_iofd { > + __u64 addr; > + __u32 len; > + __u32 fd; > + __u32 flags; > + __u8 pad[12]; > +}; > + > #define KVM_TRC_SHIFT 16 > /* > * kvm trace categories > @@ -508,6 +519,7 @@ struct kvm_irqfd { > #define KVM_DEASSIGN_DEV_IRQ _IOW(KVMIO, 0x75, struct kvm_assigned_irq) > #define KVM_ASSIGN_IRQFD _IOW(KVMIO, 0x76, struct kvm_irqfd) > #define KVM_DEASSIGN_IRQFD _IOW(KVMIO, 0x77, __u32) > +#define KVM_IOFD _IOW(KVMIO, 0x78, struct kvm_iofd) > > /* > * ioctls for vcpu fds > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h > index 1acc528..d53cb70 100644 > --- a/include/linux/kvm_host.h > +++ b/include/linux/kvm_host.h > @@ -529,5 +529,7 @@ static inline void kvm_free_irq_routing(struct kvm *kvm) {} > int kvm_assign_irqfd(struct kvm *kvm, int fd, int gsi, int flags); > int kvm_deassign_irqfd(struct kvm *kvm, int fd); > void kvm_irqfd_release(struct kvm *kvm); > +int kvm_iofd(struct kvm *kvm, unsigned long addr, size_t len, > + int fd, int flags); > > #endif > diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c > index 71afd62..8b23317 100644 > --- a/virt/kvm/eventfd.c > +++ b/virt/kvm/eventfd.c > @@ -21,12 +21,16 @@ > */ > > #include <linux/kvm_host.h> > +#include <linux/kvm.h> > #include <linux/workqueue.h> > #include <linux/syscalls.h> > #include <linux/wait.h> > #include <linux/poll.h> > #include <linux/file.h> > #include <linux/list.h> > +#include <linux/eventfd.h> > + > +#include "iodev.h" > > /* > * -------------------------------------------------------------------- > @@ -185,3 +189,106 @@ kvm_irqfd_release(struct kvm *kvm) > list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds, list) > irqfd_release(irqfd); > } > + > +/* > + * -------------------------------------------------------------------- > + * iofd: translate a PIO/MMIO memory write to an eventfd signal. > + * > + * userspace can register a PIO/MMIO address with an eventfd for recieving > + * notification when the memory has been touched. > + * -------------------------------------------------------------------- > + */ > + > +struct _iofd { > + u64 addr; > + size_t length; > + struct file *file; > + struct kvm_io_device dev; > +}; > + > +static int > +iofd_in_range(struct kvm_io_device *this, gpa_t addr, int len, int is_write) > +{ > + struct _iofd *iofd = (struct _iofd *)this->private; > + > + return ((addr >= iofd->addr && (addr < iofd->addr + iofd->length))); > +} > + > +/* writes trigger an event */ > +static void > +iofd_write(struct kvm_io_device *this, gpa_t addr, int len, const void *val) > +{ > + struct _iofd *iofd = (struct _iofd *)this->private; > + > + eventfd_signal(iofd->file, 1); > +} > + > +/* reads return all zeros */ > +static void > +iofd_read(struct kvm_io_device *this, gpa_t addr, int len, void *val) > +{ > + memset(val, 0, len); > +} > + > +static void > +iofd_destructor(struct kvm_io_device *this) > +{ > + struct _iofd *iofd = (struct _iofd *)this->private; > + > + fput(iofd->file); > + kfree(iofd); > +} > + > +static int > +kvm_assign_iofd(struct kvm *kvm, unsigned long addr, size_t len, > + int fd, int flags) > +{ > + int pio = flags & KVM_IOFD_FLAG_PIO; > + struct kvm_io_bus *bus = pio ? &kvm->pio_bus : &kvm->mmio_bus; > + struct _iofd *iofd; > + struct file *file; > + > + file = eventfd_fget(fd); > + if (IS_ERR(file)) > + return PTR_ERR(file); > + > + iofd = kzalloc(sizeof(*iofd), GFP_KERNEL); > + if (!iofd) { > + fput(file); > + return -ENOMEM; > + } > + > + iofd->dev.read = iofd_read; > + iofd->dev.write = iofd_write; > + iofd->dev.in_range = iofd_in_range; > + iofd->dev.destructor = iofd_destructor; > + iofd->dev.private = iofd; > + > + iofd->addr = addr; > + iofd->length = len; > + iofd->file = file; > + > + kvm_io_bus_register_dev(bus, &iofd->dev); > + > + printk(KERN_DEBUG "registering %s iofd at %lx of size %d\n", > + pio ? "PIO" : "MMIO", addr, (int)len); > + > + return 0; > +} > + > +static int > +kvm_deassign_iofd(struct kvm *kvm, unsigned long addr, size_t len, > + int fd, int flags) > +{ > + /* FIXME: We need an io_bus_unregister() function */ > + return -EINVAL; > +} > + > +int > +kvm_iofd(struct kvm *kvm, unsigned long addr, size_t len, int fd, int flags) > +{ > + if (flags & KVM_IOFD_FLAG_DEASSIGN) > + return kvm_deassign_iofd(kvm, addr, len, fd, flags); > + > + return kvm_assign_iofd(kvm, addr, len, fd, flags); > +} > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c > index 7aa9f0a..a443974 100644 > --- a/virt/kvm/kvm_main.c > +++ b/virt/kvm/kvm_main.c > @@ -2228,6 +2228,19 @@ static long kvm_vm_ioctl(struct file *filp, > r = kvm_deassign_irqfd(kvm, data); > break; > } > + case KVM_IOFD: { > + struct kvm_iofd entry; > + > + r = -EFAULT; > + if (copy_from_user(&entry, argp, sizeof entry)) > + goto out; > + > + r = kvm_iofd(kvm, entry.addr, entry.len, entry.fd, > + entry.flags); > + if (r) > + goto out; > + break; > + } > default: > r = kvm_arch_vm_ioctl(filp, ioctl, arg); > } > > -- > To unsubscribe from this list: send the line "unsubscribe kvm" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html >
Gregory Haskins wrote: > iofd is a mechanism to register PIO/MMIO regions to trigger an eventfd > signal when written to. Userspace can register any arbitrary address > with a corresponding eventfd. > > Please start a separate patchset for this so I can merge irqfd. > > diff --git a/include/linux/kvm.h b/include/linux/kvm.h > index dfc4bcc..99b6e45 100644 > --- a/include/linux/kvm.h > +++ b/include/linux/kvm.h > @@ -292,6 +292,17 @@ struct kvm_guest_debug { > struct kvm_guest_debug_arch arch; > }; > > +#define KVM_IOFD_FLAG_DEASSIGN (1 << 0) > +#define KVM_IOFD_FLAG_PIO (1 << 1) > + > +struct kvm_iofd { > + __u64 addr; > + __u32 len; > + __u32 fd; > + __u32 flags; > + __u8 pad[12]; > +}; > + > Please add a data match capability. virtio uses a write with the data containing the queue ID, and we want a separate event for each queue. > * kvm trace categories > @@ -508,6 +519,7 @@ struct kvm_irqfd { > #define KVM_DEASSIGN_DEV_IRQ _IOW(KVMIO, 0x75, struct kvm_assigned_irq) > #define KVM_ASSIGN_IRQFD _IOW(KVMIO, 0x76, struct kvm_irqfd) > #define KVM_DEASSIGN_IRQFD _IOW(KVMIO, 0x77, __u32) > +#define KVM_IOFD _IOW(KVMIO, 0x78, struct kvm_iofd) > Too general a name. It's not doing IO, just sending out notifications. Why have assign/deassign for irqfd and a single ioctl for iofd? The rest looks good.
Avi Kivity wrote: > Gregory Haskins wrote: >> iofd is a mechanism to register PIO/MMIO regions to trigger an eventfd >> signal when written to. Userspace can register any arbitrary address >> with a corresponding eventfd. >> >> > > Please start a separate patchset for this so I can merge irqfd. Ack. Will spin a new split series with your irqfd review changes > >> >> diff --git a/include/linux/kvm.h b/include/linux/kvm.h >> index dfc4bcc..99b6e45 100644 >> --- a/include/linux/kvm.h >> +++ b/include/linux/kvm.h >> @@ -292,6 +292,17 @@ struct kvm_guest_debug { >> struct kvm_guest_debug_arch arch; >> }; >> >> +#define KVM_IOFD_FLAG_DEASSIGN (1 << 0) >> +#define KVM_IOFD_FLAG_PIO (1 << 1) >> + >> +struct kvm_iofd { >> + __u64 addr; >> + __u32 len; >> + __u32 fd; >> + __u32 flags; >> + __u8 pad[12]; >> +}; >> + >> > Please add a data match capability. virtio uses a write with the data > containing the queue ID, and we want a separate event for each queue. How about "u64 cookie" ? > > >> * kvm trace categories >> @@ -508,6 +519,7 @@ struct kvm_irqfd { >> #define KVM_DEASSIGN_DEV_IRQ _IOW(KVMIO, 0x75, struct >> kvm_assigned_irq) >> #define KVM_ASSIGN_IRQFD _IOW(KVMIO, 0x76, struct kvm_irqfd) >> #define KVM_DEASSIGN_IRQFD _IOW(KVMIO, 0x77, __u32) >> +#define KVM_IOFD _IOW(KVMIO, 0x78, struct kvm_iofd) >> > > Too general a name. It's not doing IO, just sending out notifications. Hmm...good point. I was trying to reflect "[MM/P]IO-FD". How about "IOSIGNALFD" > > Why have assign/deassign for irqfd and a single ioctl for iofd? Heh.. irqfd "liked" two because the deassign only needed a u32. iofd needed more or less the same structure for both so I guess I thought I would be "slick" and condense the vectors. Will fix so they are symmetrical. > > The rest looks good. > I will also submit a patch to fix the io_bus stuff so that registrations can gracefully fail instead of BUG_ON(), and to provide an unregister function. Thanks Avi, -Greg
Gregory Haskins wrote: >>> +#define KVM_IOFD_FLAG_PIO (1 << 1) >>> + >>> +struct kvm_iofd { >>> + __u64 addr; >>> + __u32 len; >>> + __u32 fd; >>> + __u32 flags; >>> + __u8 pad[12]; >>> +}; >>> + >>> >>> >> Please add a data match capability. virtio uses a write with the data >> containing the queue ID, and we want a separate event for each queue. >> > > How about "u64 cookie" ? > Sure, and a bit in flags to enable it. >>> * kvm trace categories >>> @@ -508,6 +519,7 @@ struct kvm_irqfd { >>> #define KVM_DEASSIGN_DEV_IRQ _IOW(KVMIO, 0x75, struct >>> kvm_assigned_irq) >>> #define KVM_ASSIGN_IRQFD _IOW(KVMIO, 0x76, struct kvm_irqfd) >>> #define KVM_DEASSIGN_IRQFD _IOW(KVMIO, 0x77, __u32) >>> +#define KVM_IOFD _IOW(KVMIO, 0x78, struct kvm_iofd) >>> >>> >> Too general a name. It's not doing IO, just sending out notifications. >> > > Hmm...good point. I was trying to reflect "[MM/P]IO-FD". How about > "IOSIGNALFD" > Okay. >> Why have assign/deassign for irqfd and a single ioctl for iofd? >> > Heh.. irqfd "liked" two because the deassign only needed a u32. iofd > needed more or less the same structure for both so I guess I thought I > would be "slick" and condense the vectors. Will fix so they are > symmetrical. > Yeah. You could have both use just one, or both use two. Not sure which is better.
diff --git a/include/linux/kvm.h b/include/linux/kvm.h index dfc4bcc..99b6e45 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -292,6 +292,17 @@ struct kvm_guest_debug { struct kvm_guest_debug_arch arch; }; +#define KVM_IOFD_FLAG_DEASSIGN (1 << 0) +#define KVM_IOFD_FLAG_PIO (1 << 1) + +struct kvm_iofd { + __u64 addr; + __u32 len; + __u32 fd; + __u32 flags; + __u8 pad[12]; +}; + #define KVM_TRC_SHIFT 16 /* * kvm trace categories @@ -508,6 +519,7 @@ struct kvm_irqfd { #define KVM_DEASSIGN_DEV_IRQ _IOW(KVMIO, 0x75, struct kvm_assigned_irq) #define KVM_ASSIGN_IRQFD _IOW(KVMIO, 0x76, struct kvm_irqfd) #define KVM_DEASSIGN_IRQFD _IOW(KVMIO, 0x77, __u32) +#define KVM_IOFD _IOW(KVMIO, 0x78, struct kvm_iofd) /* * ioctls for vcpu fds diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 1acc528..d53cb70 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -529,5 +529,7 @@ static inline void kvm_free_irq_routing(struct kvm *kvm) {} int kvm_assign_irqfd(struct kvm *kvm, int fd, int gsi, int flags); int kvm_deassign_irqfd(struct kvm *kvm, int fd); void kvm_irqfd_release(struct kvm *kvm); +int kvm_iofd(struct kvm *kvm, unsigned long addr, size_t len, + int fd, int flags); #endif diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 71afd62..8b23317 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -21,12 +21,16 @@ */ #include <linux/kvm_host.h> +#include <linux/kvm.h> #include <linux/workqueue.h> #include <linux/syscalls.h> #include <linux/wait.h> #include <linux/poll.h> #include <linux/file.h> #include <linux/list.h> +#include <linux/eventfd.h> + +#include "iodev.h" /* * -------------------------------------------------------------------- @@ -185,3 +189,106 @@ kvm_irqfd_release(struct kvm *kvm) list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds, list) irqfd_release(irqfd); } + +/* + * -------------------------------------------------------------------- + * iofd: translate a PIO/MMIO memory write to an eventfd signal. + * + * userspace can register a PIO/MMIO address with an eventfd for recieving + * notification when the memory has been touched. + * -------------------------------------------------------------------- + */ + +struct _iofd { + u64 addr; + size_t length; + struct file *file; + struct kvm_io_device dev; +}; + +static int +iofd_in_range(struct kvm_io_device *this, gpa_t addr, int len, int is_write) +{ + struct _iofd *iofd = (struct _iofd *)this->private; + + return ((addr >= iofd->addr && (addr < iofd->addr + iofd->length))); +} + +/* writes trigger an event */ +static void +iofd_write(struct kvm_io_device *this, gpa_t addr, int len, const void *val) +{ + struct _iofd *iofd = (struct _iofd *)this->private; + + eventfd_signal(iofd->file, 1); +} + +/* reads return all zeros */ +static void +iofd_read(struct kvm_io_device *this, gpa_t addr, int len, void *val) +{ + memset(val, 0, len); +} + +static void +iofd_destructor(struct kvm_io_device *this) +{ + struct _iofd *iofd = (struct _iofd *)this->private; + + fput(iofd->file); + kfree(iofd); +} + +static int +kvm_assign_iofd(struct kvm *kvm, unsigned long addr, size_t len, + int fd, int flags) +{ + int pio = flags & KVM_IOFD_FLAG_PIO; + struct kvm_io_bus *bus = pio ? &kvm->pio_bus : &kvm->mmio_bus; + struct _iofd *iofd; + struct file *file; + + file = eventfd_fget(fd); + if (IS_ERR(file)) + return PTR_ERR(file); + + iofd = kzalloc(sizeof(*iofd), GFP_KERNEL); + if (!iofd) { + fput(file); + return -ENOMEM; + } + + iofd->dev.read = iofd_read; + iofd->dev.write = iofd_write; + iofd->dev.in_range = iofd_in_range; + iofd->dev.destructor = iofd_destructor; + iofd->dev.private = iofd; + + iofd->addr = addr; + iofd->length = len; + iofd->file = file; + + kvm_io_bus_register_dev(bus, &iofd->dev); + + printk(KERN_DEBUG "registering %s iofd at %lx of size %d\n", + pio ? "PIO" : "MMIO", addr, (int)len); + + return 0; +} + +static int +kvm_deassign_iofd(struct kvm *kvm, unsigned long addr, size_t len, + int fd, int flags) +{ + /* FIXME: We need an io_bus_unregister() function */ + return -EINVAL; +} + +int +kvm_iofd(struct kvm *kvm, unsigned long addr, size_t len, int fd, int flags) +{ + if (flags & KVM_IOFD_FLAG_DEASSIGN) + return kvm_deassign_iofd(kvm, addr, len, fd, flags); + + return kvm_assign_iofd(kvm, addr, len, fd, flags); +} diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 7aa9f0a..a443974 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2228,6 +2228,19 @@ static long kvm_vm_ioctl(struct file *filp, r = kvm_deassign_irqfd(kvm, data); break; } + case KVM_IOFD: { + struct kvm_iofd entry; + + r = -EFAULT; + if (copy_from_user(&entry, argp, sizeof entry)) + goto out; + + r = kvm_iofd(kvm, entry.addr, entry.len, entry.fd, + entry.flags); + if (r) + goto out; + break; + } default: r = kvm_arch_vm_ioctl(filp, ioctl, arg); }
iofd is a mechanism to register PIO/MMIO regions to trigger an eventfd signal when written to. Userspace can register any arbitrary address with a corresponding eventfd. Signed-off-by: Gregory Haskins <ghaskins@novell.com> --- include/linux/kvm.h | 12 +++++ include/linux/kvm_host.h | 2 + virt/kvm/eventfd.c | 107 ++++++++++++++++++++++++++++++++++++++++++++++ virt/kvm/kvm_main.c | 13 ++++++ 4 files changed, 134 insertions(+), 0 deletions(-) -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html