Message ID | 20090726162225.GA21972@redhat.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Sun, Jul 26, 2009 at 07:22:25PM +0300, Michael S. Tsirkin wrote: > Here's an untested patch with partial support for level triggered > interrupts in irqfd. What this patch has: support for clearing interrupt > on ack. What this patch does not have: support signalling eventfd on ack > so that userspace can take action and e.g. reenable interrupt. > Do we want level triggered irqfd to be used only for device assignment? Because if not it is not appropriate to zero irq level on ack. And you have no other way to zero irq level?! Another note is that level triggered irqfd shouldn't use KVM_USERSPACE_IRQ_SOURCE_ID since it may be shared. > Gleb, Marcelo, I'd like your input on the approach taken wrt locking. > Does it look sane? > > Avi, how's the interface? I intend to also add an eventfd probably in > the padding in the irqfd struct. > > Signed-off-by: Michael S. Tsirkin <mst@redhat.com> > > --- > > diff --git a/include/linux/kvm.h b/include/linux/kvm.h > index 230a91a..8bf16af 100644 > --- a/include/linux/kvm.h > +++ b/include/linux/kvm.h > @@ -488,6 +488,7 @@ struct kvm_x86_mce { > #endif > > #define KVM_IRQFD_FLAG_DEASSIGN (1 << 0) > +#define KVM_IRQFD_FLAG_LEVEL (1 << 1) > > struct kvm_irqfd { > __u32 fd; > diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c > index 99017e8..fcbf5b5 100644 > --- a/virt/kvm/eventfd.c > +++ b/virt/kvm/eventfd.c > @@ -45,12 +45,14 @@ struct _irqfd { > struct kvm *kvm; > struct eventfd_ctx *eventfd; > int gsi; > + int is_level; > struct list_head list; > poll_table pt; > wait_queue_head_t *wqh; > wait_queue_t wait; > struct work_struct inject; > struct work_struct shutdown; > + struct kvm_irq_ack_notifier kian; > }; > > static struct workqueue_struct *irqfd_cleanup_wq; > @@ -63,10 +65,15 @@ irqfd_inject(struct work_struct *work) > > mutex_lock(&kvm->irq_lock); > kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1); > - kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0); > + if (!irqfd->is_level) > + kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0); > mutex_unlock(&kvm->irq_lock); > } > > +static void irqfd_irq_acked(struct kvm_irq_ack_notifier *kian) > +{ > + kvm_set_irq(kian->kvm, KVM_USERSPACE_IRQ_SOURCE_ID, kian->gsi, 0); > +} > /* > * Race-free decouple logic (ordering is critical) > */ > @@ -87,6 +94,9 @@ irqfd_shutdown(struct work_struct *work) > */ > flush_work(&irqfd->inject); > > + if (irqfd->is_level) > + kvm_unregister_irq_ack_notifier(&irqfd->kian); > + > /* > * It is now safe to release the object's resources > */ > @@ -166,7 +176,7 @@ irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh, > } > > static int > -kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi) > +kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi, int is_level) > { > struct _irqfd *irqfd; > struct file *file = NULL; > @@ -180,6 +190,7 @@ kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi) > > irqfd->kvm = kvm; > irqfd->gsi = gsi; > + irqfd->is_level = is_level; > INIT_LIST_HEAD(&irqfd->list); > INIT_WORK(&irqfd->inject, irqfd_inject); > INIT_WORK(&irqfd->shutdown, irqfd_shutdown); > @@ -198,6 +209,12 @@ kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi) > > irqfd->eventfd = eventfd; > > + if (is_level) { > + irqfd->kian.gsi = gsi; > + irqfd->kian.irq_acked = irqfd_irq_acked; > + kvm_register_irq_ack_notifier(&irqfd->kian); > + } > + > /* > * Install our own custom wake-up handling so we are notified via > * a callback whenever someone signals the underlying eventfd > @@ -281,10 +298,13 @@ kvm_irqfd_deassign(struct kvm *kvm, int fd, int gsi) > int > kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags) > { > + if (flags & ~(KVM_IRQFD_FLAG_DEASSIGN | KVM_IRQFD_FLAG_LEVEL)) > + return -EINVAL; > + > if (flags & KVM_IRQFD_FLAG_DEASSIGN) > return kvm_irqfd_deassign(kvm, fd, gsi); > > - return kvm_irqfd_assign(kvm, fd, gsi); > + return kvm_irqfd_assign(kvm, fd, gsi, !!(flags & KVM_IRQFD_FLAG_LEVEL)); > } > > /* -- Gleb. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, Jul 27, 2009 at 08:33:12AM +0300, Gleb Natapov wrote: > On Sun, Jul 26, 2009 at 07:22:25PM +0300, Michael S. Tsirkin wrote: > > Here's an untested patch with partial support for level triggered > > interrupts in irqfd. What this patch has: support for clearing interrupt > > on ack. What this patch does not have: support signalling eventfd on ack > > so that userspace can take action and e.g. reenable interrupt. > > > Do we want level triggered irqfd to be used only for device assignment? generally level triggered-not only. But irqfd is promarily for device assignment and emulated devices. > Because if not it is not appropriate to zero irq level on ack. The bit that is missing in this patch, is that we'll signal eventfd after we zero irq. Userspace polls that and re-asserts irq. > And you > have no other way to zero irq level?! The reason is interrupt sharing: device can assert interrupt but can not clear it by itself. qemu can still use the SET_IRQ ioctl if it wants full control. > Another note is that level > triggered irqfd shouldn't use KVM_USERSPACE_IRQ_SOURCE_ID since it may > be shared. Since we zero on ack, I think this is not a problem. > > Gleb, Marcelo, I'd like your input on the approach taken wrt locking. > > Does it look sane? > > > > Avi, how's the interface? I intend to also add an eventfd probably in > > the padding in the irqfd struct. > > > > Signed-off-by: Michael S. Tsirkin <mst@redhat.com> > > > > --- > > > > diff --git a/include/linux/kvm.h b/include/linux/kvm.h > > index 230a91a..8bf16af 100644 > > --- a/include/linux/kvm.h > > +++ b/include/linux/kvm.h > > @@ -488,6 +488,7 @@ struct kvm_x86_mce { > > #endif > > > > #define KVM_IRQFD_FLAG_DEASSIGN (1 << 0) > > +#define KVM_IRQFD_FLAG_LEVEL (1 << 1) > > > > struct kvm_irqfd { > > __u32 fd; > > diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c > > index 99017e8..fcbf5b5 100644 > > --- a/virt/kvm/eventfd.c > > +++ b/virt/kvm/eventfd.c > > @@ -45,12 +45,14 @@ struct _irqfd { > > struct kvm *kvm; > > struct eventfd_ctx *eventfd; > > int gsi; > > + int is_level; > > struct list_head list; > > poll_table pt; > > wait_queue_head_t *wqh; > > wait_queue_t wait; > > struct work_struct inject; > > struct work_struct shutdown; > > + struct kvm_irq_ack_notifier kian; > > }; > > > > static struct workqueue_struct *irqfd_cleanup_wq; > > @@ -63,10 +65,15 @@ irqfd_inject(struct work_struct *work) > > > > mutex_lock(&kvm->irq_lock); > > kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1); > > - kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0); > > + if (!irqfd->is_level) > > + kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0); > > mutex_unlock(&kvm->irq_lock); > > } > > > > +static void irqfd_irq_acked(struct kvm_irq_ack_notifier *kian) > > +{ > > + kvm_set_irq(kian->kvm, KVM_USERSPACE_IRQ_SOURCE_ID, kian->gsi, 0); > > +} > > /* > > * Race-free decouple logic (ordering is critical) > > */ > > @@ -87,6 +94,9 @@ irqfd_shutdown(struct work_struct *work) > > */ > > flush_work(&irqfd->inject); > > > > + if (irqfd->is_level) > > + kvm_unregister_irq_ack_notifier(&irqfd->kian); > > + > > /* > > * It is now safe to release the object's resources > > */ > > @@ -166,7 +176,7 @@ irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh, > > } > > > > static int > > -kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi) > > +kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi, int is_level) > > { > > struct _irqfd *irqfd; > > struct file *file = NULL; > > @@ -180,6 +190,7 @@ kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi) > > > > irqfd->kvm = kvm; > > irqfd->gsi = gsi; > > + irqfd->is_level = is_level; > > INIT_LIST_HEAD(&irqfd->list); > > INIT_WORK(&irqfd->inject, irqfd_inject); > > INIT_WORK(&irqfd->shutdown, irqfd_shutdown); > > @@ -198,6 +209,12 @@ kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi) > > > > irqfd->eventfd = eventfd; > > > > + if (is_level) { > > + irqfd->kian.gsi = gsi; > > + irqfd->kian.irq_acked = irqfd_irq_acked; > > + kvm_register_irq_ack_notifier(&irqfd->kian); > > + } > > + > > /* > > * Install our own custom wake-up handling so we are notified via > > * a callback whenever someone signals the underlying eventfd > > @@ -281,10 +298,13 @@ kvm_irqfd_deassign(struct kvm *kvm, int fd, int gsi) > > int > > kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags) > > { > > + if (flags & ~(KVM_IRQFD_FLAG_DEASSIGN | KVM_IRQFD_FLAG_LEVEL)) > > + return -EINVAL; > > + > > if (flags & KVM_IRQFD_FLAG_DEASSIGN) > > return kvm_irqfd_deassign(kvm, fd, gsi); > > > > - return kvm_irqfd_assign(kvm, fd, gsi); > > + return kvm_irqfd_assign(kvm, fd, gsi, !!(flags & KVM_IRQFD_FLAG_LEVEL)); > > } > > > > /* > > -- > Gleb. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, Jul 27, 2009 at 09:28:34AM +0300, Michael S. Tsirkin wrote: > On Mon, Jul 27, 2009 at 08:33:12AM +0300, Gleb Natapov wrote: > > On Sun, Jul 26, 2009 at 07:22:25PM +0300, Michael S. Tsirkin wrote: > > > Here's an untested patch with partial support for level triggered > > > interrupts in irqfd. What this patch has: support for clearing interrupt > > > on ack. What this patch does not have: support signalling eventfd on ack > > > so that userspace can take action and e.g. reenable interrupt. > > > > > Do we want level triggered irqfd to be used only for device assignment? > > generally level triggered-not only. > But irqfd is promarily for device assignment and emulated devices. > If it is for emulated device zeroing irq level on ack is not acceptable. > > Because if not it is not appropriate to zero irq level on ack. > > The bit that is missing in this patch, is that we'll signal eventfd > after we zero irq. Userspace polls that and re-asserts irq. That is not needed for device emulation. Emulated device should lower irq line by itself. It know exactly when to do it. Please don't push broken device assignment model to emulated devices. > > > And you > > have no other way to zero irq level?! > > The reason is interrupt sharing: device can assert interrupt > but can not clear it by itself. For interrupt sharing to work you need to allocate source id for each irqfd. > qemu can still use the SET_IRQ ioctl if it wants full control. > > > > Another note is that level > > triggered irqfd shouldn't use KVM_USERSPACE_IRQ_SOURCE_ID since it may > > be shared. > > Since we zero on ack, I think this is not a problem. > We will not zero on ack for emulated devices. Please don't abuse ack notifiers. Even if you zero on ack this is still the problem BTW since other device may lower your line level. > > > Gleb, Marcelo, I'd like your input on the approach taken wrt locking. > > > Does it look sane? > > > > > > Avi, how's the interface? I intend to also add an eventfd probably in > > > the padding in the irqfd struct. > > > > > > Signed-off-by: Michael S. Tsirkin <mst@redhat.com> > > > > > > --- > > > > > > diff --git a/include/linux/kvm.h b/include/linux/kvm.h > > > index 230a91a..8bf16af 100644 > > > --- a/include/linux/kvm.h > > > +++ b/include/linux/kvm.h > > > @@ -488,6 +488,7 @@ struct kvm_x86_mce { > > > #endif > > > > > > #define KVM_IRQFD_FLAG_DEASSIGN (1 << 0) > > > +#define KVM_IRQFD_FLAG_LEVEL (1 << 1) > > > > > > struct kvm_irqfd { > > > __u32 fd; > > > diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c > > > index 99017e8..fcbf5b5 100644 > > > --- a/virt/kvm/eventfd.c > > > +++ b/virt/kvm/eventfd.c > > > @@ -45,12 +45,14 @@ struct _irqfd { > > > struct kvm *kvm; > > > struct eventfd_ctx *eventfd; > > > int gsi; > > > + int is_level; > > > struct list_head list; > > > poll_table pt; > > > wait_queue_head_t *wqh; > > > wait_queue_t wait; > > > struct work_struct inject; > > > struct work_struct shutdown; > > > + struct kvm_irq_ack_notifier kian; > > > }; > > > > > > static struct workqueue_struct *irqfd_cleanup_wq; > > > @@ -63,10 +65,15 @@ irqfd_inject(struct work_struct *work) > > > > > > mutex_lock(&kvm->irq_lock); > > > kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1); > > > - kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0); > > > + if (!irqfd->is_level) > > > + kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0); > > > mutex_unlock(&kvm->irq_lock); > > > } > > > > > > +static void irqfd_irq_acked(struct kvm_irq_ack_notifier *kian) > > > +{ > > > + kvm_set_irq(kian->kvm, KVM_USERSPACE_IRQ_SOURCE_ID, kian->gsi, 0); > > > +} > > > /* > > > * Race-free decouple logic (ordering is critical) > > > */ > > > @@ -87,6 +94,9 @@ irqfd_shutdown(struct work_struct *work) > > > */ > > > flush_work(&irqfd->inject); > > > > > > + if (irqfd->is_level) > > > + kvm_unregister_irq_ack_notifier(&irqfd->kian); > > > + > > > /* > > > * It is now safe to release the object's resources > > > */ > > > @@ -166,7 +176,7 @@ irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh, > > > } > > > > > > static int > > > -kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi) > > > +kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi, int is_level) > > > { > > > struct _irqfd *irqfd; > > > struct file *file = NULL; > > > @@ -180,6 +190,7 @@ kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi) > > > > > > irqfd->kvm = kvm; > > > irqfd->gsi = gsi; > > > + irqfd->is_level = is_level; > > > INIT_LIST_HEAD(&irqfd->list); > > > INIT_WORK(&irqfd->inject, irqfd_inject); > > > INIT_WORK(&irqfd->shutdown, irqfd_shutdown); > > > @@ -198,6 +209,12 @@ kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi) > > > > > > irqfd->eventfd = eventfd; > > > > > > + if (is_level) { > > > + irqfd->kian.gsi = gsi; > > > + irqfd->kian.irq_acked = irqfd_irq_acked; > > > + kvm_register_irq_ack_notifier(&irqfd->kian); > > > + } > > > + > > > /* > > > * Install our own custom wake-up handling so we are notified via > > > * a callback whenever someone signals the underlying eventfd > > > @@ -281,10 +298,13 @@ kvm_irqfd_deassign(struct kvm *kvm, int fd, int gsi) > > > int > > > kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags) > > > { > > > + if (flags & ~(KVM_IRQFD_FLAG_DEASSIGN | KVM_IRQFD_FLAG_LEVEL)) > > > + return -EINVAL; > > > + > > > if (flags & KVM_IRQFD_FLAG_DEASSIGN) > > > return kvm_irqfd_deassign(kvm, fd, gsi); > > > > > > - return kvm_irqfd_assign(kvm, fd, gsi); > > > + return kvm_irqfd_assign(kvm, fd, gsi, !!(flags & KVM_IRQFD_FLAG_LEVEL)); > > > } > > > > > > /* > > > > -- > > Gleb. -- Gleb. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, Jul 27, 2009 at 10:31:38AM +0300, Gleb Natapov wrote: > On Mon, Jul 27, 2009 at 09:28:34AM +0300, Michael S. Tsirkin wrote: > > On Mon, Jul 27, 2009 at 08:33:12AM +0300, Gleb Natapov wrote: > > > On Sun, Jul 26, 2009 at 07:22:25PM +0300, Michael S. Tsirkin wrote: > > > > Here's an untested patch with partial support for level triggered > > > > interrupts in irqfd. What this patch has: support for clearing interrupt > > > > on ack. What this patch does not have: support signalling eventfd on ack > > > > so that userspace can take action and e.g. reenable interrupt. > > > > > > > Do we want level triggered irqfd to be used only for device assignment? > > > > generally level triggered-not only. > > But irqfd is promarily for device assignment and emulated devices. > > > If it is for emulated device zeroing irq level on ack is not acceptable. OK, forget about emulated devices for now. Let's focus on assigned devices. > > > Because if not it is not appropriate to zero irq level on ack. > > > > The bit that is missing in this patch, is that we'll signal eventfd > > after we zero irq. Userspace polls that and re-asserts irq. > That is not needed for device emulation. Emulated device should lower > irq line by itself. It know exactly when to do it. Please don't push > broken device assignment model to emulated devices. > > > > > > And you > > > have no other way to zero irq level?! > > > > The reason is interrupt sharing: device can assert interrupt > > but can not clear it by itself. > For interrupt sharing to work you need to allocate source id for each > irqfd. BTW, current SET_IRQ_LINE interface uses a common source id for all irqs. Does this mean that interrupt sharing in guest is broken now? > > qemu can still use the SET_IRQ ioctl if it wants full control. > > > > > > > Another note is that level > > > triggered irqfd shouldn't use KVM_USERSPACE_IRQ_SOURCE_ID since it may > > > be shared. > > > > Since we zero on ack, I think this is not a problem. > > > We will not zero on ack for emulated devices. Please don't abuse ack > notifiers. Even if you zero on ack this is still the problem BTW since > other device may lower your line level. > > > > > Gleb, Marcelo, I'd like your input on the approach taken wrt locking. > > > > Does it look sane? > > > > > > > > Avi, how's the interface? I intend to also add an eventfd probably in > > > > the padding in the irqfd struct. > > > > > > > > Signed-off-by: Michael S. Tsirkin <mst@redhat.com> > > > > > > > > --- > > > > > > > > diff --git a/include/linux/kvm.h b/include/linux/kvm.h > > > > index 230a91a..8bf16af 100644 > > > > --- a/include/linux/kvm.h > > > > +++ b/include/linux/kvm.h > > > > @@ -488,6 +488,7 @@ struct kvm_x86_mce { > > > > #endif > > > > > > > > #define KVM_IRQFD_FLAG_DEASSIGN (1 << 0) > > > > +#define KVM_IRQFD_FLAG_LEVEL (1 << 1) > > > > > > > > struct kvm_irqfd { > > > > __u32 fd; > > > > diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c > > > > index 99017e8..fcbf5b5 100644 > > > > --- a/virt/kvm/eventfd.c > > > > +++ b/virt/kvm/eventfd.c > > > > @@ -45,12 +45,14 @@ struct _irqfd { > > > > struct kvm *kvm; > > > > struct eventfd_ctx *eventfd; > > > > int gsi; > > > > + int is_level; > > > > struct list_head list; > > > > poll_table pt; > > > > wait_queue_head_t *wqh; > > > > wait_queue_t wait; > > > > struct work_struct inject; > > > > struct work_struct shutdown; > > > > + struct kvm_irq_ack_notifier kian; > > > > }; > > > > > > > > static struct workqueue_struct *irqfd_cleanup_wq; > > > > @@ -63,10 +65,15 @@ irqfd_inject(struct work_struct *work) > > > > > > > > mutex_lock(&kvm->irq_lock); > > > > kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1); > > > > - kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0); > > > > + if (!irqfd->is_level) > > > > + kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0); > > > > mutex_unlock(&kvm->irq_lock); > > > > } > > > > > > > > +static void irqfd_irq_acked(struct kvm_irq_ack_notifier *kian) > > > > +{ > > > > + kvm_set_irq(kian->kvm, KVM_USERSPACE_IRQ_SOURCE_ID, kian->gsi, 0); > > > > +} > > > > /* > > > > * Race-free decouple logic (ordering is critical) > > > > */ > > > > @@ -87,6 +94,9 @@ irqfd_shutdown(struct work_struct *work) > > > > */ > > > > flush_work(&irqfd->inject); > > > > > > > > + if (irqfd->is_level) > > > > + kvm_unregister_irq_ack_notifier(&irqfd->kian); > > > > + > > > > /* > > > > * It is now safe to release the object's resources > > > > */ > > > > @@ -166,7 +176,7 @@ irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh, > > > > } > > > > > > > > static int > > > > -kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi) > > > > +kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi, int is_level) > > > > { > > > > struct _irqfd *irqfd; > > > > struct file *file = NULL; > > > > @@ -180,6 +190,7 @@ kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi) > > > > > > > > irqfd->kvm = kvm; > > > > irqfd->gsi = gsi; > > > > + irqfd->is_level = is_level; > > > > INIT_LIST_HEAD(&irqfd->list); > > > > INIT_WORK(&irqfd->inject, irqfd_inject); > > > > INIT_WORK(&irqfd->shutdown, irqfd_shutdown); > > > > @@ -198,6 +209,12 @@ kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi) > > > > > > > > irqfd->eventfd = eventfd; > > > > > > > > + if (is_level) { > > > > + irqfd->kian.gsi = gsi; > > > > + irqfd->kian.irq_acked = irqfd_irq_acked; > > > > + kvm_register_irq_ack_notifier(&irqfd->kian); > > > > + } > > > > + > > > > /* > > > > * Install our own custom wake-up handling so we are notified via > > > > * a callback whenever someone signals the underlying eventfd > > > > @@ -281,10 +298,13 @@ kvm_irqfd_deassign(struct kvm *kvm, int fd, int gsi) > > > > int > > > > kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags) > > > > { > > > > + if (flags & ~(KVM_IRQFD_FLAG_DEASSIGN | KVM_IRQFD_FLAG_LEVEL)) > > > > + return -EINVAL; > > > > + > > > > if (flags & KVM_IRQFD_FLAG_DEASSIGN) > > > > return kvm_irqfd_deassign(kvm, fd, gsi); > > > > > > > > - return kvm_irqfd_assign(kvm, fd, gsi); > > > > + return kvm_irqfd_assign(kvm, fd, gsi, !!(flags & KVM_IRQFD_FLAG_LEVEL)); > > > > } > > > > > > > > /* > > > > > > -- > > > Gleb. > > -- > Gleb. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, Jul 27, 2009 at 11:02:06AM +0300, Michael S. Tsirkin wrote: > On Mon, Jul 27, 2009 at 10:31:38AM +0300, Gleb Natapov wrote: > > On Mon, Jul 27, 2009 at 09:28:34AM +0300, Michael S. Tsirkin wrote: > > > On Mon, Jul 27, 2009 at 08:33:12AM +0300, Gleb Natapov wrote: > > > > On Sun, Jul 26, 2009 at 07:22:25PM +0300, Michael S. Tsirkin wrote: > > > > > Here's an untested patch with partial support for level triggered > > > > > interrupts in irqfd. What this patch has: support for clearing interrupt > > > > > on ack. What this patch does not have: support signalling eventfd on ack > > > > > so that userspace can take action and e.g. reenable interrupt. > > > > > > > > > Do we want level triggered irqfd to be used only for device assignment? > > > > > > generally level triggered-not only. > > > But irqfd is promarily for device assignment and emulated devices. > > > > > If it is for emulated device zeroing irq level on ack is not acceptable. > > OK, forget about emulated devices for now. Let's focus on assigned > devices. > Can't. We should design interface that works for everything. > > > > Because if not it is not appropriate to zero irq level on ack. > > > > > > The bit that is missing in this patch, is that we'll signal eventfd > > > after we zero irq. Userspace polls that and re-asserts irq. > > That is not needed for device emulation. Emulated device should lower > > irq line by itself. It know exactly when to do it. Please don't push > > broken device assignment model to emulated devices. > > > > > > > > > And you > > > > have no other way to zero irq level?! > > > > > > The reason is interrupt sharing: device can assert interrupt > > > but can not clear it by itself. > > For interrupt sharing to work you need to allocate source id for each > > irqfd. > > BTW, current SET_IRQ_LINE interface uses a common source id for all > irqs. Does this mean that interrupt sharing in guest is broken now? No. It means that sharing in handled in userspace by chipset emulation. > > > > qemu can still use the SET_IRQ ioctl if it wants full control. > > > > > > > > > > Another note is that level > > > > triggered irqfd shouldn't use KVM_USERSPACE_IRQ_SOURCE_ID since it may > > > > be shared. > > > > > > Since we zero on ack, I think this is not a problem. > > > > > We will not zero on ack for emulated devices. Please don't abuse ack > > notifiers. Even if you zero on ack this is still the problem BTW since > > other device may lower your line level. > > > > > > > Gleb, Marcelo, I'd like your input on the approach taken wrt locking. > > > > > Does it look sane? > > > > > > > > > > Avi, how's the interface? I intend to also add an eventfd probably in > > > > > the padding in the irqfd struct. > > > > > > > > > > Signed-off-by: Michael S. Tsirkin <mst@redhat.com> > > > > > > > > > > --- > > > > > > > > > > diff --git a/include/linux/kvm.h b/include/linux/kvm.h > > > > > index 230a91a..8bf16af 100644 > > > > > --- a/include/linux/kvm.h > > > > > +++ b/include/linux/kvm.h > > > > > @@ -488,6 +488,7 @@ struct kvm_x86_mce { > > > > > #endif > > > > > > > > > > #define KVM_IRQFD_FLAG_DEASSIGN (1 << 0) > > > > > +#define KVM_IRQFD_FLAG_LEVEL (1 << 1) > > > > > > > > > > struct kvm_irqfd { > > > > > __u32 fd; > > > > > diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c > > > > > index 99017e8..fcbf5b5 100644 > > > > > --- a/virt/kvm/eventfd.c > > > > > +++ b/virt/kvm/eventfd.c > > > > > @@ -45,12 +45,14 @@ struct _irqfd { > > > > > struct kvm *kvm; > > > > > struct eventfd_ctx *eventfd; > > > > > int gsi; > > > > > + int is_level; > > > > > struct list_head list; > > > > > poll_table pt; > > > > > wait_queue_head_t *wqh; > > > > > wait_queue_t wait; > > > > > struct work_struct inject; > > > > > struct work_struct shutdown; > > > > > + struct kvm_irq_ack_notifier kian; > > > > > }; > > > > > > > > > > static struct workqueue_struct *irqfd_cleanup_wq; > > > > > @@ -63,10 +65,15 @@ irqfd_inject(struct work_struct *work) > > > > > > > > > > mutex_lock(&kvm->irq_lock); > > > > > kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1); > > > > > - kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0); > > > > > + if (!irqfd->is_level) > > > > > + kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0); > > > > > mutex_unlock(&kvm->irq_lock); > > > > > } > > > > > > > > > > +static void irqfd_irq_acked(struct kvm_irq_ack_notifier *kian) > > > > > +{ > > > > > + kvm_set_irq(kian->kvm, KVM_USERSPACE_IRQ_SOURCE_ID, kian->gsi, 0); > > > > > +} > > > > > /* > > > > > * Race-free decouple logic (ordering is critical) > > > > > */ > > > > > @@ -87,6 +94,9 @@ irqfd_shutdown(struct work_struct *work) > > > > > */ > > > > > flush_work(&irqfd->inject); > > > > > > > > > > + if (irqfd->is_level) > > > > > + kvm_unregister_irq_ack_notifier(&irqfd->kian); > > > > > + > > > > > /* > > > > > * It is now safe to release the object's resources > > > > > */ > > > > > @@ -166,7 +176,7 @@ irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh, > > > > > } > > > > > > > > > > static int > > > > > -kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi) > > > > > +kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi, int is_level) > > > > > { > > > > > struct _irqfd *irqfd; > > > > > struct file *file = NULL; > > > > > @@ -180,6 +190,7 @@ kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi) > > > > > > > > > > irqfd->kvm = kvm; > > > > > irqfd->gsi = gsi; > > > > > + irqfd->is_level = is_level; > > > > > INIT_LIST_HEAD(&irqfd->list); > > > > > INIT_WORK(&irqfd->inject, irqfd_inject); > > > > > INIT_WORK(&irqfd->shutdown, irqfd_shutdown); > > > > > @@ -198,6 +209,12 @@ kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi) > > > > > > > > > > irqfd->eventfd = eventfd; > > > > > > > > > > + if (is_level) { > > > > > + irqfd->kian.gsi = gsi; > > > > > + irqfd->kian.irq_acked = irqfd_irq_acked; > > > > > + kvm_register_irq_ack_notifier(&irqfd->kian); > > > > > + } > > > > > + > > > > > /* > > > > > * Install our own custom wake-up handling so we are notified via > > > > > * a callback whenever someone signals the underlying eventfd > > > > > @@ -281,10 +298,13 @@ kvm_irqfd_deassign(struct kvm *kvm, int fd, int gsi) > > > > > int > > > > > kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags) > > > > > { > > > > > + if (flags & ~(KVM_IRQFD_FLAG_DEASSIGN | KVM_IRQFD_FLAG_LEVEL)) > > > > > + return -EINVAL; > > > > > + > > > > > if (flags & KVM_IRQFD_FLAG_DEASSIGN) > > > > > return kvm_irqfd_deassign(kvm, fd, gsi); > > > > > > > > > > - return kvm_irqfd_assign(kvm, fd, gsi); > > > > > + return kvm_irqfd_assign(kvm, fd, gsi, !!(flags & KVM_IRQFD_FLAG_LEVEL)); > > > > > } > > > > > > > > > > /* > > > > > > > > -- > > > > Gleb. > > > > -- > > Gleb. -- Gleb. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, Jul 27, 2009 at 11:13:50AM +0300, Gleb Natapov wrote: > On Mon, Jul 27, 2009 at 11:02:06AM +0300, Michael S. Tsirkin wrote: > > On Mon, Jul 27, 2009 at 10:31:38AM +0300, Gleb Natapov wrote: > > > On Mon, Jul 27, 2009 at 09:28:34AM +0300, Michael S. Tsirkin wrote: > > > > On Mon, Jul 27, 2009 at 08:33:12AM +0300, Gleb Natapov wrote: > > > > > On Sun, Jul 26, 2009 at 07:22:25PM +0300, Michael S. Tsirkin wrote: > > > > > > Here's an untested patch with partial support for level triggered > > > > > > interrupts in irqfd. What this patch has: support for clearing interrupt > > > > > > on ack. What this patch does not have: support signalling eventfd on ack > > > > > > so that userspace can take action and e.g. reenable interrupt. > > > > > > > > > > > Do we want level triggered irqfd to be used only for device assignment? > > > > > > > > generally level triggered-not only. > > > > But irqfd is promarily for device assignment and emulated devices. > > > > > > > If it is for emulated device zeroing irq level on ack is not acceptable. > > > > OK, forget about emulated devices for now. Let's focus on assigned > > devices. > > > Can't. We should design interface that works for everything. We can always add the KVM_MAKE_COFFEE ioctl later. > > > > > Because if not it is not appropriate to zero irq level on ack. > > > > > > > > The bit that is missing in this patch, is that we'll signal eventfd > > > > after we zero irq. Userspace polls that and re-asserts irq. > > > That is not needed for device emulation. Emulated device should lower > > > irq line by itself. It know exactly when to do it. Please don't push > > > broken device assignment model to emulated devices. > > > > > > > > > > > > And you > > > > > have no other way to zero irq level?! > > > > > > > > The reason is interrupt sharing: device can assert interrupt > > > > but can not clear it by itself. > > > For interrupt sharing to work you need to allocate source id for each > > > irqfd. > > > > BTW, current SET_IRQ_LINE interface uses a common source id for all > > irqs. Does this mean that interrupt sharing in guest is broken now? > No. It means that sharing in handled in userspace by chipset emulation. Aha. Where's that code in qemu-kvm? I only see it forwarding to the ioctl call directly. And what about assigned devices? These bypass qemu. > > > > > > qemu can still use the SET_IRQ ioctl if it wants full control. > > > > > > > > > > > > > Another note is that level > > > > > triggered irqfd shouldn't use KVM_USERSPACE_IRQ_SOURCE_ID since it may > > > > > be shared. > > > > > > > > Since we zero on ack, I think this is not a problem. > > > > > > > We will not zero on ack for emulated devices. Please don't abuse ack > > > notifiers. Even if you zero on ack this is still the problem BTW since > > > other device may lower your line level. > > > > > > > > > Gleb, Marcelo, I'd like your input on the approach taken wrt locking. > > > > > > Does it look sane? > > > > > > > > > > > > Avi, how's the interface? I intend to also add an eventfd probably in > > > > > > the padding in the irqfd struct. > > > > > > > > > > > > Signed-off-by: Michael S. Tsirkin <mst@redhat.com> > > > > > > > > > > > > --- > > > > > > > > > > > > diff --git a/include/linux/kvm.h b/include/linux/kvm.h > > > > > > index 230a91a..8bf16af 100644 > > > > > > --- a/include/linux/kvm.h > > > > > > +++ b/include/linux/kvm.h > > > > > > @@ -488,6 +488,7 @@ struct kvm_x86_mce { > > > > > > #endif > > > > > > > > > > > > #define KVM_IRQFD_FLAG_DEASSIGN (1 << 0) > > > > > > +#define KVM_IRQFD_FLAG_LEVEL (1 << 1) > > > > > > > > > > > > struct kvm_irqfd { > > > > > > __u32 fd; > > > > > > diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c > > > > > > index 99017e8..fcbf5b5 100644 > > > > > > --- a/virt/kvm/eventfd.c > > > > > > +++ b/virt/kvm/eventfd.c > > > > > > @@ -45,12 +45,14 @@ struct _irqfd { > > > > > > struct kvm *kvm; > > > > > > struct eventfd_ctx *eventfd; > > > > > > int gsi; > > > > > > + int is_level; > > > > > > struct list_head list; > > > > > > poll_table pt; > > > > > > wait_queue_head_t *wqh; > > > > > > wait_queue_t wait; > > > > > > struct work_struct inject; > > > > > > struct work_struct shutdown; > > > > > > + struct kvm_irq_ack_notifier kian; > > > > > > }; > > > > > > > > > > > > static struct workqueue_struct *irqfd_cleanup_wq; > > > > > > @@ -63,10 +65,15 @@ irqfd_inject(struct work_struct *work) > > > > > > > > > > > > mutex_lock(&kvm->irq_lock); > > > > > > kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1); > > > > > > - kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0); > > > > > > + if (!irqfd->is_level) > > > > > > + kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0); > > > > > > mutex_unlock(&kvm->irq_lock); > > > > > > } > > > > > > > > > > > > +static void irqfd_irq_acked(struct kvm_irq_ack_notifier *kian) > > > > > > +{ > > > > > > + kvm_set_irq(kian->kvm, KVM_USERSPACE_IRQ_SOURCE_ID, kian->gsi, 0); > > > > > > +} > > > > > > /* > > > > > > * Race-free decouple logic (ordering is critical) > > > > > > */ > > > > > > @@ -87,6 +94,9 @@ irqfd_shutdown(struct work_struct *work) > > > > > > */ > > > > > > flush_work(&irqfd->inject); > > > > > > > > > > > > + if (irqfd->is_level) > > > > > > + kvm_unregister_irq_ack_notifier(&irqfd->kian); > > > > > > + > > > > > > /* > > > > > > * It is now safe to release the object's resources > > > > > > */ > > > > > > @@ -166,7 +176,7 @@ irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh, > > > > > > } > > > > > > > > > > > > static int > > > > > > -kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi) > > > > > > +kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi, int is_level) > > > > > > { > > > > > > struct _irqfd *irqfd; > > > > > > struct file *file = NULL; > > > > > > @@ -180,6 +190,7 @@ kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi) > > > > > > > > > > > > irqfd->kvm = kvm; > > > > > > irqfd->gsi = gsi; > > > > > > + irqfd->is_level = is_level; > > > > > > INIT_LIST_HEAD(&irqfd->list); > > > > > > INIT_WORK(&irqfd->inject, irqfd_inject); > > > > > > INIT_WORK(&irqfd->shutdown, irqfd_shutdown); > > > > > > @@ -198,6 +209,12 @@ kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi) > > > > > > > > > > > > irqfd->eventfd = eventfd; > > > > > > > > > > > > + if (is_level) { > > > > > > + irqfd->kian.gsi = gsi; > > > > > > + irqfd->kian.irq_acked = irqfd_irq_acked; > > > > > > + kvm_register_irq_ack_notifier(&irqfd->kian); > > > > > > + } > > > > > > + > > > > > > /* > > > > > > * Install our own custom wake-up handling so we are notified via > > > > > > * a callback whenever someone signals the underlying eventfd > > > > > > @@ -281,10 +298,13 @@ kvm_irqfd_deassign(struct kvm *kvm, int fd, int gsi) > > > > > > int > > > > > > kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags) > > > > > > { > > > > > > + if (flags & ~(KVM_IRQFD_FLAG_DEASSIGN | KVM_IRQFD_FLAG_LEVEL)) > > > > > > + return -EINVAL; > > > > > > + > > > > > > if (flags & KVM_IRQFD_FLAG_DEASSIGN) > > > > > > return kvm_irqfd_deassign(kvm, fd, gsi); > > > > > > > > > > > > - return kvm_irqfd_assign(kvm, fd, gsi); > > > > > > + return kvm_irqfd_assign(kvm, fd, gsi, !!(flags & KVM_IRQFD_FLAG_LEVEL)); > > > > > > } > > > > > > > > > > > > /* > > > > > > > > > > -- > > > > > Gleb. > > > > > > -- > > > Gleb. > > -- > Gleb. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, Jul 27, 2009 at 11:21:39AM +0300, Michael S. Tsirkin wrote: > On Mon, Jul 27, 2009 at 11:13:50AM +0300, Gleb Natapov wrote: > > On Mon, Jul 27, 2009 at 11:02:06AM +0300, Michael S. Tsirkin wrote: > > > On Mon, Jul 27, 2009 at 10:31:38AM +0300, Gleb Natapov wrote: > > > > On Mon, Jul 27, 2009 at 09:28:34AM +0300, Michael S. Tsirkin wrote: > > > > > On Mon, Jul 27, 2009 at 08:33:12AM +0300, Gleb Natapov wrote: > > > > > > On Sun, Jul 26, 2009 at 07:22:25PM +0300, Michael S. Tsirkin wrote: > > > > > > > Here's an untested patch with partial support for level triggered > > > > > > > interrupts in irqfd. What this patch has: support for clearing interrupt > > > > > > > on ack. What this patch does not have: support signalling eventfd on ack > > > > > > > so that userspace can take action and e.g. reenable interrupt. > > > > > > > > > > > > > Do we want level triggered irqfd to be used only for device assignment? > > > > > > > > > > generally level triggered-not only. > > > > > But irqfd is promarily for device assignment and emulated devices. > > > > > > > > > If it is for emulated device zeroing irq level on ack is not acceptable. > > > > > > OK, forget about emulated devices for now. Let's focus on assigned > > > devices. > > > > > Can't. We should design interface that works for everything. > > We can always add the KVM_MAKE_COFFEE ioctl later. > Assigned devices are non interesting case, so we can hack something afterwards to make it work. Emulated devices is a normal case that should work when interface is introduced. Why assigning devices through the kernel is not good enough? What is the reason to push it to userspace? > > > > > > Because if not it is not appropriate to zero irq level on ack. > > > > > > > > > > The bit that is missing in this patch, is that we'll signal eventfd > > > > > after we zero irq. Userspace polls that and re-asserts irq. > > > > That is not needed for device emulation. Emulated device should lower > > > > irq line by itself. It know exactly when to do it. Please don't push > > > > broken device assignment model to emulated devices. > > > > > > > > > > > > > > > And you > > > > > > have no other way to zero irq level?! > > > > > > > > > > The reason is interrupt sharing: device can assert interrupt > > > > > but can not clear it by itself. > > > > For interrupt sharing to work you need to allocate source id for each > > > > irqfd. > > > > > > BTW, current SET_IRQ_LINE interface uses a common source id for all > > > irqs. Does this mean that interrupt sharing in guest is broken now? > > No. It means that sharing in handled in userspace by chipset emulation. > > Aha. Where's that code in qemu-kvm? I only see it forwarding to the AFAIR in hw/pci.c and/or hw/piix_pci.c. > ioctl call directly. And what about assigned devices? These bypass qemu. > Each assigned device allocates its own source id. This is the reason source id exists at all. -- Gleb. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Michael S. Tsirkin wrote: > Here's an untested patch with partial support for level triggered > interrupts in irqfd. What this patch has: support for clearing interrupt > on ack. What this patch does not have: support signalling eventfd on ack > so that userspace can take action and e.g. reenable interrupt. > > Gleb, Marcelo, I'd like your input on the approach taken wrt locking. > Does it look sane? > > Avi, how's the interface? I intend to also add an eventfd probably in > the padding in the irqfd struct. > > Signed-off-by: Michael S. Tsirkin <mst@redhat.com> Patch seems reasonable to me. Acked-by: Gregory Haskins <ghaskins@novell.com> > > --- > > diff --git a/include/linux/kvm.h b/include/linux/kvm.h > index 230a91a..8bf16af 100644 > --- a/include/linux/kvm.h > +++ b/include/linux/kvm.h > @@ -488,6 +488,7 @@ struct kvm_x86_mce { > #endif > > #define KVM_IRQFD_FLAG_DEASSIGN (1 << 0) > +#define KVM_IRQFD_FLAG_LEVEL (1 << 1) > > struct kvm_irqfd { > __u32 fd; > diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c > index 99017e8..fcbf5b5 100644 > --- a/virt/kvm/eventfd.c > +++ b/virt/kvm/eventfd.c > @@ -45,12 +45,14 @@ struct _irqfd { > struct kvm *kvm; > struct eventfd_ctx *eventfd; > int gsi; > + int is_level; > struct list_head list; > poll_table pt; > wait_queue_head_t *wqh; > wait_queue_t wait; > struct work_struct inject; > struct work_struct shutdown; > + struct kvm_irq_ack_notifier kian; > }; > > static struct workqueue_struct *irqfd_cleanup_wq; > @@ -63,10 +65,15 @@ irqfd_inject(struct work_struct *work) > > mutex_lock(&kvm->irq_lock); > kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1); > - kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0); > + if (!irqfd->is_level) > + kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0); > mutex_unlock(&kvm->irq_lock); > } > > +static void irqfd_irq_acked(struct kvm_irq_ack_notifier *kian) > +{ > + kvm_set_irq(kian->kvm, KVM_USERSPACE_IRQ_SOURCE_ID, kian->gsi, 0); > +} > /* > * Race-free decouple logic (ordering is critical) > */ > @@ -87,6 +94,9 @@ irqfd_shutdown(struct work_struct *work) > */ > flush_work(&irqfd->inject); > > + if (irqfd->is_level) > + kvm_unregister_irq_ack_notifier(&irqfd->kian); > + > /* > * It is now safe to release the object's resources > */ > @@ -166,7 +176,7 @@ irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh, > } > > static int > -kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi) > +kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi, int is_level) > { > struct _irqfd *irqfd; > struct file *file = NULL; > @@ -180,6 +190,7 @@ kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi) > > irqfd->kvm = kvm; > irqfd->gsi = gsi; > + irqfd->is_level = is_level; > INIT_LIST_HEAD(&irqfd->list); > INIT_WORK(&irqfd->inject, irqfd_inject); > INIT_WORK(&irqfd->shutdown, irqfd_shutdown); > @@ -198,6 +209,12 @@ kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi) > > irqfd->eventfd = eventfd; > > + if (is_level) { > + irqfd->kian.gsi = gsi; > + irqfd->kian.irq_acked = irqfd_irq_acked; > + kvm_register_irq_ack_notifier(&irqfd->kian); > + } > + > /* > * Install our own custom wake-up handling so we are notified via > * a callback whenever someone signals the underlying eventfd > @@ -281,10 +298,13 @@ kvm_irqfd_deassign(struct kvm *kvm, int fd, int gsi) > int > kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags) > { > + if (flags & ~(KVM_IRQFD_FLAG_DEASSIGN | KVM_IRQFD_FLAG_LEVEL)) > + return -EINVAL; > + > if (flags & KVM_IRQFD_FLAG_DEASSIGN) > return kvm_irqfd_deassign(kvm, fd, gsi); > > - return kvm_irqfd_assign(kvm, fd, gsi); > + return kvm_irqfd_assign(kvm, fd, gsi, !!(flags & KVM_IRQFD_FLAG_LEVEL)); > } > > /* > -- > To unsubscribe from this list: send the line "unsubscribe kvm" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html
Gregory Haskins wrote: > Michael S. Tsirkin wrote: >> Here's an untested patch with partial support for level triggered >> interrupts in irqfd. What this patch has: support for clearing interrupt >> on ack. What this patch does not have: support signalling eventfd on ack >> so that userspace can take action and e.g. reenable interrupt. >> >> Gleb, Marcelo, I'd like your input on the approach taken wrt locking. >> Does it look sane? >> >> Avi, how's the interface? I intend to also add an eventfd probably in >> the padding in the irqfd struct. >> >> Signed-off-by: Michael S. Tsirkin <mst@redhat.com> > > Patch seems reasonable to me. > > Acked-by: Gregory Haskins <ghaskins@novell.com> Actually, one thing I didn't think of earlier is whether a CAP_IRQFD_LEVEL kind of feature is required. I would think not, since the baseline IRQFD hasnt been officially published yet. But its something to consider. -Greg > >> --- >> >> diff --git a/include/linux/kvm.h b/include/linux/kvm.h >> index 230a91a..8bf16af 100644 >> --- a/include/linux/kvm.h >> +++ b/include/linux/kvm.h >> @@ -488,6 +488,7 @@ struct kvm_x86_mce { >> #endif >> >> #define KVM_IRQFD_FLAG_DEASSIGN (1 << 0) >> +#define KVM_IRQFD_FLAG_LEVEL (1 << 1) >> >> struct kvm_irqfd { >> __u32 fd; >> diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c >> index 99017e8..fcbf5b5 100644 >> --- a/virt/kvm/eventfd.c >> +++ b/virt/kvm/eventfd.c >> @@ -45,12 +45,14 @@ struct _irqfd { >> struct kvm *kvm; >> struct eventfd_ctx *eventfd; >> int gsi; >> + int is_level; >> struct list_head list; >> poll_table pt; >> wait_queue_head_t *wqh; >> wait_queue_t wait; >> struct work_struct inject; >> struct work_struct shutdown; >> + struct kvm_irq_ack_notifier kian; >> }; >> >> static struct workqueue_struct *irqfd_cleanup_wq; >> @@ -63,10 +65,15 @@ irqfd_inject(struct work_struct *work) >> >> mutex_lock(&kvm->irq_lock); >> kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1); >> - kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0); >> + if (!irqfd->is_level) >> + kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0); >> mutex_unlock(&kvm->irq_lock); >> } >> >> +static void irqfd_irq_acked(struct kvm_irq_ack_notifier *kian) >> +{ >> + kvm_set_irq(kian->kvm, KVM_USERSPACE_IRQ_SOURCE_ID, kian->gsi, 0); >> +} >> /* >> * Race-free decouple logic (ordering is critical) >> */ >> @@ -87,6 +94,9 @@ irqfd_shutdown(struct work_struct *work) >> */ >> flush_work(&irqfd->inject); >> >> + if (irqfd->is_level) >> + kvm_unregister_irq_ack_notifier(&irqfd->kian); >> + >> /* >> * It is now safe to release the object's resources >> */ >> @@ -166,7 +176,7 @@ irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh, >> } >> >> static int >> -kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi) >> +kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi, int is_level) >> { >> struct _irqfd *irqfd; >> struct file *file = NULL; >> @@ -180,6 +190,7 @@ kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi) >> >> irqfd->kvm = kvm; >> irqfd->gsi = gsi; >> + irqfd->is_level = is_level; >> INIT_LIST_HEAD(&irqfd->list); >> INIT_WORK(&irqfd->inject, irqfd_inject); >> INIT_WORK(&irqfd->shutdown, irqfd_shutdown); >> @@ -198,6 +209,12 @@ kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi) >> >> irqfd->eventfd = eventfd; >> >> + if (is_level) { >> + irqfd->kian.gsi = gsi; >> + irqfd->kian.irq_acked = irqfd_irq_acked; >> + kvm_register_irq_ack_notifier(&irqfd->kian); >> + } >> + >> /* >> * Install our own custom wake-up handling so we are notified via >> * a callback whenever someone signals the underlying eventfd >> @@ -281,10 +298,13 @@ kvm_irqfd_deassign(struct kvm *kvm, int fd, int gsi) >> int >> kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags) >> { >> + if (flags & ~(KVM_IRQFD_FLAG_DEASSIGN | KVM_IRQFD_FLAG_LEVEL)) >> + return -EINVAL; >> + >> if (flags & KVM_IRQFD_FLAG_DEASSIGN) >> return kvm_irqfd_deassign(kvm, fd, gsi); >> >> - return kvm_irqfd_assign(kvm, fd, gsi); >> + return kvm_irqfd_assign(kvm, fd, gsi, !!(flags & KVM_IRQFD_FLAG_LEVEL)); >> } >> >> /* >> -- >> To unsubscribe from this list: send the line "unsubscribe kvm" in >> the body of a message to majordomo@vger.kernel.org >> More majordomo info at http://vger.kernel.org/majordomo-info.html > >
On 07/27/2009 08:33 AM, Gleb Natapov wrote: > On Sun, Jul 26, 2009 at 07:22:25PM +0300, Michael S. Tsirkin wrote: > >> Here's an untested patch with partial support for level triggered >> interrupts in irqfd. What this patch has: support for clearing interrupt >> on ack. What this patch does not have: support signalling eventfd on ack >> so that userspace can take action and e.g. reenable interrupt. >> >> > Do we want level triggered irqfd to be used only for device assignment? > If it works, it should work everywhere.
On 07/26/2009 07:22 PM, Michael S. Tsirkin wrote: > Here's an untested patch with partial support for level triggered > interrupts in irqfd. What this patch has: support for clearing interrupt > on ack. What this patch does not have: support signalling eventfd on ack > so that userspace can take action and e.g. reenable interrupt. > > Gleb, Marcelo, I'd like your input on the approach taken wrt locking. > Does it look sane? > > Avi, how's the interface? I intend to also add an eventfd probably in > the padding in the irqfd struct. > What about the state-capable eventfd? Seems much more natural for this. > > static struct workqueue_struct *irqfd_cleanup_wq; > @@ -63,10 +65,15 @@ irqfd_inject(struct work_struct *work) > > mutex_lock(&kvm->irq_lock); > kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1); > - kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0); > + if (!irqfd->is_level) > + kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0); > e.g. inject the eventfd->state as the irq level.
On Wed, Aug 19, 2009 at 03:20:21PM +0300, Avi Kivity wrote: > On 07/26/2009 07:22 PM, Michael S. Tsirkin wrote: >> Here's an untested patch with partial support for level triggered >> interrupts in irqfd. What this patch has: support for clearing interrupt >> on ack. What this patch does not have: support signalling eventfd on ack >> so that userspace can take action and e.g. reenable interrupt. >> >> Gleb, Marcelo, I'd like your input on the approach taken wrt locking. >> Does it look sane? >> >> Avi, how's the interface? I intend to also add an eventfd probably in >> the padding in the irqfd struct. >> > > What about the state-capable eventfd? This patch precedes that effort. > Seems much more natural for this. Good. Could you please review and ack the state-capable eventfd patch then, so I can try pushing it upstream? >> >> static struct workqueue_struct *irqfd_cleanup_wq; >> @@ -63,10 +65,15 @@ irqfd_inject(struct work_struct *work) >> >> mutex_lock(&kvm->irq_lock); >> kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1); >> - kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0); >> + if (!irqfd->is_level) >> + kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0); >> > > e.g. inject the eventfd->state as the irq level. > > -- > error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 230a91a..8bf16af 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -488,6 +488,7 @@ struct kvm_x86_mce { #endif #define KVM_IRQFD_FLAG_DEASSIGN (1 << 0) +#define KVM_IRQFD_FLAG_LEVEL (1 << 1) struct kvm_irqfd { __u32 fd; diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 99017e8..fcbf5b5 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -45,12 +45,14 @@ struct _irqfd { struct kvm *kvm; struct eventfd_ctx *eventfd; int gsi; + int is_level; struct list_head list; poll_table pt; wait_queue_head_t *wqh; wait_queue_t wait; struct work_struct inject; struct work_struct shutdown; + struct kvm_irq_ack_notifier kian; }; static struct workqueue_struct *irqfd_cleanup_wq; @@ -63,10 +65,15 @@ irqfd_inject(struct work_struct *work) mutex_lock(&kvm->irq_lock); kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1); - kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0); + if (!irqfd->is_level) + kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0); mutex_unlock(&kvm->irq_lock); } +static void irqfd_irq_acked(struct kvm_irq_ack_notifier *kian) +{ + kvm_set_irq(kian->kvm, KVM_USERSPACE_IRQ_SOURCE_ID, kian->gsi, 0); +} /* * Race-free decouple logic (ordering is critical) */ @@ -87,6 +94,9 @@ irqfd_shutdown(struct work_struct *work) */ flush_work(&irqfd->inject); + if (irqfd->is_level) + kvm_unregister_irq_ack_notifier(&irqfd->kian); + /* * It is now safe to release the object's resources */ @@ -166,7 +176,7 @@ irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh, } static int -kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi) +kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi, int is_level) { struct _irqfd *irqfd; struct file *file = NULL; @@ -180,6 +190,7 @@ kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi) irqfd->kvm = kvm; irqfd->gsi = gsi; + irqfd->is_level = is_level; INIT_LIST_HEAD(&irqfd->list); INIT_WORK(&irqfd->inject, irqfd_inject); INIT_WORK(&irqfd->shutdown, irqfd_shutdown); @@ -198,6 +209,12 @@ kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi) irqfd->eventfd = eventfd; + if (is_level) { + irqfd->kian.gsi = gsi; + irqfd->kian.irq_acked = irqfd_irq_acked; + kvm_register_irq_ack_notifier(&irqfd->kian); + } + /* * Install our own custom wake-up handling so we are notified via * a callback whenever someone signals the underlying eventfd @@ -281,10 +298,13 @@ kvm_irqfd_deassign(struct kvm *kvm, int fd, int gsi) int kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags) { + if (flags & ~(KVM_IRQFD_FLAG_DEASSIGN | KVM_IRQFD_FLAG_LEVEL)) + return -EINVAL; + if (flags & KVM_IRQFD_FLAG_DEASSIGN) return kvm_irqfd_deassign(kvm, fd, gsi); - return kvm_irqfd_assign(kvm, fd, gsi); + return kvm_irqfd_assign(kvm, fd, gsi, !!(flags & KVM_IRQFD_FLAG_LEVEL)); } /*
Here's an untested patch with partial support for level triggered interrupts in irqfd. What this patch has: support for clearing interrupt on ack. What this patch does not have: support signalling eventfd on ack so that userspace can take action and e.g. reenable interrupt. Gleb, Marcelo, I'd like your input on the approach taken wrt locking. Does it look sane? Avi, how's the interface? I intend to also add an eventfd probably in the padding in the irqfd struct. Signed-off-by: Michael S. Tsirkin <mst@redhat.com> --- -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html