Message ID | 1231411535-2461-2-git-send-email-sheng@linux.intel.com (mailing list archive) |
---|---|
State | Accepted, archived |
Headers | show |
On Thu, Jan 08, 2009 at 06:45:29PM +0800, Sheng Yang wrote: > * ioctls for VM fds > @@ -433,6 +436,8 @@ struct kvm_trace_rec { > #define KVM_ASSIGN_IRQ _IOR(KVMIO, 0x70, \ > struct kvm_assigned_irq) > #define KVM_REINJECT_CONTROL _IO(KVMIO, 0x71) > +#define KVM_REQUEST_GSI_ROUTE _IOWR(KVMIO, 0x72, void *) > +#define KVM_FREE_GSI_ROUTE _IOR(KVMIO, 0x73, void *) Oh this slipped: should be struct kvm_gsi_route_guest. > /* > * ioctls for vcpu fds > @@ -553,4 +558,25 @@ struct kvm_assigned_irq { > #define KVM_DEV_IRQ_ASSIGN_MSI_ACTION KVM_DEV_IRQ_ASSIGN_ENABLE_MSI > #define KVM_DEV_IRQ_ASSIGN_ENABLE_MSI (1 << 0) > > +struct kvm_gsi_route_guest { > + __u32 entries_nr; > + struct kvm_gsi_route_entry_guest *entries; > +}; And you can use a zero sized array here. Sorry :( -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Sheng Yang wrote: > Avi's purpose, to use single kvm_set_irq() to deal with all interrupt, including > MSI. So here is it. > > struct gsi_route_entry is a mapping from a special gsi(with KVM_GSI_MSG_MASK) to > MSI/MSI-X message address/data. And the struct can also be extended for other > purpose. > > Now we support up to 256 gsi_route_entry mapping, and gsi is allocated by kernel and > provide two ioctls to userspace, which is more flexiable. > > @@ -553,4 +558,25 @@ struct kvm_assigned_irq { > #define KVM_DEV_IRQ_ASSIGN_MSI_ACTION KVM_DEV_IRQ_ASSIGN_ENABLE_MSI > #define KVM_DEV_IRQ_ASSIGN_ENABLE_MSI (1 << 0) > > +struct kvm_gsi_route_guest { > + __u32 entries_nr; > Need padding here otherwise offsetof(entries) will differ on 32-bit and 64-bit kernels. > + struct kvm_gsi_route_entry_guest *entries; > Like Marcelo says, zero sized array is better here. > +}; > + > +#define KVM_GSI_ROUTE_MSI (1 << 0) > This looks like a flag. Shouldn't it be a type? > +struct kvm_gsi_route_entry_guest { > what does _guest mean here? almost all kvm stuff is _guest related. > + __u32 gsi; > + __u32 type; > + __u32 flags; > + __u32 reserved; > + union { > + struct { > + __u32 addr_lo; > + __u32 addr_hi; > + __u32 data; > + } msi; > + __u32 padding[8]; > + }; > +}; > + > Since we replace the entire table every time, how do ioapic/pic gsis work? > > /* The guest did something we don't support. */ > @@ -336,6 +339,19 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq, > struct kvm_irq_mask_notifier *kimn); > void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask); > > +#define KVM_GSI_ROUTE_MASK 0x1000000ull > +struct kvm_gsi_route_entry { > + u32 gsi; > + u32 type; > + u32 flags; > + u32 reserved; > + union { > + struct msi_msg msi; > + u32 reserved[8]; > No need for reserved fields in kernel data. 
> + }; > + struct hlist_node link; > +}; > @@ -123,3 +123,73 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask) > kimn->func(kimn, mask); > } > > +int kvm_update_gsi_route(struct kvm *kvm, struct kvm_gsi_route_entry *entry) > +{ > + struct kvm_gsi_route_entry *found_entry, *new_entry; > + int r, gsi; > + > + mutex_lock(&kvm->lock); > + /* Find whether we need a update or a new entry */ > + found_entry = kvm_find_gsi_route_entry(kvm, entry->gsi); > + if (found_entry) > + *found_entry = *entry; > + else { > + gsi = find_first_zero_bit(kvm->gsi_route_bitmap, > + KVM_NR_GSI_ROUTE_ENTRIES); > + if (gsi >= KVM_NR_GSI_ROUTE_ENTRIES) { > + r = -ENOSPC; > + goto out; > + } > + new_entry = kzalloc(sizeof(*new_entry), GFP_KERNEL); > + if (!new_entry) { > + r = -ENOMEM; > + goto out; > + } > + *new_entry = *entry; > + entry->gsi = gsi | KVM_GSI_ROUTE_MASK; > + __set_bit(gsi, kvm->gsi_route_bitmap); > + hlist_add_head(&new_entry->link, &kvm->gsi_route_list); > + } > + r = 0; > +out: > + mutex_unlock(&kvm->lock); > + return r; > +} > Why not throw everything and set the new table? I didn't see where you respond the new KVM_CAP. It looks like a good place to return the maximum size of the table.
On Thursday 08 January 2009 22:20:22 Marcelo Tosatti wrote: > On Thu, Jan 08, 2009 at 06:45:29PM +0800, Sheng Yang wrote: > > * ioctls for VM fds > > @@ -433,6 +436,8 @@ struct kvm_trace_rec { > > #define KVM_ASSIGN_IRQ _IOR(KVMIO, 0x70, \ > > struct kvm_assigned_irq) > > #define KVM_REINJECT_CONTROL _IO(KVMIO, 0x71) > > +#define KVM_REQUEST_GSI_ROUTE _IOWR(KVMIO, 0x72, void *) > > +#define KVM_FREE_GSI_ROUTE _IOR(KVMIO, 0x73, void *) > > Oh this slipped: should be struct kvm_gsi_route_guest. Oh, yeah, forgot to change it back(I original purpose a array here...) > > > /* > > * ioctls for vcpu fds > > @@ -553,4 +558,25 @@ struct kvm_assigned_irq { > > #define KVM_DEV_IRQ_ASSIGN_MSI_ACTION KVM_DEV_IRQ_ASSIGN_ENABLE_MSI > > #define KVM_DEV_IRQ_ASSIGN_ENABLE_MSI (1 << 0) > > > > +struct kvm_gsi_route_guest { > > + __u32 entries_nr; > > + struct kvm_gsi_route_entry_guest *entries; > > +}; > > And you can use a zero sized array here. OK. > > Sorry :( Oh, that's not necessary. :)
On Thursday 08 January 2009 23:08:25 Avi Kivity wrote: > Sheng Yang wrote: > > Avi's purpose, to use single kvm_set_irq() to deal with all interrupt, > > including MSI. So here is it. > > > > struct gsi_route_entry is a mapping from a special gsi(with > > KVM_GSI_MSG_MASK) to MSI/MSI-X message address/data. And the struct can > > also be extended for other purpose. > > > > Now we support up to 256 gsi_route_entry mapping, and gsi is allocated by > > kernel and provide two ioctls to userspace, which is more flexiable. > > > > @@ -553,4 +558,25 @@ struct kvm_assigned_irq { > > #define KVM_DEV_IRQ_ASSIGN_MSI_ACTION KVM_DEV_IRQ_ASSIGN_ENABLE_MSI > > #define KVM_DEV_IRQ_ASSIGN_ENABLE_MSI (1 << 0) > > > > +struct kvm_gsi_route_guest { > > + __u32 entries_nr; > > Need padding here otherwise offsetof(entries) will differ on 32-bit and > 64-bit kernels. OK. > > > + struct kvm_gsi_route_entry_guest *entries; > > Like Marcelo says, zero sized array is better here. > > > +}; > > + > > +#define KVM_GSI_ROUTE_MSI (1 << 0) > > This looks like a flag. Shouldn't it be a type? Oh, custom... Would update. > > +struct kvm_gsi_route_entry_guest { > > what does _guest mean here? almost all kvm stuff is _guest related. Because I can't think of a good name... kvm_gsi_route_entry_guest? kvm_gsi_kernel_route_entry? What's your favorite? :) > > + __u32 gsi; > > + __u32 type; > > + __u32 flags; > > + __u32 reserved; > > + union { > > + struct { > > + __u32 addr_lo; > > + __u32 addr_hi; > > + __u32 data; > > + } msi; > > + __u32 padding[8]; > > + }; > > +}; > > + > > Since we replace the entire table every time, how do ioapic/pic gsis work? > > /* The guest did something we don't support. 
*/ > > @@ -336,6 +339,19 @@ void kvm_unregister_irq_mask_notifier(struct kvm > > *kvm, int irq, struct kvm_irq_mask_notifier *kimn); > > void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask); > > > > +#define KVM_GSI_ROUTE_MASK 0x1000000ull > > +struct kvm_gsi_route_entry { > > + u32 gsi; > > + u32 type; > > + u32 flags; > > + u32 reserved; > > + union { > > + struct msi_msg msi; > > + u32 reserved[8]; > > No need for reserved fields in kernel data. Yeah > > + }; > > + struct hlist_node link; > > +}; > > @@ -123,3 +123,73 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, int > > irq, bool mask) kimn->func(kimn, mask); > > } > > > > +int kvm_update_gsi_route(struct kvm *kvm, struct kvm_gsi_route_entry > > *entry) +{ > > + struct kvm_gsi_route_entry *found_entry, *new_entry; > > + int r, gsi; > > + > > + mutex_lock(&kvm->lock); > > + /* Find whether we need a update or a new entry */ > > + found_entry = kvm_find_gsi_route_entry(kvm, entry->gsi); > > + if (found_entry) > > + *found_entry = *entry; > > + else { > > + gsi = find_first_zero_bit(kvm->gsi_route_bitmap, > > + KVM_NR_GSI_ROUTE_ENTRIES); > > + if (gsi >= KVM_NR_GSI_ROUTE_ENTRIES) { > > + r = -ENOSPC; > > + goto out; > > + } > > + new_entry = kzalloc(sizeof(*new_entry), GFP_KERNEL); > > + if (!new_entry) { > > + r = -ENOMEM; > > + goto out; > > + } > > + *new_entry = *entry; > > + entry->gsi = gsi | KVM_GSI_ROUTE_MASK; > > + __set_bit(gsi, kvm->gsi_route_bitmap); > > + hlist_add_head(&new_entry->link, &kvm->gsi_route_list); > > + } > > + r = 0; > > +out: > > + mutex_unlock(&kvm->lock); > > + return r; > > +} > > Why not throw everything and set the new table? Userspace to maintain a big route table? Just for MSI/MSI-X it's easy, but for others, a global one is needed, and need follow up more maintain functions. For kernel, a little more effect can archive this, like this. So I do it in this way. > I didn't see where you respond the new KVM_CAP. 
It looks like a good > place to return the maximum size of the table. I just use it as an #ifdef in userspace now, since there is no user other than MSI/MSI-X at the moment. And if we keep maintaining it in the kernel, we would return the free size instead of the maximum size.
Sheng Yang wrote: >>> +struct kvm_gsi_route_entry_guest { >>> >> what does _guest mean here? almost all kvm stuff is _guest related. >> > > Because I can't think of a good name... kvm_gsi_route_entry_guest? > kvm_gsi_kernel_route_entry? What's your favorite? :) > > kvm_gsi_route_entry? >> >> Since we replace the entire table every time, how do ioapic/pic gsis work? >> Missed this question... >> Why not throw everything and set the new table? >> > > Userspace to maintain a big route table? Just for MSI/MSI-X it's easy, but for > others, a global one is needed, and need follow up more maintain functions. > For kernel, a little more effect can archive this, like this. So I do it in > this way. > Sorry, I don't understand the reply. >> I didn't see where you respond the new KVM_CAP. It looks like a good >> place to return the maximum size of the table. >> > > I just use it as #ifdef in userspace now, for no user other than MSI/MSI-X > now. And if we keep maintaining it in kernel, we would return free size > instead of maximum size.. > We need to allow userspace to change pic/ioapic routing for the HPET. There are two styles of maintaining the table: 1. add/remove ioctls The advantage is that very little work needs to be done when something changes, but the code size (and bug count) doubles. 2. replace everything ioctl Smaller code size, but slower if there are high frequency changes I don't think we'll see high frequency interrupt routing changes; we'll probably have one on setup (for HPET), another when switching from ACPI PIC mode to ACPI APIC mode, and one for every msi initialized.
On Fri, Jan 09, 2009 at 08:06:01PM +0200, Avi Kivity wrote: > Sheng Yang wrote: >>>> +struct kvm_gsi_route_entry_guest { >>>> >>> what does _guest mean here? almost all kvm stuff is _guest related. >>> >> >> Because I can't think of a good name... kvm_gsi_route_entry_guest? >> kvm_gsi_kernel_route_entry? What's your favorite? :) >> >> > > kvm_gsi_route_entry? Which is used for kernel one now... > >>> >>> Since we replace the entire table every time, how do ioapic/pic gsis work? >>> > > Missed this question... My comments below... > >>> Why not throw everything and set the new table? >>> >> >> Userspace to maintain a big route table? Just for MSI/MSI-X it's easy, >> but for others, a global one is needed, and need follow up more >> maintain functions. For kernel, a little more effect can archive this, >> like this. So I do it in this way. >> > > Sorry, I don't understand the reply. > >>> I didn't see where you respond the new KVM_CAP. It looks like a good >>> place to return the maximum size of the table. >>> >> >> I just use it as #ifdef in userspace now, for no user other than >> MSI/MSI-X now. And if we keep maintaining it in kernel, we would return >> free size instead of maximum size.. >> > > We need to allow userspace to change pic/ioapic routing for the HPET. > > There are two styles of maintaining the table: > > 1. add/remove ioctls > > The advantage is that very little work needs to be done when something > changes, but the code size (and bug count) doubles. > > 2. replace everything ioctl > > Smaller code size, but slower if there are high frequency changes > > I don't think we'll see high frequency interrupt routing changes; we'll > probably have one on setup (for HPET), another when switching from ACPI > PIC mode to ACPI APIC mode, and one for every msi initialized. I come to option 2 mix with option 1 now. 
What I meant is, the MSI part is in the device-assignment part, and HPET and others are in some other place, so a global table would have to be maintained for these three across the parts of userspace. I don't like the global gsi route table, especially since we already have one in kernel space. We have to do some maintenance work in the kernel space anyway, e.g. looking up an entry, so I think a small add-on can take the job. And now I think your original proposal is three different tables for MSI, HPET and ACPI APIC mode? This also avoids a global big table in userspace. But it seems like a waste, and also too specific... So how about this? One ioctl to replace everything, and another (maybe two: transfer the entry number, then the table; or one with the maximum number of entries in userspace) can export the current gsi route table? This can avoid the global route table, as well as reduce the complexity. What do you think?
On Fri, Jan 09, 2009 at 08:06:01PM +0200, Avi Kivity wrote: > Sheng Yang wrote: >> I just use it as #ifdef in userspace now, for no user other than >> MSI/MSI-X now. And if we keep maintaining it in kernel, we would return >> free size instead of maximum size.. >> > > We need to allow userspace to change pic/ioapic routing for the HPET. > > There are two styles of maintaining the table: > > 1. add/remove ioctls > > The advantage is that very little work needs to be done when something > changes, but the code size (and bug count) doubles. > > 2. replace everything ioctl > > Smaller code size, but slower if there are high frequency changes > > I don't think we'll see high frequency interrupt routing changes; we'll > probably have one on setup (for HPET), another when switching from ACPI > PIC mode to ACPI APIC mode, and one for every msi initialized. Hi, Avi After reconsidering, I must say I prefer add/remove ioctls. About the code size, I don't think it would increase much. I've rewritten the code twice, I think I know the difference is little. For the option 2 route table ioctl, we got a array from userspace, and would convert it to linked list and keep it in kernel. That's a kind of must(I don't think you would prefer use a array in kernel), and it's very direct. So, we have to insert/delete route entry for both. What's the real difference we do it one by one or do it all at once. I don't think it is much different on the code size. And it's indeed very clear and direct. Beside this, option 2 seems strange. Why should we handle this table in this way when it won't result in significant code reduce. Insert/delete entry it there, look up entry is also there, not many things changed. And it's not that direct as option 1, which also can be a source of bugs. How do you think?
Sheng Yang wrote: > On Fri, Jan 09, 2009 at 08:06:01PM +0200, Avi Kivity wrote: > >> Sheng Yang wrote: >> >>>>> +struct kvm_gsi_route_entry_guest { >>>>> >>>>> >>>> what does _guest mean here? almost all kvm stuff is _guest related. >>>> >>>> >>> Because I can't think of a good name... kvm_gsi_route_entry_guest? >>> kvm_gsi_kernel_route_entry? What's your favorite? :) >>> >>> >>> >> kvm_gsi_route_entry? >> > > Which is used for kernel one now... > So add _kernel to that... It's more important to keep the userspace interface clean. >> We need to allow userspace to change pic/ioapic routing for the HPET. >> >> There are two styles of maintaining the table: >> >> 1. add/remove ioctls >> >> The advantage is that very little work needs to be done when something >> changes, but the code size (and bug count) doubles. >> >> 2. replace everything ioctl >> >> Smaller code size, but slower if there are high frequency changes >> >> I don't think we'll see high frequency interrupt routing changes; we'll >> probably have one on setup (for HPET), another when switching from ACPI >> PIC mode to ACPI APIC mode, and one for every msi initialized. >> > > I come to option 2 mix with option 1 now. What I meant is, MSI part is in > device-assignment part, and HPET and others are in some other place, so a > global table should be maintained for these three across the parts of > userspace. I don't like the global gsi route table, and especially we > already got one in kernel space. We have to do some maintain work in the > kernel space, e.g. looking up a entry, so I think a little add-on can take > the job. > > And now I think you original purpose is three different tables for MSI, HPET > and ACPI APIC mode? This also avoid global big table in userspace. But it > seems like a waste, and also too specific... > > So how about this? 
One ioctl to replace everything, another(maybe two, > transfer entry number then table, or one with maximum entries number in > userspace) can export current gsi route table? This can avoid the global > route table, as well as reduce the complexity. > I still don't understand. We already have all of the information needed in userspace. We know whether HPET is in use or not, whether PIC mode or APIC mode is enabled, and what MSIs are enabled. We need all this at least for live migration. In the kernel, we'll need a function to free the table (for VM shutdown), so adding a function to populate a new table seems to be the least amount of work. But I think we're miscommunicating, maybe I'm misunderstanding what you're suggesting.
Sheng Yang wrote: > After reconsidering, I must say I prefer add/remove ioctls. > > About the code size, I don't think it would increase much. I've rewritten > the code twice, I think I know the difference is little. > :( sorry about that. > For the option 2 route table ioctl, we got a array from userspace, and would > convert it to linked list and keep it in kernel. That's a kind of must(I > don't think you would prefer use a array in kernel), and it's very direct. > Actually, eventually we'd want an array indexed by gsi. Each element would be a pointer to another array (one or two routing entries). Certainly we don't want to iterate a list which could hold several hundred interrupts for a large guest. It's okay to start with a linked list, but eventually we'll want something faster. > So, we have to insert/delete route entry for both. What's the real > difference we do it one by one or do it all at once. I don't think it is > much different on the code size. And it's indeed very clear and direct. > > Beside this, option 2 seems strange. Why should we handle this table in this > way when it won't result in significant code reduce. Insert/delete entry it > there, look up entry is also there, not many things changed. And it's not > that direct as option 1, which also can be a source of bugs. > > How do you think? > I'm not convinced. Please post your latest, and I will post a counter-proposal.
Avi Kivity wrote: > > 1. add/remove ioctls > > The advantage is that very little work needs to be done when something > changes, but the code size (and bug count) doubles. > One disadvantage of add/remove is that we cannot effect a change atomically. Probably not a big deal, but something to keep in mind.
On Sunday 11 January 2009 17:38:22 Avi Kivity wrote: > Sheng Yang wrote: > > After reconsidering, I must say I prefer add/remove ioctls. > > > > About the code size, I don't think it would increase much. I've rewritten > > the code twice, I think I know the difference is little. > > > :( sorry about that. > : > > For the option 2 route table ioctl, we got a array from userspace, and > > would convert it to linked list and keep it in kernel. That's a kind of > > must(I don't think you would prefer use a array in kernel), and it's very > > direct. > > Actually, eventually we'd want an array indexed by gsi. Each element > would be a pointer to another array (one or two routing entries). > > Certainly we don't want to iterate a list which could hold several > hundred interrupts for a large guest. > > It's okay to start with a linked list, but eventually we'll want > something faster. Oh, I see. What I means here is allocate/deallocate would cause some memory fragments, but seems not that critical here. > > > So, we have to insert/delete route entry for both. What's the real > > difference we do it one by one or do it all at once. I don't think it is > > much different on the code size. And it's indeed very clear and direct. > > > > Beside this, option 2 seems strange. Why should we handle this table in > > this way when it won't result in significant code reduce. Insert/delete > > entry it there, look up entry is also there, not many things changed. And > > it's not that direct as option 1, which also can be a source of bugs. > > > > How do you think? > > I'm not convinced. Please post your latest, and I will post a > counter-proposal. OK.
diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 71c150f..bbefce6 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -399,6 +399,9 @@ struct kvm_trace_rec { #if defined(CONFIG_X86) #define KVM_CAP_REINJECT_CONTROL 24 #endif +#if defined(CONFIG_X86) +#define KVM_CAP_GSI_ROUTE 25 +#endif /* * ioctls for VM fds @@ -433,6 +436,8 @@ struct kvm_trace_rec { #define KVM_ASSIGN_IRQ _IOR(KVMIO, 0x70, \ struct kvm_assigned_irq) #define KVM_REINJECT_CONTROL _IO(KVMIO, 0x71) +#define KVM_REQUEST_GSI_ROUTE _IOWR(KVMIO, 0x72, void *) +#define KVM_FREE_GSI_ROUTE _IOR(KVMIO, 0x73, void *) /* * ioctls for vcpu fds @@ -553,4 +558,25 @@ struct kvm_assigned_irq { #define KVM_DEV_IRQ_ASSIGN_MSI_ACTION KVM_DEV_IRQ_ASSIGN_ENABLE_MSI #define KVM_DEV_IRQ_ASSIGN_ENABLE_MSI (1 << 0) +struct kvm_gsi_route_guest { + __u32 entries_nr; + struct kvm_gsi_route_entry_guest *entries; +}; + +#define KVM_GSI_ROUTE_MSI (1 << 0) +struct kvm_gsi_route_entry_guest { + __u32 gsi; + __u32 type; + __u32 flags; + __u32 reserved; + union { + struct { + __u32 addr_lo; + __u32 addr_hi; + __u32 data; + } msi; + __u32 padding[8]; + }; +}; + #endif diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index a8bcad0..6a00201 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -136,6 +136,9 @@ struct kvm { unsigned long mmu_notifier_seq; long mmu_notifier_count; #endif + struct hlist_head gsi_route_list; +#define KVM_NR_GSI_ROUTE_ENTRIES 256 + DECLARE_BITMAP(gsi_route_bitmap, KVM_NR_GSI_ROUTE_ENTRIES); }; /* The guest did something we don't support. 
*/ @@ -336,6 +339,19 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq, struct kvm_irq_mask_notifier *kimn); void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask); +#define KVM_GSI_ROUTE_MASK 0x1000000ull +struct kvm_gsi_route_entry { + u32 gsi; + u32 type; + u32 flags; + u32 reserved; + union { + struct msi_msg msi; + u32 reserved[8]; + }; + struct hlist_node link; +}; + void kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level); void kvm_notify_acked_irq(struct kvm *kvm, unsigned gsi); void kvm_register_irq_ack_notifier(struct kvm *kvm, @@ -343,6 +359,10 @@ void kvm_register_irq_ack_notifier(struct kvm *kvm, void kvm_unregister_irq_ack_notifier(struct kvm_irq_ack_notifier *kian); int kvm_request_irq_source_id(struct kvm *kvm); void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id); +int kvm_update_gsi_route(struct kvm *kvm, struct kvm_gsi_route_entry *entry); +struct kvm_gsi_route_entry *kvm_find_gsi_route_entry(struct kvm *kvm, u32 gsi); +void kvm_free_gsi_route(struct kvm *kvm, struct kvm_gsi_route_entry *entry); +void kvm_free_gsi_route_list(struct kvm *kvm); #ifdef CONFIG_DMAR int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn, diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index 5162a41..64ca4cf 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -123,3 +123,73 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask) kimn->func(kimn, mask); } +int kvm_update_gsi_route(struct kvm *kvm, struct kvm_gsi_route_entry *entry) +{ + struct kvm_gsi_route_entry *found_entry, *new_entry; + int r, gsi; + + mutex_lock(&kvm->lock); + /* Find whether we need a update or a new entry */ + found_entry = kvm_find_gsi_route_entry(kvm, entry->gsi); + if (found_entry) + *found_entry = *entry; + else { + gsi = find_first_zero_bit(kvm->gsi_route_bitmap, + KVM_NR_GSI_ROUTE_ENTRIES); + if (gsi >= KVM_NR_GSI_ROUTE_ENTRIES) { + r = -ENOSPC; + goto out; + } + new_entry = kzalloc(sizeof(*new_entry), 
GFP_KERNEL); + if (!new_entry) { + r = -ENOMEM; + goto out; + } + *new_entry = *entry; + entry->gsi = gsi | KVM_GSI_ROUTE_MASK; + __set_bit(gsi, kvm->gsi_route_bitmap); + hlist_add_head(&new_entry->link, &kvm->gsi_route_list); + } + r = 0; +out: + mutex_unlock(&kvm->lock); + return r; +} + +/* Call with kvm->lock hold */ +struct kvm_gsi_route_entry *kvm_find_gsi_route_entry(struct kvm *kvm, u32 gsi) +{ + struct kvm_gsi_route_entry *entry; + struct hlist_node *n; + + if (!(gsi & KVM_GSI_ROUTE_MASK)) + return NULL; + hlist_for_each_entry(entry, n, &kvm->gsi_route_list, link) + if (entry->gsi == gsi) + goto out; + entry = NULL; +out: + return entry; +} + +/* Call with kvm->lock hold */ +void kvm_free_gsi_route(struct kvm *kvm, struct kvm_gsi_route_entry *entry) +{ + if (!entry) + return; + __clear_bit(entry->gsi & ~KVM_GSI_ROUTE_MASK, kvm->gsi_route_bitmap); + hlist_del(&entry->link); + kfree(entry); +} + +void kvm_free_gsi_route_list(struct kvm *kvm) +{ + struct kvm_gsi_route_entry *entry; + struct hlist_node *pos, *n; + + mutex_lock(&kvm->lock); + hlist_for_each_entry_safe(entry, pos, n, &kvm->gsi_route_list, link) + kvm_free_gsi_route(kvm, entry); + mutex_unlock(&kvm->lock); +} + diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 61688a6..8aa939c 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -839,6 +839,7 @@ static struct kvm *kvm_create_vm(void) #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET kvm_coalesced_mmio_init(kvm); #endif + INIT_HLIST_HEAD(&kvm->gsi_route_list); out: return kvm; } @@ -877,6 +878,7 @@ static void kvm_destroy_vm(struct kvm *kvm) struct mm_struct *mm = kvm->mm; kvm_arch_sync_events(kvm); + kvm_free_gsi_route_list(kvm); spin_lock(&kvm_lock); list_del(&kvm->vm_list); spin_unlock(&kvm_lock); @@ -1605,6 +1607,45 @@ static int kvm_vcpu_ioctl_set_sigmask(struct kvm_vcpu *vcpu, sigset_t *sigset) return 0; } +static int kvm_vm_ioctl_request_gsi_route(struct kvm *kvm, + struct kvm_gsi_route_guest *gsi_route, + struct 
kvm_gsi_route_entry_guest *guest_entries) +{ + struct kvm_gsi_route_entry entry; + int r, i; + + for (i = 0; i < gsi_route->entries_nr; i++) { + memcpy(&entry, &guest_entries[i], sizeof(entry)); + r = kvm_update_gsi_route(kvm, &entry); + if (r == 0) + guest_entries[i].gsi = entry.gsi; + else + break; + } + return r; +} + +static int kvm_vm_ioctl_free_gsi_route(struct kvm *kvm, + struct kvm_gsi_route_guest *gsi_route, + struct kvm_gsi_route_entry_guest *guest_entries) +{ + struct kvm_gsi_route_entry *entry; + int r, i; + + mutex_lock(&kvm->lock); + for (i = 0; i < gsi_route->entries_nr; i++) { + entry = kvm_find_gsi_route_entry(kvm, guest_entries[i].gsi); + if (!entry) { + r = -EINVAL; + goto out; + } + kvm_free_gsi_route(kvm, entry); + } +out: + mutex_unlock(&kvm->lock); + return r; +} + static long kvm_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -1803,6 +1844,7 @@ static long kvm_vm_ioctl(struct file *filp, { struct kvm *kvm = filp->private_data; void __user *argp = (void __user *)arg; + struct kvm_gsi_route_entry_guest *gsi_entries = NULL; int r; if (kvm->mm != current->mm) @@ -1887,10 +1929,73 @@ static long kvm_vm_ioctl(struct file *filp, break; } #endif + case KVM_REQUEST_GSI_ROUTE: { + struct kvm_gsi_route_guest gsi_route; + r = -EFAULT; + if (copy_from_user(&gsi_route, argp, sizeof gsi_route)) + goto out; + if (gsi_route.entries_nr == 0 || + gsi_route.entries_nr >= KVM_NR_GSI_ROUTE_ENTRIES) { + r = -EFAULT; + goto out; + } + gsi_entries = vmalloc(gsi_route.entries_nr * + sizeof(struct kvm_gsi_route_entry_guest)); + if (!gsi_entries) { + r = -ENOMEM; + goto out; + } + r = -EFAULT; + if (copy_from_user(gsi_entries, + (void __user *)gsi_route.entries, + gsi_route.entries_nr * + sizeof(struct kvm_gsi_route_entry_guest))) + goto out; + r = kvm_vm_ioctl_request_gsi_route(kvm, &gsi_route, + gsi_entries); + if (r) + goto out; + r = -EFAULT; + if (copy_to_user((void __user *)gsi_route.entries, + gsi_entries, + gsi_route.entries_nr * + 
sizeof(struct kvm_gsi_route_entry_guest))) + goto out; + r = 0; + break; + } + case KVM_FREE_GSI_ROUTE: { + struct kvm_gsi_route_guest gsi_route; + r = -EFAULT; + if (copy_from_user(&gsi_route, argp, sizeof gsi_route)) + goto out; + if (gsi_route.entries_nr == 0 || + gsi_route.entries_nr >= KVM_NR_GSI_ROUTE_ENTRIES) { + r = -EFAULT; + goto out; + } + gsi_entries = vmalloc(gsi_route.entries_nr * + sizeof(struct kvm_gsi_route_entry_guest)); + if (!gsi_entries) { + r = -ENOMEM; + goto out; + } + r = -EFAULT; + if (copy_from_user(gsi_entries, + (void __user *)gsi_route.entries, + gsi_route.entries_nr * + sizeof(struct kvm_gsi_route_entry_guest))) + goto out; + r = kvm_vm_ioctl_free_gsi_route(kvm, &gsi_route, gsi_entries); + if (r) + goto out; + break; + } default: r = kvm_arch_vm_ioctl(filp, ioctl, arg); } out: + vfree(gsi_entries); return r; }
Avi's proposal: use a single kvm_set_irq() to deal with all interrupts, including MSI. So here it is. struct gsi_route_entry is a mapping from a special gsi (with KVM_GSI_MSG_MASK) to an MSI/MSI-X message address/data. The struct can also be extended for other purposes. Now we support up to 256 gsi_route_entry mappings; the gsi is allocated by the kernel, and we provide two ioctls to userspace, which is more flexible. Signed-off-by: Sheng Yang <sheng@linux.intel.com> --- include/linux/kvm.h | 26 +++++++++++ include/linux/kvm_host.h | 20 +++++++++ virt/kvm/irq_comm.c | 70 ++++++++++++++++++++++++++++++ virt/kvm/kvm_main.c | 105 ++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 221 insertions(+), 0 deletions(-)