Message ID | 1304735660-10844-1-git-send-email-asias.hejun@gmail.com (mailing list archive) |
---|---|
State | New, archived |
* Asias He <asias.hejun@gmail.com> wrote:

> Inject an IRQ to the guest only when the ISR status is low, which
> means the guest has read the ISR status and the device has cleared
> this bit as a side effect of that read.
>
> This reduces a lot of unnecessary IRQ injections from device to
> guest.
>
> A netperf test shows this patch changes:
>
> the host to guest bandwidth
> from 2866.27 Mbps (cpu 33.96%) to 5548.87 Mbps (cpu 53.87%),
>
> the guest to host bandwidth
> from 1408.86 Mbps (cpu 99.9%) to 1301.29 Mbps (cpu 99.9%).
>
> The bottleneck of the guest to host bandwidth is guest cpu power.
>
> Signed-off-by: Asias He <asias.hejun@gmail.com>
> ---
>  tools/kvm/include/kvm/virtio.h |    5 +++++
>  tools/kvm/virtio/core.c        |    8 ++++++++
>  tools/kvm/virtio/net.c         |   12 ++++++++----
>  3 files changed, 21 insertions(+), 4 deletions(-)
>
> [diff snipped -- the full patch appears at the end of this page]

Hm, the ISR flag seems to be an explicit IRQ-ack mechanism, not just an
optimization.
Perhaps the guest kernel side virtio driver expects us to honor these
acks and not inject double irqs when it does not expect them?

There's this code in drivers/virtio/virtio_pci.c:

	/* reading the ISR has the effect of also clearing it so it's very
	 * important to save off the value. */
	isr = ioread8(vp_dev->ioaddr + VIRTIO_PCI_ISR);

Which seems to suggest that this ISR flag is more important than just a
performance hint.

Pekka: was this the patch perhaps that fixed the ping latency problem
for you?

Could any virtio gents on Cc: please confirm/deny this theory? :-)

The original problem was that the virtio-net driver in
tools/kvm/virtio/net.c was producing unexplained latencies (long ping
latencies) under certain circumstances. Sometimes it triggered
spontaneously, sometimes it needed a ping -f flood to trigger. The root
cause of that race is still not understood.

Thanks,

	Ingo
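For context, the guest-side interrupt handler around that ISR read in
drivers/virtio/virtio_pci.c of this era looks roughly like the sketch
below (abridged and reconstructed, so treat the details as
approximate). The early return on !isr is what makes the flag matter on
shared lines:

	static irqreturn_t vp_interrupt(int irq, void *opaque)
	{
		struct virtio_pci_device *vp_dev = opaque;
		u8 isr;

		/* reading the ISR has the effect of also clearing it so it's
		 * very important to save off the value. */
		isr = ioread8(vp_dev->ioaddr + VIRTIO_PCI_ISR);

		/* The line may be shared: if ISR reads back zero, this
		 * interrupt was not for us. */
		if (!isr)
			return IRQ_NONE;

		/* Configuration change? Tell the driver if it wants to know. */
		if (isr & VIRTIO_PCI_ISR_CONFIG)
			vp_config_changed(irq, opaque);

		return vp_vring_interrupt(irq, opaque);
	}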
On Sat, 2011-05-07 at 11:30 +0200, Ingo Molnar wrote:
> * Asias He <asias.hejun@gmail.com> wrote:
>
> > Inject an IRQ to the guest only when the ISR status is low, which
> > means the guest has read the ISR status and the device has cleared
> > this bit as a side effect of that read.
> >
> > [...]
>
> Hm, the ISR flag seems to be an explicit IRQ-ack mechanism, not just
> an optimization.
>
> Perhaps the guest kernel side virtio driver expects us to honor these
> acks and not inject double irqs when it does not expect them?
>
> [...]
>
> The original problem was that the virtio-net driver in
> tools/kvm/virtio/net.c was producing unexplained latencies (long ping
> latencies) under certain circumstances. Sometimes it triggered
> spontaneously, sometimes it needed a ping -f flood to trigger. The
> root cause of that race is still not understood.

Looks like it solved the ping -f issue here.

Why was this change only implemented in virtio-net? Shouldn't it go to
the other virtio drivers as well?
On Sat, 2011-05-07 at 13:34 +0300, Sasha Levin wrote:
> Why was this change only implemented in virtio-net? Shouldn't it go
> to the other virtio drivers as well?

Yes, definitely.
On 05/07/2011 06:34 PM, Sasha Levin wrote:
> On Sat, 2011-05-07 at 11:30 +0200, Ingo Molnar wrote:
>> [...]
>
> Looks like it solved the ping -f issue here.
>
> Why was this change only implemented in virtio-net? Shouldn't it go
> to the other virtio drivers as well?

I will send follow-up patches to virtio-* ;-)
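Such a follow-up to, e.g., tools/kvm/virtio/blk.c would presumably
mirror the net change. A minimal sketch, assuming the blk device grows
the same u8 isr field and that blk.c raises VIRTIO_BLK_IRQ the way
net.c raises VIRTIO_NET_IRQ (the blk_device field names here are
illustrative):

	/* after completing requests and updating the used ring: */
	-	kvm__irq_line(self, VIRTIO_BLK_IRQ, 1);
	+	virt_queue__trigger_irq(vq, VIRTIO_BLK_IRQ, &blk_device.isr, self);

	/* and in the VIRTIO_PCI_ISR read handler: */
		case VIRTIO_PCI_ISR:
			ioport__write8(data, blk_device.isr);
			kvm__irq_line(self, VIRTIO_BLK_IRQ, VIRTIO_IRQ_LOW);
			blk_device.isr = VIRTIO_IRQ_LOW;
			break;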
On 05/07/2011 05:30 PM, Ingo Molnar wrote:
>
> * Asias He <asias.hejun@gmail.com> wrote:
>
>> Inject an IRQ to the guest only when the ISR status is low, which
>> means the guest has read the ISR status and the device has cleared
>> this bit as a side effect of that read.
>>
>> [...]
>
> Hm, the ISR flag seems to be an explicit IRQ-ack mechanism, not just
> an optimization.
>
> Perhaps the guest kernel side virtio driver expects us to honor these
> acks and not inject double irqs when it does not expect them?
>
> There's this code in drivers/virtio/virtio_pci.c:
>
>	/* reading the ISR has the effect of also clearing it so it's very
>	 * important to save off the value. */
>	isr = ioread8(vp_dev->ioaddr + VIRTIO_PCI_ISR);
>
> Which seems to suggest that this ISR flag is more important than just
> a performance hint.
>
> Pekka: was this the patch perhaps that fixed the ping latency problem
> for you?
>
> Could any virtio gents on Cc: please confirm/deny this theory? :-)
>
> The original problem was that the virtio-net driver in
> tools/kvm/virtio/net.c was producing unexplained latencies (long ping
> latencies) under certain circumstances. Sometimes it triggered
> spontaneously, sometimes it needed a ping -f flood to trigger. The
> root cause of that race is still not understood.

I used KVM_IRQ_LINE_STATUS to trigger IRQs instead of KVM_IRQ_LINE
while I was fighting the network stall/hang issue. KVM_IRQ_LINE_STATUS
reports the IRQ injection status to userspace. See commit
4925663a079c77d95d8685228ad6675fc5639c8e for details.

I found that there were a huge number of IRQ injections with status 0
or -1 when guest and host were ping flooding each other simultaneously.
I think the root cause is the IRQ race.

I also found that when the network hangs, the guest kernel refuses to
give any avail buffers in the rx queue to the device. At that time,
vq->last_used_idx equals vq->vring.used->idx in the rx queue, so even
with manual IRQ injection using the debug key ctrl-a-i, the network
still hangs.

BTW, the ping latency was caused by moving the IRQ injection outside
the loop. Suppose we have 5 available buffers and only 1 buffer's worth
of data from the tap device: we will sleep in the read without handing
the filled buffer from the tap to the guest, and the latency will be
huge in this case.

	while (virt_queue__available(vq)) {
		...
		read(tap_fd)
		...
	}
	trigger_irq()
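For reference, a minimal sketch of the debugging aid Asias describes,
assuming tools/kvm keeps the VM file descriptor in kvm->vm_fd the way
its kvm__irq_line() wrapper does. KVM_IRQ_LINE_STATUS takes the same
struct kvm_irq_level as KVM_IRQ_LINE but reports the injection status
back through the struct (>0: delivered, 0: coalesced, <0: not
delivered):

	#include <linux/kvm.h>
	#include <sys/ioctl.h>

	/* Like a plain KVM_IRQ_LINE injection, but returns the status
	 * KVM reports so the caller can spot coalesced/failed IRQs. */
	static int kvm__irq_line_status(struct kvm *kvm, int irq, int level)
	{
		struct kvm_irq_level irq_level;

		irq_level.irq	= irq;
		irq_level.level	= level;

		if (ioctl(kvm->vm_fd, KVM_IRQ_LINE_STATUS, &irq_level) < 0)
			return -1;

		/* .status shares storage with .irq in a union. */
		return irq_level.status;
	}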
On 05/07/2011 04:30 AM, Ingo Molnar wrote:
> Hm, the ISR flag seems to be an explicit IRQ-ack mechanism, not just
> an optimization.
>
> [...]
>
> Could any virtio gents on Cc: please confirm/deny this theory? :-)

When using PCI LNK interrupts, the ISR flag serves two purposes. It
indicates that an interrupt was raised (since the actual interrupt line
may be shared) and it is used to acknowledge the interrupt (since PCI
LNK lines are level triggered).

It seems like this patch is simply avoiding raising the interrupt line
if the ISR has not been acknowledged yet. I don't think there's a
functional issue here, but I'm surprised that it's a win. There should
be a very short window when the interrupt is lowered in the APIC but
still not acknowledged in the ISR.

You should just be saving a pretty cheap system call. I wonder if the
system call is taking longer than it should..

Regards,

Anthony Liguori
* Asias He <asias.hejun@gmail.com> wrote:

> BTW, the ping latency was caused by moving the IRQ injection outside
> the loop. Suppose we have 5 available buffers and only 1 buffer's
> worth of data from the tap device: we will sleep in the read without
> handing the filled buffer from the tap to the guest, and the latency
> will be huge in this case.
>
>	while (virt_queue__available(vq)) {
>		...
>		read(tap_fd)
>		...
>	}
>	trigger_irq()

But ... in one of the mails one of you claimed that even when moving
the irq notification inside the loop (which we all agreed was necessary
to avoid latencies!) the latencies would still occur during
stress-tests.

So something is still not understood here. It could hit us anytime with
any of the virtio drivers in the future, and such bugs are not always
as nice to debug as the latency problem here ...

Thanks,

	Ingo
* Anthony Liguori <aliguori@us.ibm.com> wrote:

> When using PCI LNK interrupts, the ISR flag serves two purposes. It
> indicates that an interrupt was raised (since the actual interrupt
> line may be shared) and it is used to acknowledge the interrupt
> (since PCI LNK lines are level triggered).

ok.

> It seems like this patch is simply avoiding raising the interrupt
> line if the ISR has not been acknowledged yet. I don't think there's
> a functional issue here [...]

Thanks for confirming this - so i think we still do not understand the
root cause of the ping latency and why this change fixed it ...

> [...] but I'm surprised that it's a win. There should be a very short
> window when the interrupt is lowered in the APIC but still not
> acknowledged in the ISR.
>
> You should just be saving a pretty cheap system call. I wonder if the
> system call is taking longer than it should..

Well, the optimization also avoids unnecessary VM exits (due to the
injection, which interrupts a guest context immediately, even if it's
running on another CPU), not just system calls - so it could be more
expensive than a system call, right?

Thanks,

	Ingo
On 05/07/2011 09:02 AM, Ingo Molnar wrote:
> Well, the optimization also avoids unnecessary VM exits (due to the
> injection, which interrupts a guest context immediately, even if it's
> running on another CPU), not just system calls - so it could be more
> expensive than a system call, right?

If the IRQ is already pending, the syscall should be a nop. If the IRQ
is extraneous, then there's a pretty short window when the IRQ wouldn't
already be pending.

I took a look at the current virtio code and noticed a few things that
are sub-optimal:

1) No MSI. MSI makes interrupts edge triggered and avoids the ISR read
entirely. The ISR read is actually pretty expensive.

2) The access pattern is basically:

	while pending_requests:
		a = get_next_request();
		process_next_request(a);

But this is not the optimal pattern with virtio. The optimal pattern
is:

	if pending_requests:
		disable_notifications();

	again:
		while pending_requests:
			a = get_next_request();
			process_next_request(a);

		enable_notifications();
		if pending_requests:
			goto again;

You're currently taking exits way more than you should, particularly
since you're using threads for processing the ring queues.

3) You aren't advertising VIRTIO_F_NOTIFY_ON_EMPTY. Particularly with
TX, it's advantageous to inject an IRQ, even if they're disabled, when
there are no longer packets in the ring.

(2) and (1) are definitely the most important for performance. (2)
should be a pretty simple change and should have a significant impact.

Regards,

Anthony Liguori
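A minimal device-side sketch of pattern (2), assuming the virt_queue
layout from the patch below, the standard VRING_USED_F_NO_NOTIFY flag
from <linux/virtio_ring.h>, and a hypothetical process_one_request()
helper standing in for the ring-draining body:

	#include <linux/virtio_ring.h>

	/* Drain the ring with guest notifications suppressed, then
	 * re-enable them and re-check, to close the race where the
	 * guest queues a buffer after our last check but before it
	 * sees the cleared flag. */
	static void virt_queue__process(struct virt_queue *vq, struct kvm *kvm)
	{
		if (!virt_queue__available(vq))
			return;

		for (;;) {
			/* Tell the guest not to kick us while we drain. */
			vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY;

			while (virt_queue__available(vq))
				process_one_request(vq, kvm);	/* hypothetical */

			vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY;

			/* Make the flag write visible before re-checking. */
			__sync_synchronize();

			if (!virt_queue__available(vq))
				break;
		}
	}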
On 05/07/2011 10:00 PM, Ingo Molnar wrote:
>
> * Asias He <asias.hejun@gmail.com> wrote:
>
>> BTW, the ping latency was caused by moving the IRQ injection outside
>> the loop. [...]
>
> But ... in one of the mails one of you claimed that even when moving
> the irq notification inside the loop (which we all agreed was
> necessary to avoid latencies!) the latencies would still occur during
> stress-tests.

What I got in stress tests (ping -f) with the irq notification inside
the loop is only network hangs.

> So something is still not understood here. It could hit us anytime
> with any of the virtio drivers in the future, and such bugs are not
> always as nice to debug as the latency problem here ...

Yes, especially when they are under tremendous stress.
* Anthony Liguori <anthony@codemonkey.ws> wrote:

> If the IRQ is already pending, the syscall should be a nop. If the
> IRQ is extraneous, then there's a pretty short window when the IRQ
> wouldn't already be pending.
>
> I took a look at the current virtio code and noticed a few things
> that are sub-optimal:
>
> 1) No MSI. MSI makes interrupts edge triggered and avoids the ISR
> read entirely. The ISR read is actually pretty expensive.
>
> 2) The access pattern is basically:
>
>	while pending_requests:
>		a = get_next_request();
>		process_next_request(a);
>
> But this is not the optimal pattern with virtio. The optimal pattern
> is:
>
>	if pending_requests:
>		disable_notifications();
>
>	again:
>		while pending_requests:
>			a = get_next_request();
>			process_next_request(a);
>
>		enable_notifications();
>		if pending_requests:
>			goto again;
>
> You're currently taking exits way more than you should, particularly
> since you're using threads for processing the ring queues.
>
> 3) You aren't advertising VIRTIO_F_NOTIFY_ON_EMPTY. Particularly with
> TX, it's advantageous to inject an IRQ, even if they're disabled,
> when there are no longer packets in the ring.
>
> (2) and (1) are definitely the most important for performance. (2)
> should be a pretty simple change and should have a significant
> impact.

Can you see anything in the virtio protocol implementation that would
explain networking lags which seem to be caused by guest notifications
either not being sent or being missed?

In particular this sequence:

>	while pending_requests:
>		a = get_next_request();
>		process_next_request(a);

is apparently not what Qemu uses - so maybe there's some latent bug or
some silly oversight somewhere.

It is suboptimal and i agree with you that the better sequence should
be implemented, but the above *should* work, yet it does not.

Thanks,

	Ingo
On Sat, 2011-05-07 at 08:14 -0500, Anthony Liguori wrote:
> It seems like this patch is simply avoiding raising the interrupt
> line if the ISR has not been acknowledged yet. I don't think there's
> a functional issue here, but I'm surprised that it's a win. There
> should be a very short window when the interrupt is lowered in the
> APIC but still not acknowledged in the ISR.
>
> You should just be saving a pretty cheap system call. I wonder if the
> system call is taking longer than it should..

The patch seems to fix a bug where the guest kernel breaks down under
an interrupt storm and stops doing VIRTIO_PCI_QUEUE_NOTIFY. We assumed
it was something in our code, but your comments make me wonder if
there's a real problem in KVM_IRQ_LINE.

			Pekka
On Sat, 2011-05-07 at 16:47 +0200, Ingo Molnar wrote:
> Can you see anything in the virtio protocol implementation that would
> explain networking lags which seem to be caused by guest
> notifications either not being sent or being missed?
>
> [...]
>
> It is suboptimal and i agree with you that the better sequence should
> be implemented, but the above *should* work, yet it does not.

Yes, so the performance benefits of Asias' patch aren't the interesting
part but the fact that it fixes a real bug in our tool.

			Pekka
* Pekka Enberg <penberg@kernel.org> wrote:

> On Sat, 2011-05-07 at 16:47 +0200, Ingo Molnar wrote:
> > [...]
> >
> > It is suboptimal and i agree with you that the better sequence
> > should be implemented, but the above *should* work, yet it does
> > not.
>
> Yes, so the performance benefits of Asias' patch aren't the
> interesting part but the fact that it fixes a real bug in our tool.

It could be the same as the mutex_lock() change: that too seemed to
'fix' the latency bug, but we still do not understand the root cause of
the 'stuck ring-buffer' situation - i.e. some sort of timing-related
condition which goes away spuriously when unrelated but timing-relevant
changes are made to the code.

And we'll continue to see these problems on and off, in probably all
virtio drivers. virtio-console might be suffering from it, virtio-blk,
etc. etc.

I'd suggest freezing changes to this driver until this bug is analyzed
correctly ...

Thanks,

	Ingo
On 05/07/2011 09:50 AM, Pekka Enberg wrote:
> The patch seems to fix a bug where the guest kernel breaks down under
> an interrupt storm and stops doing VIRTIO_PCI_QUEUE_NOTIFY. We
> assumed it was something in our code, but your comments make me
> wonder if there's a real problem in KVM_IRQ_LINE.

Stops doing it for a short period of time or entirely?

Regards,

Anthony Liguori
On Sat, May 7, 2011 at 6:01 PM, Anthony Liguori <aliguori@us.ibm.com> wrote:
> On 05/07/2011 09:50 AM, Pekka Enberg wrote:
>> The patch seems to fix a bug where the guest kernel breaks down
>> under an interrupt storm and stops doing VIRTIO_PCI_QUEUE_NOTIFY. We
>> assumed it was something in our code, but your comments make me
>> wonder if there's a real problem in KVM_IRQ_LINE.
>
> Stops doing it for a short period of time or entirely?

Seems to be entirely. The test case is doing "ping -f" from host to
guest and vice versa, and it takes 30-60 seconds to trigger for me.
* Pekka Enberg <penberg@kernel.org> wrote:

> On Sat, May 7, 2011 at 6:01 PM, Anthony Liguori <aliguori@us.ibm.com> wrote:
> > Stops doing it for a short period of time or entirely?
>
> Seems to be entirely. The test case is doing "ping -f" from host to
> guest and vice versa, and it takes 30-60 seconds to trigger for me.

Here's the condition as described by Asias:

" I also found that when the network hangs, the guest kernel refuses
  to give any avail buffers in the rx queue to the device. At that
  time, vq->last_used_idx equals vq->vring.used->idx in the rx queue,
  so even with manual IRQ injection using the debug key ctrl-a-i, the
  network still hangs. "

Thanks,

	Ingo
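For reference, a hypervisor-side ring-state dump along the lines of
that ctrl-a-i debug key might look like the minimal sketch below,
assuming the struct virt_queue from the patch below plus the
last_avail_idx bookkeeping that virt_queue__available() relies on (the
function name is illustrative). If avail->idx stops advancing while
last_avail_idx has caught up with it, the guest has stopped posting rx
buffers - the stuck state Asias describes:

	#include <stdio.h>

	/* Dump the shared ring indices plus the device's private
	 * bookkeeping so a wedged queue can be spotted at a glance. */
	static void virt_queue__dump_state(struct virt_queue *vq)
	{
		fprintf(stderr, "avail->idx=%u last_avail_idx=%u used->idx=%u\n",
			vq->vring.avail->idx, vq->last_avail_idx,
			vq->vring.used->idx);
	}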
diff --git a/tools/kvm/include/kvm/virtio.h b/tools/kvm/include/kvm/virtio.h
index e8df8eb..7f92dea 100644
--- a/tools/kvm/include/kvm/virtio.h
+++ b/tools/kvm/include/kvm/virtio.h
@@ -8,6 +8,9 @@
 
 #include "kvm/kvm.h"
 
+#define VIRTIO_IRQ_LOW	0
+#define VIRTIO_IRQ_HIGH	1
+
 struct virt_queue {
 	struct vring vring;
 	u32 pfn;
@@ -37,4 +40,6 @@ struct vring_used_elem *virt_queue__set_used_elem(struct virt_queue *queue, u32
 
 u16 virt_queue__get_iov(struct virt_queue *queue, struct iovec iov[], u16 *out, u16 *in, struct kvm *kvm);
 
+void virt_queue__trigger_irq(struct virt_queue *vq, int irq, u8 *isr, struct kvm *kvm);
+
 #endif /* KVM__VIRTIO_H */
diff --git a/tools/kvm/virtio/core.c b/tools/kvm/virtio/core.c
index 18d2c41..0734984 100644
--- a/tools/kvm/virtio/core.c
+++ b/tools/kvm/virtio/core.c
@@ -57,3 +57,11 @@ u16 virt_queue__get_iov(struct virt_queue *queue, struct iovec iov[], u16 *out,
 
 	return head;
 }
+
+void virt_queue__trigger_irq(struct virt_queue *vq, int irq, u8 *isr, struct kvm *kvm)
+{
+	if (*isr == VIRTIO_IRQ_LOW) {
+		*isr = VIRTIO_IRQ_HIGH;
+		kvm__irq_line(kvm, irq, VIRTIO_IRQ_HIGH);
+	}
+}
diff --git a/tools/kvm/virtio/net.c b/tools/kvm/virtio/net.c
index df69ab3..0189f7d 100644
--- a/tools/kvm/virtio/net.c
+++ b/tools/kvm/virtio/net.c
@@ -35,6 +35,7 @@ struct net_device {
 	u32 guest_features;
 	u16 config_vector;
 	u8 status;
+	u8 isr;
 	u16 queue_selector;
 
 	pthread_t io_rx_thread;
@@ -88,8 +89,9 @@ static void *virtio_net_rx_thread(void *p)
 			head = virt_queue__get_iov(vq, iov, &out, &in, self);
 			len = readv(net_device.tap_fd, iov, in);
 			virt_queue__set_used_elem(vq, head, len);
+
 			/* We should interrupt guest right now, otherwise latency is huge. */
-			kvm__irq_line(self, VIRTIO_NET_IRQ, 1);
+			virt_queue__trigger_irq(vq, VIRTIO_NET_IRQ, &net_device.isr, self);
 		}
 
 	}
@@ -123,7 +125,8 @@ static void *virtio_net_tx_thread(void *p)
 			virt_queue__set_used_elem(vq, head, len);
 		}
 
-		kvm__irq_line(self, VIRTIO_NET_IRQ, 1);
+		virt_queue__trigger_irq(vq, VIRTIO_NET_IRQ, &net_device.isr, self);
+
 	}
 
 	pthread_exit(NULL);
@@ -175,8 +178,9 @@ static bool virtio_net_pci_io_in(struct kvm *self, u16 port, void *data, int siz
 		ioport__write8(data, net_device.status);
 		break;
 	case VIRTIO_PCI_ISR:
-		ioport__write8(data, 0x1);
-		kvm__irq_line(self, VIRTIO_NET_IRQ, 0);
+		ioport__write8(data, net_device.isr);
+		kvm__irq_line(self, VIRTIO_NET_IRQ, VIRTIO_IRQ_LOW);
+		net_device.isr = VIRTIO_IRQ_LOW;
 		break;
 	case VIRTIO_MSI_CONFIG_VECTOR:
 		ioport__write16(data, net_device.config_vector);
Inject an IRQ to the guest only when the ISR status is low, which means
the guest has read the ISR status and the device has cleared this bit
as a side effect of that read.

This reduces a lot of unnecessary IRQ injections from device to guest.

A netperf test shows this patch changes:

the host to guest bandwidth
from 2866.27 Mbps (cpu 33.96%) to 5548.87 Mbps (cpu 53.87%),

the guest to host bandwidth
from 1408.86 Mbps (cpu 99.9%) to 1301.29 Mbps (cpu 99.9%).

The bottleneck of the guest to host bandwidth is guest cpu power.

Signed-off-by: Asias He <asias.hejun@gmail.com>
---
 tools/kvm/include/kvm/virtio.h |    5 +++++
 tools/kvm/virtio/core.c        |    8 ++++++++
 tools/kvm/virtio/net.c         |   12 ++++++++----
 3 files changed, 21 insertions(+), 4 deletions(-)