diff mbox series

[v4,08/11] virtio: event suppression support for packed ring

Message ID 1550118402-4057-9-git-send-email-wexu@redhat.com (mailing list archive)
State New, archived
Headers show
Series packed ring virtio-net backends support | expand

Commit Message

Wei Xu Feb. 14, 2019, 4:26 a.m. UTC
From: Wei Xu <wexu@redhat.com>

Difference between 'avail_wrap_counter' and 'last_avail_wrap_counter':
For Tx(guest transmitting), they are the same after each pop of a desc.

For Rx(guest receiving), they are also the same when there are enough
descriptors to carry the payload for a packet(e.g. usually 16 descs are
needed for a 64k packet in typical iperf tcp connection with tso enabled),
however, when the ring is running out of descriptors while there are
still a few free ones, e.g. 6 descriptors are available which is not
enough to carry an entire packet which needs 16 descriptors, in this
case the 'avail_wrap_counter' should be set to the wrap counter of the
first descriptor still pending to be handled by the guest driver, in order
to get a notification, while the 'last_avail_wrap_counter' should stay
unchanged, pointing to the head of the available
descriptors, like below:

Mark meaning:
    | | -- available
    |*| -- used

A Snapshot of the queue:
                                      last_avail_idx = 253
                                      last_avail_wrap_counter = 1
                                             |
    +---------------------------------------------+
 0  | | | |*|*|*|*|*|*|*|*|*|*|*|*|*|*|*|*|*| | | | 255
    +---------------------------------------------+
           |
  shadow_avail_idx = 3
  avail_wrap_counter = 0

Signed-off-by: Wei Xu <wexu@redhat.com>
---
 hw/virtio/virtio.c | 137 +++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 128 insertions(+), 9 deletions(-)

Comments

Jason Wang Feb. 19, 2019, 7:19 a.m. UTC | #1
On 2019/2/14 下午12:26, wexu@redhat.com wrote:
> From: Wei Xu <wexu@redhat.com>
>
> Difference between 'avail_wrap_counter' and 'last_avail_wrap_counter':
> For Tx(guest transmitting), they are the same after each pop of a desc.
>
> For Rx(guest receiving), they are also the same when there are enough
> descriptors to carry the payload for a packet(e.g. usually 16 descs are
> needed for a 64k packet in typical iperf tcp connection with tso enabled),
> however, when the ring is running out of descriptors while there are
> still a few free ones, e.g. 6 descriptors are available which is not
> enough to carry an entire packet which needs 16 descriptors, in this
> case the 'avail_wrap_counter' should be set as the first one pending
> being handled by guest driver in order to get a notification, and the
> 'last_avail_wrap_counter' should stay unchanged to the head of available
> descriptors, like below:
>
> Mark meaning:
>      | | -- available
>      |*| -- used
>
> A Snapshot of the queue:
>                                        last_avail_idx = 253
>                                        last_avail_wrap_counter = 1
>                                               |
>      +---------------------------------------------+
>   0  | | | |*|*|*|*|*|*|*|*|*|*|*|*|*|*|*|*|*| | | | 255
>      +---------------------------------------------+
>             |
>    shadow_avail_idx = 3
>    avail_wrap_counter = 0


Well this might not be the good place to describe the difference between 
shadow_avail_idx and last_avail_idx. And the comments above their 
definition looks good enough?

     /* Next head to pop */
     uint16_t last_avail_idx;

     /* Last avail_idx read from VQ. */
     uint16_t shadow_avail_idx;

Instead, how or why event suppression is needed is not mentioned ...



>
> Signed-off-by: Wei Xu <wexu@redhat.com>
> ---
>   hw/virtio/virtio.c | 137 +++++++++++++++++++++++++++++++++++++++++++++++++----
>   1 file changed, 128 insertions(+), 9 deletions(-)
>
> diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
> index 7e276b4..8cfc7b6 100644
> --- a/hw/virtio/virtio.c
> +++ b/hw/virtio/virtio.c
> @@ -234,6 +234,34 @@ static void vring_desc_read(VirtIODevice *vdev, VRingDesc *desc,
>       virtio_tswap16s(vdev, &desc->next);
>   }
>   
> +static void vring_packed_event_read(VirtIODevice *vdev,
> +                            MemoryRegionCache *cache, VRingPackedDescEvent *e)
> +{
> +    address_space_read_cached(cache, 0, e, sizeof(*e));
> +    virtio_tswap16s(vdev, &e->off_wrap);
> +    virtio_tswap16s(vdev, &e->flags);
> +}
> +
> +static void vring_packed_off_wrap_write(VirtIODevice *vdev,
> +                            MemoryRegionCache *cache, uint16_t off_wrap)
> +{
> +    virtio_tswap16s(vdev, &off_wrap);
> +    address_space_write_cached(cache, offsetof(VRingPackedDescEvent, off_wrap),
> +                            &off_wrap, sizeof(off_wrap));
> +    address_space_cache_invalidate(cache,
> +                offsetof(VRingPackedDescEvent, off_wrap), sizeof(off_wrap));
> +}
> +
> +static void vring_packed_flags_write(VirtIODevice *vdev,
> +                            MemoryRegionCache *cache, uint16_t flags)
> +{
> +    virtio_tswap16s(vdev, &flags);
> +    address_space_write_cached(cache, offsetof(VRingPackedDescEvent, flags),
> +                            &flags, sizeof(flags));
> +    address_space_cache_invalidate(cache,
> +                        offsetof(VRingPackedDescEvent, flags), sizeof(flags));
> +}
> +
>   static VRingMemoryRegionCaches *vring_get_region_caches(struct VirtQueue *vq)
>   {
>       VRingMemoryRegionCaches *caches = atomic_rcu_read(&vq->vring.caches);
> @@ -340,14 +368,8 @@ static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val)
>       address_space_cache_invalidate(&caches->used, pa, sizeof(val));
>   }
>   
> -void virtio_queue_set_notification(VirtQueue *vq, int enable)
> +static void virtio_queue_set_notification_split(VirtQueue *vq, int enable)
>   {
> -    vq->notification = enable;
> -
> -    if (!vq->vring.desc) {
> -        return;
> -    }
> -
>       rcu_read_lock();
>       if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
>           vring_set_avail_event(vq, vring_avail_idx(vq));
> @@ -363,6 +385,57 @@ void virtio_queue_set_notification(VirtQueue *vq, int enable)
>       rcu_read_unlock();
>   }
>   
> +static void virtio_queue_set_notification_packed(VirtQueue *vq, int enable)
> +{
> +    VRingPackedDescEvent e;
> +    VRingMemoryRegionCaches *caches;
> +
> +    rcu_read_lock();
> +    caches  = vring_get_region_caches(vq);
> +    vring_packed_event_read(vq->vdev, &caches->used, &e);
> +
> +    if (!enable) {
> +        if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
> +            /* no need to write device area since this is outdated. */


We should advertise off and wrap in this case as well, otherwise we may 
get notifications earlier than expected.


> +            goto out;
> +        }
> +
> +        e.flags = VRING_PACKED_EVENT_FLAG_DISABLE;
> +        goto update;
> +    }
> +
> +    e.flags = VRING_PACKED_EVENT_FLAG_ENABLE;


Here and the above goto could be eliminated by:

if (even idx) {

...

} else if (enable) {

...

} else {

...

}


Thanks


> +    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
> +        uint16_t off_wrap = vq->shadow_avail_idx | vq->avail_wrap_counter << 15;
> +
> +        vring_packed_off_wrap_write(vq->vdev, &caches->used, off_wrap);
> +        /* Make sure off_wrap is written before flags */
> +        smp_wmb();
> +
> +        e.flags = VRING_PACKED_EVENT_FLAG_DESC;
> +    }
> +
> +update:
> +    vring_packed_flags_write(vq->vdev, &caches->used, e.flags);
> +out:
> +    rcu_read_unlock();
> +}
> +
> +void virtio_queue_set_notification(VirtQueue *vq, int enable)
> +{
> +    vq->notification = enable;
> +
> +    if (!vq->vring.desc) {
> +        return;
> +    }
> +
> +    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
> +        virtio_queue_set_notification_packed(vq, enable);
> +    } else {
> +        virtio_queue_set_notification_split(vq, enable);
> +    }
> +}
> +
>   int virtio_queue_ready(VirtQueue *vq)
>   {
>       return vq->vring.avail != 0;
> @@ -2117,8 +2190,7 @@ static void virtio_set_isr(VirtIODevice *vdev, int value)
>       }
>   }
>   
> -/* Called within rcu_read_lock().  */
> -static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
> +static bool virtio_split_should_notify(VirtIODevice *vdev, VirtQueue *vq)
>   {
>       uint16_t old, new;
>       bool v;
> @@ -2141,6 +2213,53 @@ static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
>       return !v || vring_need_event(vring_get_used_event(vq), new, old);
>   }
>   
> +static bool vring_packed_need_event(VirtQueue *vq, bool wrap,
> +                            uint16_t off_wrap, uint16_t new, uint16_t old)
> +{
> +    int off = off_wrap & ~(1 << 15);
> +
> +    if (wrap != off_wrap >> 15) {
> +        off -= vq->vring.num;
> +    }
> +
> +    return vring_need_event(off, new, old);
> +}
> +
> +static bool virtio_packed_should_notify(VirtIODevice *vdev, VirtQueue *vq)
> +{
> +    VRingPackedDescEvent e;
> +    uint16_t old, new;
> +    bool v;
> +    VRingMemoryRegionCaches *caches;
> +
> +    caches  = vring_get_region_caches(vq);
> +    vring_packed_event_read(vdev, &caches->avail, &e);
> +
> +    old = vq->signalled_used;
> +    new = vq->signalled_used = vq->used_idx;
> +    v = vq->signalled_used_valid;
> +    vq->signalled_used_valid = true;
> +
> +    if (e.flags == VRING_PACKED_EVENT_FLAG_DISABLE) {
> +        return false;
> +    } else if (e.flags == VRING_PACKED_EVENT_FLAG_ENABLE) {
> +        return true;
> +    }
> +
> +    return !v || vring_packed_need_event(vq,
> +        vq->used_wrap_counter, e.off_wrap, new, old);
> +}
> +
> +/* Called within rcu_read_lock().  */
> +static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
> +{
> +    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
> +        return virtio_packed_should_notify(vdev, vq);
> +    } else {
> +        return virtio_split_should_notify(vdev, vq);
> +    }
> +}
> +
>   void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq)
>   {
>       bool should_notify;
Wei Xu Feb. 19, 2019, 10:40 a.m. UTC | #2
On Tue, Feb 19, 2019 at 03:19:58PM +0800, Jason Wang wrote:
> 
> On 2019/2/14 下午12:26, wexu@redhat.com wrote:
> >From: Wei Xu <wexu@redhat.com>
> >
> >Difference between 'avail_wrap_counter' and 'last_avail_wrap_counter':
> >For Tx(guest transmitting), they are the same after each pop of a desc.
> >
> >For Rx(guest receiving), they are also the same when there are enough
> >descriptors to carry the payload for a packet(e.g. usually 16 descs are
> >needed for a 64k packet in typical iperf tcp connection with tso enabled),
> >however, when the ring is running out of descriptors while there are
> >still a few free ones, e.g. 6 descriptors are available which is not
> >enough to carry an entire packet which needs 16 descriptors, in this
> >case the 'avail_wrap_counter' should be set as the first one pending
> >being handled by guest driver in order to get a notification, and the
> >'last_avail_wrap_counter' should stay unchanged to the head of available
> >descriptors, like below:
> >
> >Mark meaning:
> >     | | -- available
> >     |*| -- used
> >
> >A Snapshot of the queue:
> >                                       last_avail_idx = 253
> >                                       last_avail_wrap_counter = 1
> >                                              |
> >     +---------------------------------------------+
> >  0  | | | |*|*|*|*|*|*|*|*|*|*|*|*|*|*|*|*|*| | | | 255
> >     +---------------------------------------------+
> >            |
> >   shadow_avail_idx = 3
> >   avail_wrap_counter = 0
> 
> 
> Well this might not be the good place to describe the difference between
> shadow_avail_idx and last_avail_idx. And the comments above their definition
> looks good enough?

Sorry, I do not get it, can you elaborate more? 

This is one of the buggy parts of the packed ring, so it is good to make it clear here.

> 
>     /* Next head to pop */
>     uint16_t last_avail_idx;
> 
>     /* Last avail_idx read from VQ. */
>     uint16_t shadow_avail_idx;
> 

What is the meaning of these comments? Do you mean I should put them
into the comments as well? Thanks.

> Instead, how or why need event suppress is not mentioned ...

Yes, but presumably this knowledge has been acquired from reading through the
spec, so I skipped this part.

Wei

> 
> 
> 
> >
> >Signed-off-by: Wei Xu <wexu@redhat.com>
> >---
> >  hw/virtio/virtio.c | 137 +++++++++++++++++++++++++++++++++++++++++++++++++----
> >  1 file changed, 128 insertions(+), 9 deletions(-)
> >
> >diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
> >index 7e276b4..8cfc7b6 100644
> >--- a/hw/virtio/virtio.c
> >+++ b/hw/virtio/virtio.c
> >@@ -234,6 +234,34 @@ static void vring_desc_read(VirtIODevice *vdev, VRingDesc *desc,
> >      virtio_tswap16s(vdev, &desc->next);
> >  }
> >+static void vring_packed_event_read(VirtIODevice *vdev,
> >+                            MemoryRegionCache *cache, VRingPackedDescEvent *e)
> >+{
> >+    address_space_read_cached(cache, 0, e, sizeof(*e));
> >+    virtio_tswap16s(vdev, &e->off_wrap);
> >+    virtio_tswap16s(vdev, &e->flags);
> >+}
> >+
> >+static void vring_packed_off_wrap_write(VirtIODevice *vdev,
> >+                            MemoryRegionCache *cache, uint16_t off_wrap)
> >+{
> >+    virtio_tswap16s(vdev, &off_wrap);
> >+    address_space_write_cached(cache, offsetof(VRingPackedDescEvent, off_wrap),
> >+                            &off_wrap, sizeof(off_wrap));
> >+    address_space_cache_invalidate(cache,
> >+                offsetof(VRingPackedDescEvent, off_wrap), sizeof(off_wrap));
> >+}
> >+
> >+static void vring_packed_flags_write(VirtIODevice *vdev,
> >+                            MemoryRegionCache *cache, uint16_t flags)
> >+{
> >+    virtio_tswap16s(vdev, &flags);
> >+    address_space_write_cached(cache, offsetof(VRingPackedDescEvent, flags),
> >+                            &flags, sizeof(flags));
> >+    address_space_cache_invalidate(cache,
> >+                        offsetof(VRingPackedDescEvent, flags), sizeof(flags));
> >+}
> >+
> >  static VRingMemoryRegionCaches *vring_get_region_caches(struct VirtQueue *vq)
> >  {
> >      VRingMemoryRegionCaches *caches = atomic_rcu_read(&vq->vring.caches);
> >@@ -340,14 +368,8 @@ static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val)
> >      address_space_cache_invalidate(&caches->used, pa, sizeof(val));
> >  }
> >-void virtio_queue_set_notification(VirtQueue *vq, int enable)
> >+static void virtio_queue_set_notification_split(VirtQueue *vq, int enable)
> >  {
> >-    vq->notification = enable;
> >-
> >-    if (!vq->vring.desc) {
> >-        return;
> >-    }
> >-
> >      rcu_read_lock();
> >      if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
> >          vring_set_avail_event(vq, vring_avail_idx(vq));
> >@@ -363,6 +385,57 @@ void virtio_queue_set_notification(VirtQueue *vq, int enable)
> >      rcu_read_unlock();
> >  }
> >+static void virtio_queue_set_notification_packed(VirtQueue *vq, int enable)
> >+{
> >+    VRingPackedDescEvent e;
> >+    VRingMemoryRegionCaches *caches;
> >+
> >+    rcu_read_lock();
> >+    caches  = vring_get_region_caches(vq);
> >+    vring_packed_event_read(vq->vdev, &caches->used, &e);
> >+
> >+    if (!enable) {
> >+        if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
> >+            /* no need to write device area since this is outdated. */
> 
> 
> We should advertise off and wrap in this case as well, otherwise we may get
> notifications earlier than expected.

Is it necessary? Supposing offset & wrap_counter are always set before update
notification flags, it is reliable to skip this for disabling, isn't it?

While the logic here is unclear, I did a concise one like below
which doesn't use the 'vq->notification' as in your comment for v2,
I think this should work for packed ring as well, anything I missed?

    if (!enable) {
        e.flags = VRING_PACKED_EVENT_FLAG_DISABLE;
    } else if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
        uint16_t off_wrap;

        off_wrap = vq->shadow_avail_idx | vq->shadow_avail_wrap_counter << 15;
        vring_packed_off_wrap_write(vq->vdev, &caches->used, off_wrap);

        /* Make sure off_wrap is written before flags */
        smp_wmb();
        e.flags = VRING_PACKED_EVENT_FLAG_DESC;
    } else {
        e.flags = VRING_PACKED_EVENT_FLAG_ENABLE;
    }

    vring_packed_flags_write(vq->vdev, &caches->used, e.flags);

> 
> 
> >+            goto out;
> >+        }
> >+
> >+        e.flags = VRING_PACKED_EVENT_FLAG_DISABLE;
> >+        goto update;
> >+    }
> >+
> >+    e.flags = VRING_PACKED_EVENT_FLAG_ENABLE;
> 
> 
> Here and the above goto could be eliminated by:
> 
> if (even idx) {
> 
> ...
> 
> } else if (enable) {
> 
> ...
> 
> } else {
> 
> ...
> 
> }
> 

thanks, I have removed it in above snippet.

Wei
> 
> Thanks
> 
> 
> >+    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
> >+        uint16_t off_wrap = vq->shadow_avail_idx | vq->avail_wrap_counter << 15;
> >+
> >+        vring_packed_off_wrap_write(vq->vdev, &caches->used, off_wrap);
> >+        /* Make sure off_wrap is wrote before flags */
> >+        smp_wmb();
> >+
> >+        e.flags = VRING_PACKED_EVENT_FLAG_DESC;
> >+    }
> >+
> >+update:
> >+    vring_packed_flags_write(vq->vdev, &caches->used, e.flags);
> >+out:
> >+    rcu_read_unlock();
> >+}
> >+
> >+void virtio_queue_set_notification(VirtQueue *vq, int enable)
> >+{
> >+    vq->notification = enable;
> >+
> >+    if (!vq->vring.desc) {
> >+        return;
> >+    }
> >+
> >+    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
> >+        virtio_queue_set_notification_packed(vq, enable);
> >+    } else {
> >+        virtio_queue_set_notification_split(vq, enable);
> >+    }
> >+}
> >+
> >  int virtio_queue_ready(VirtQueue *vq)
> >  {
> >      return vq->vring.avail != 0;
> >@@ -2117,8 +2190,7 @@ static void virtio_set_isr(VirtIODevice *vdev, int value)
> >      }
> >  }
> >-/* Called within rcu_read_lock().  */
> >-static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
> >+static bool virtio_split_should_notify(VirtIODevice *vdev, VirtQueue *vq)
> >  {
> >      uint16_t old, new;
> >      bool v;
> >@@ -2141,6 +2213,53 @@ static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
> >      return !v || vring_need_event(vring_get_used_event(vq), new, old);
> >  }
> >+static bool vring_packed_need_event(VirtQueue *vq, bool wrap,
> >+                            uint16_t off_wrap, uint16_t new, uint16_t old)
> >+{
> >+    int off = off_wrap & ~(1 << 15);
> >+
> >+    if (wrap != off_wrap >> 15) {
> >+        off -= vq->vring.num;
> >+    }
> >+
> >+    return vring_need_event(off, new, old);
> >+}
> >+
> >+static bool virtio_packed_should_notify(VirtIODevice *vdev, VirtQueue *vq)
> >+{
> >+    VRingPackedDescEvent e;
> >+    uint16_t old, new;
> >+    bool v;
> >+    VRingMemoryRegionCaches *caches;
> >+
> >+    caches  = vring_get_region_caches(vq);
> >+    vring_packed_event_read(vdev, &caches->avail, &e);
> >+
> >+    old = vq->signalled_used;
> >+    new = vq->signalled_used = vq->used_idx;
> >+    v = vq->signalled_used_valid;
> >+    vq->signalled_used_valid = true;
> >+
> >+    if (e.flags == VRING_PACKED_EVENT_FLAG_DISABLE) {
> >+        return false;
> >+    } else if (e.flags == VRING_PACKED_EVENT_FLAG_ENABLE) {
> >+        return true;
> >+    }
> >+
> >+    return !v || vring_packed_need_event(vq,
> >+        vq->used_wrap_counter, e.off_wrap, new, old);
> >+}
> >+
> >+/* Called within rcu_read_lock().  */
> >+static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
> >+{
> >+    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
> >+        return virtio_packed_should_notify(vdev, vq);
> >+    } else {
> >+        return virtio_split_should_notify(vdev, vq);
> >+    }
> >+}
> >+
> >  void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq)
> >  {
> >      bool should_notify;
>
Jason Wang Feb. 19, 2019, 1:06 p.m. UTC | #3
On 2019/2/19 下午6:40, Wei Xu wrote:
> On Tue, Feb 19, 2019 at 03:19:58PM +0800, Jason Wang wrote:
>> On 2019/2/14 下午12:26, wexu@redhat.com wrote:
>>> From: Wei Xu <wexu@redhat.com>
>>>
>>> Difference between 'avail_wrap_counter' and 'last_avail_wrap_counter':
>>> For Tx(guest transmitting), they are the same after each pop of a desc.
>>>
>>> For Rx(guest receiving), they are also the same when there are enough
>>> descriptors to carry the payload for a packet(e.g. usually 16 descs are
>>> needed for a 64k packet in typical iperf tcp connection with tso enabled),
>>> however, when the ring is running out of descriptors while there are
>>> still a few free ones, e.g. 6 descriptors are available which is not
>>> enough to carry an entire packet which needs 16 descriptors, in this
>>> case the 'avail_wrap_counter' should be set as the first one pending
>>> being handled by guest driver in order to get a notification, and the
>>> 'last_avail_wrap_counter' should stay unchanged to the head of available
>>> descriptors, like below:
>>>
>>> Mark meaning:
>>>      | | -- available
>>>      |*| -- used
>>>
>>> A Snapshot of the queue:
>>>                                        last_avail_idx = 253
>>>                                        last_avail_wrap_counter = 1
>>>                                               |
>>>      +---------------------------------------------+
>>>   0  | | | |*|*|*|*|*|*|*|*|*|*|*|*|*|*|*|*|*| | | | 255
>>>      +---------------------------------------------+
>>>             |
>>>    shadow_avail_idx = 3
>>>    avail_wrap_counter = 0
>>
>> Well this might not be the good place to describe the difference between
>> shadow_avail_idx and last_avail_idx. And the comments above their definition
>> looks good enough?
> Sorry, I do not get it, can you elaborate more?


I meant the comment is good enough to explain what it did. For packed 
ring, the only difference is the wrap counter. You can add e.g "The wrap 
counter of next head to pop" to above last_avail_wrap_counter. And so 
does shadow_avail_wrap_counter.


>
> This is one of the buggy parts of packed ring, it is good to make it clear here.
>
>>      /* Next head to pop */
>>      uint16_t last_avail_idx;
>>
>>      /* Last avail_idx read from VQ. */
>>      uint16_t shadow_avail_idx;
>>
> What is the meaning of these comment?


It's pretty easy to be understood, isn't it?


>   Do you mean I should replace put them
> to the comments also? thanks.
>
>> Instead, how or why need event suppress is not mentioned ...
> Yes, but presumably this knowledge has been acquired from reading through the
> spec, so I skipped this part.


You need at least add reference to the spec. Commit log is pretty 
important for starters to understand what has been done in the patch 
before reading the code. I'm pretty sure they will get confused for 
reading what you wrote here.


Thanks


>
> Wei
>
>>
>>
>>> Signed-off-by: Wei Xu <wexu@redhat.com>
>>> ---
>>>   hw/virtio/virtio.c | 137 +++++++++++++++++++++++++++++++++++++++++++++++++----
>>>   1 file changed, 128 insertions(+), 9 deletions(-)
>>>
>>> diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
>>> index 7e276b4..8cfc7b6 100644
>>> --- a/hw/virtio/virtio.c
>>> +++ b/hw/virtio/virtio.c
>>> @@ -234,6 +234,34 @@ static void vring_desc_read(VirtIODevice *vdev, VRingDesc *desc,
>>>       virtio_tswap16s(vdev, &desc->next);
>>>   }
>>> +static void vring_packed_event_read(VirtIODevice *vdev,
>>> +                            MemoryRegionCache *cache, VRingPackedDescEvent *e)
>>> +{
>>> +    address_space_read_cached(cache, 0, e, sizeof(*e));
>>> +    virtio_tswap16s(vdev, &e->off_wrap);
>>> +    virtio_tswap16s(vdev, &e->flags);
>>> +}
>>> +
>>> +static void vring_packed_off_wrap_write(VirtIODevice *vdev,
>>> +                            MemoryRegionCache *cache, uint16_t off_wrap)
>>> +{
>>> +    virtio_tswap16s(vdev, &off_wrap);
>>> +    address_space_write_cached(cache, offsetof(VRingPackedDescEvent, off_wrap),
>>> +                            &off_wrap, sizeof(off_wrap));
>>> +    address_space_cache_invalidate(cache,
>>> +                offsetof(VRingPackedDescEvent, off_wrap), sizeof(off_wrap));
>>> +}
>>> +
>>> +static void vring_packed_flags_write(VirtIODevice *vdev,
>>> +                            MemoryRegionCache *cache, uint16_t flags)
>>> +{
>>> +    virtio_tswap16s(vdev, &flags);
>>> +    address_space_write_cached(cache, offsetof(VRingPackedDescEvent, flags),
>>> +                            &flags, sizeof(flags));
>>> +    address_space_cache_invalidate(cache,
>>> +                        offsetof(VRingPackedDescEvent, flags), sizeof(flags));
>>> +}
>>> +
>>>   static VRingMemoryRegionCaches *vring_get_region_caches(struct VirtQueue *vq)
>>>   {
>>>       VRingMemoryRegionCaches *caches = atomic_rcu_read(&vq->vring.caches);
>>> @@ -340,14 +368,8 @@ static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val)
>>>       address_space_cache_invalidate(&caches->used, pa, sizeof(val));
>>>   }
>>> -void virtio_queue_set_notification(VirtQueue *vq, int enable)
>>> +static void virtio_queue_set_notification_split(VirtQueue *vq, int enable)
>>>   {
>>> -    vq->notification = enable;
>>> -
>>> -    if (!vq->vring.desc) {
>>> -        return;
>>> -    }
>>> -
>>>       rcu_read_lock();
>>>       if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
>>>           vring_set_avail_event(vq, vring_avail_idx(vq));
>>> @@ -363,6 +385,57 @@ void virtio_queue_set_notification(VirtQueue *vq, int enable)
>>>       rcu_read_unlock();
>>>   }
>>> +static void virtio_queue_set_notification_packed(VirtQueue *vq, int enable)
>>> +{
>>> +    VRingPackedDescEvent e;
>>> +    VRingMemoryRegionCaches *caches;
>>> +
>>> +    rcu_read_lock();
>>> +    caches  = vring_get_region_caches(vq);
>>> +    vring_packed_event_read(vq->vdev, &caches->used, &e);
>>> +
>>> +    if (!enable) {
>>> +        if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
>>> +            /* no need to write device area since this is outdated. */
>>
>> We should advertise off and wrap in this case as well, otherwise we may get
>> notifications earlier than expected.
> Is it necessary? Supposing offset & wrap_counter are always set before update
> notification flags, it is reliable to skip this for disabling, isn't it?


You should either:

- advertise the EVENT_FLAG_DISABLE

or

- advertise the new off wrap otherwise you may get notified early.

Your code did neither of the above.


>
> While the logic here is unclear, I did a concise one like below
> which doesn't use the 'vq->notification' as in your comment for v2,
> I think this should work for packed ring as well, anything I missed?
>
>      if (!enable) {
>          e.flags = VRING_PACKED_EVENT_FLAG_DISABLE;
>      } else if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
>          uint16_t off_wrap;
>
>          off_wrap = vq->shadow_avail_idx | vq->shadow_avail_wrap_counter << 15;
>          vring_packed_off_wrap_write(vq->vdev, &caches->used, off_wrap);
>
>          /* Make sure off_wrap is wrote before flags */
>          smp_wmb();
>          e.flags = VRING_PACKED_EVENT_FLAG_DESC;
>      } else {
>          e.flags = VRING_PACKED_EVENT_FLAG_ENABLE;
>      }
>
>      vring_packed_flags_write(vq->vdev, &caches->used, e.flags);


Looks good to me.

Thanks


>
>>
>>> +            goto out;
>>> +        }
>>> +
>>> +        e.flags = VRING_PACKED_EVENT_FLAG_DISABLE;
>>> +        goto update;
>>> +    }
>>> +
>>> +    e.flags = VRING_PACKED_EVENT_FLAG_ENABLE;
>>
>> Here and the above goto could be eliminated by:
>>
>> if (even idx) {
>>
>> ...
>>
>> } else if (enable) {
>>
>> ...
>>
>> } else {
>>
>> ...
>>
>> }
>>
> thanks, I have removed it in above snippet.
>
> Wei
>> Thanks
>>
>>
>>> +    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
>>> +        uint16_t off_wrap = vq->shadow_avail_idx | vq->avail_wrap_counter << 15;
>>> +
>>> +        vring_packed_off_wrap_write(vq->vdev, &caches->used, off_wrap);
>>> +        /* Make sure off_wrap is wrote before flags */
>>> +        smp_wmb();
>>> +
>>> +        e.flags = VRING_PACKED_EVENT_FLAG_DESC;
>>> +    }
>>> +
>>> +update:
>>> +    vring_packed_flags_write(vq->vdev, &caches->used, e.flags);
>>> +out:
>>> +    rcu_read_unlock();
>>> +}
>>> +
>>> +void virtio_queue_set_notification(VirtQueue *vq, int enable)
>>> +{
>>> +    vq->notification = enable;
>>> +
>>> +    if (!vq->vring.desc) {
>>> +        return;
>>> +    }
>>> +
>>> +    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
>>> +        virtio_queue_set_notification_packed(vq, enable);
>>> +    } else {
>>> +        virtio_queue_set_notification_split(vq, enable);
>>> +    }
>>> +}
>>> +
>>>   int virtio_queue_ready(VirtQueue *vq)
>>>   {
>>>       return vq->vring.avail != 0;
>>> @@ -2117,8 +2190,7 @@ static void virtio_set_isr(VirtIODevice *vdev, int value)
>>>       }
>>>   }
>>> -/* Called within rcu_read_lock().  */
>>> -static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
>>> +static bool virtio_split_should_notify(VirtIODevice *vdev, VirtQueue *vq)
>>>   {
>>>       uint16_t old, new;
>>>       bool v;
>>> @@ -2141,6 +2213,53 @@ static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
>>>       return !v || vring_need_event(vring_get_used_event(vq), new, old);
>>>   }
>>> +static bool vring_packed_need_event(VirtQueue *vq, bool wrap,
>>> +                            uint16_t off_wrap, uint16_t new, uint16_t old)
>>> +{
>>> +    int off = off_wrap & ~(1 << 15);
>>> +
>>> +    if (wrap != off_wrap >> 15) {
>>> +        off -= vq->vring.num;
>>> +    }
>>> +
>>> +    return vring_need_event(off, new, old);
>>> +}
>>> +
>>> +static bool virtio_packed_should_notify(VirtIODevice *vdev, VirtQueue *vq)
>>> +{
>>> +    VRingPackedDescEvent e;
>>> +    uint16_t old, new;
>>> +    bool v;
>>> +    VRingMemoryRegionCaches *caches;
>>> +
>>> +    caches  = vring_get_region_caches(vq);
>>> +    vring_packed_event_read(vdev, &caches->avail, &e);
>>> +
>>> +    old = vq->signalled_used;
>>> +    new = vq->signalled_used = vq->used_idx;
>>> +    v = vq->signalled_used_valid;
>>> +    vq->signalled_used_valid = true;
>>> +
>>> +    if (e.flags == VRING_PACKED_EVENT_FLAG_DISABLE) {
>>> +        return false;
>>> +    } else if (e.flags == VRING_PACKED_EVENT_FLAG_ENABLE) {
>>> +        return true;
>>> +    }
>>> +
>>> +    return !v || vring_packed_need_event(vq,
>>> +        vq->used_wrap_counter, e.off_wrap, new, old);
>>> +}
>>> +
>>> +/* Called within rcu_read_lock().  */
>>> +static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
>>> +{
>>> +    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
>>> +        return virtio_packed_should_notify(vdev, vq);
>>> +    } else {
>>> +        return virtio_split_should_notify(vdev, vq);
>>> +    }
>>> +}
>>> +
>>>   void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq)
>>>   {
>>>       bool should_notify;
Wei Xu Feb. 20, 2019, 2:17 a.m. UTC | #4
On Tue, Feb 19, 2019 at 09:06:42PM +0800, Jason Wang wrote:
> 
> On 2019/2/19 下午6:40, Wei Xu wrote:
> >On Tue, Feb 19, 2019 at 03:19:58PM +0800, Jason Wang wrote:
> >>On 2019/2/14 下午12:26, wexu@redhat.com wrote:
> >>>From: Wei Xu <wexu@redhat.com>
> >>>
> >>>Difference between 'avail_wrap_counter' and 'last_avail_wrap_counter':
> >>>For Tx(guest transmitting), they are the same after each pop of a desc.
> >>>
> >>>For Rx(guest receiving), they are also the same when there are enough
> >>>descriptors to carry the payload for a packet(e.g. usually 16 descs are
> >>>needed for a 64k packet in typical iperf tcp connection with tso enabled),
> >>>however, when the ring is running out of descriptors while there are
> >>>still a few free ones, e.g. 6 descriptors are available which is not
> >>>enough to carry an entire packet which needs 16 descriptors, in this
> >>>case the 'avail_wrap_counter' should be set as the first one pending
> >>>being handled by guest driver in order to get a notification, and the
> >>>'last_avail_wrap_counter' should stay unchanged to the head of available
> >>>descriptors, like below:
> >>>
> >>>Mark meaning:
> >>>     | | -- available
> >>>     |*| -- used
> >>>
> >>>A Snapshot of the queue:
> >>>                                       last_avail_idx = 253
> >>>                                       last_avail_wrap_counter = 1
> >>>                                              |
> >>>     +---------------------------------------------+
> >>>  0  | | | |*|*|*|*|*|*|*|*|*|*|*|*|*|*|*|*|*| | | | 255
> >>>     +---------------------------------------------+
> >>>            |
> >>>   shadow_avail_idx = 3
> >>>   avail_wrap_counter = 0
> >>
> >>Well this might not be the good place to describe the difference between
> >>shadow_avail_idx and last_avail_idx. And the comments above their definition
> >>looks good enough?
> >Sorry, I do not get it, can you elaborate more?
> 
> 
> I meant the comment is good enough to explain what it did. For packed ring,
> the only difference is the wrap counter. You can add e.g "The wrap counter
> of next head to pop" to above last_avail_wrap_counter. And so does
> shadow_avail_wrap_counter.

OK, I will remove the example part.

> 
> 
> >
> >This is one of the buggy parts of packed ring, so it is good to make it clear here.
> >
> >>     /* Next head to pop */
> >>     uint16_t last_avail_idx;
> >>
> >>     /* Last avail_idx read from VQ. */
> >>     uint16_t shadow_avail_idx;
> >>
> >What is the meaning of these comment?
> 
> 
> It's pretty easy to be understood, isn't it?

:)

> 
> 
> >  Do you mean I should put them
> >into the comments as well? Thanks.
> >
> >>Instead, how or why need event suppress is not mentioned ...
> >Yes, but presumably this knowledge has been acquired from reading through the
> >spec, so I skipped this part.
> 
> 
> You need at least add reference to the spec. Commit log is pretty important
> for starters to understand what has been done in the patch before reading
> the code. I'm pretty sure they will get confused when reading what you wrote
> here.
> 

OK.

> 
> Thanks
> 
> 
> >
> >Wei
> >
> >>
> >>
> >>>Signed-off-by: Wei Xu <wexu@redhat.com>
> >>>---
> >>>  hw/virtio/virtio.c | 137 +++++++++++++++++++++++++++++++++++++++++++++++++----
> >>>  1 file changed, 128 insertions(+), 9 deletions(-)
> >>>
> >>>diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
> >>>index 7e276b4..8cfc7b6 100644
> >>>--- a/hw/virtio/virtio.c
> >>>+++ b/hw/virtio/virtio.c
> >>>@@ -234,6 +234,34 @@ static void vring_desc_read(VirtIODevice *vdev, VRingDesc *desc,
> >>>      virtio_tswap16s(vdev, &desc->next);
> >>>  }
> >>>+static void vring_packed_event_read(VirtIODevice *vdev,
> >>>+                            MemoryRegionCache *cache, VRingPackedDescEvent *e)
> >>>+{
> >>>+    address_space_read_cached(cache, 0, e, sizeof(*e));
> >>>+    virtio_tswap16s(vdev, &e->off_wrap);
> >>>+    virtio_tswap16s(vdev, &e->flags);
> >>>+}
> >>>+
> >>>+static void vring_packed_off_wrap_write(VirtIODevice *vdev,
> >>>+                            MemoryRegionCache *cache, uint16_t off_wrap)
> >>>+{
> >>>+    virtio_tswap16s(vdev, &off_wrap);
> >>>+    address_space_write_cached(cache, offsetof(VRingPackedDescEvent, off_wrap),
> >>>+                            &off_wrap, sizeof(off_wrap));
> >>>+    address_space_cache_invalidate(cache,
> >>>+                offsetof(VRingPackedDescEvent, off_wrap), sizeof(off_wrap));
> >>>+}
> >>>+
> >>>+static void vring_packed_flags_write(VirtIODevice *vdev,
> >>>+                            MemoryRegionCache *cache, uint16_t flags)
> >>>+{
> >>>+    virtio_tswap16s(vdev, &flags);
> >>>+    address_space_write_cached(cache, offsetof(VRingPackedDescEvent, flags),
> >>>+                            &flags, sizeof(flags));
> >>>+    address_space_cache_invalidate(cache,
> >>>+                        offsetof(VRingPackedDescEvent, flags), sizeof(flags));
> >>>+}
> >>>+
> >>>  static VRingMemoryRegionCaches *vring_get_region_caches(struct VirtQueue *vq)
> >>>  {
> >>>      VRingMemoryRegionCaches *caches = atomic_rcu_read(&vq->vring.caches);
> >>>@@ -340,14 +368,8 @@ static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val)
> >>>      address_space_cache_invalidate(&caches->used, pa, sizeof(val));
> >>>  }
> >>>-void virtio_queue_set_notification(VirtQueue *vq, int enable)
> >>>+static void virtio_queue_set_notification_split(VirtQueue *vq, int enable)
> >>>  {
> >>>-    vq->notification = enable;
> >>>-
> >>>-    if (!vq->vring.desc) {
> >>>-        return;
> >>>-    }
> >>>-
> >>>      rcu_read_lock();
> >>>      if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
> >>>          vring_set_avail_event(vq, vring_avail_idx(vq));
> >>>@@ -363,6 +385,57 @@ void virtio_queue_set_notification(VirtQueue *vq, int enable)
> >>>      rcu_read_unlock();
> >>>  }
> >>>+static void virtio_queue_set_notification_packed(VirtQueue *vq, int enable)
> >>>+{
> >>>+    VRingPackedDescEvent e;
> >>>+    VRingMemoryRegionCaches *caches;
> >>>+
> >>>+    rcu_read_lock();
> >>>+    caches  = vring_get_region_caches(vq);
> >>>+    vring_packed_event_read(vq->vdev, &caches->used, &e);
> >>>+
> >>>+    if (!enable) {
> >>>+        if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
> >>>+            /* no need to write device area since this is outdated. */
> >>
> >>We should advertise off and wrap in this case as well, otherwise we may get
> >>notifications earlier than expected.
> >Is it necessary? Supposing offset & wrap_counter are always set before updating
> >the notification flags, it is reliable to skip this for disabling, isn't it?
> 
> 
> You should either:
> 
> - advertise the EVENT_FLAG_DISABLE
> 
> or
> 
> - advertise the new off wrap otherwise you may get notified early.
> 
> But your code does neither of the above.

Right.

> 
> 
> >
> >While the logic here is unclear, I did a concise one like below
> >which doesn't use the 'vq->notification' as in your comment for v2,
> >I think this should work for packed ring as well, anything I missed?
> >
> >     if (!enable) {
> >         e.flags = VRING_PACKED_EVENT_FLAG_DISABLE;
> >     } else if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
> >         uint16_t off_wrap;
> >
> >         off_wrap = vq->shadow_avail_idx | vq->shadow_avail_wrap_counter << 15;
> >         vring_packed_off_wrap_write(vq->vdev, &caches->used, off_wrap);
> >
> >         /* Make sure off_wrap is wrote before flags */
> >         smp_wmb();
> >         e.flags = VRING_PACKED_EVENT_FLAG_DESC;
> >     } else {
> >         e.flags = VRING_PACKED_EVENT_FLAG_ENABLE;
> >     }
> >
> >     vring_packed_flags_write(vq->vdev, &caches->used, e.flags);
> 
> 
> Looks good to me.

Thanks.

> 
> Thanks
> 
> 
> >
> >>
> >>>+            goto out;
> >>>+        }
> >>>+
> >>>+        e.flags = VRING_PACKED_EVENT_FLAG_DISABLE;
> >>>+        goto update;
> >>>+    }
> >>>+
> >>>+    e.flags = VRING_PACKED_EVENT_FLAG_ENABLE;
> >>
> >>Here and the above goto could be eliminated by:
> >>
> >>if (even idx) {
> >>
> >>...
> >>
> >>} else if (enable) {
> >>
> >>...
> >>
> >>} else {
> >>
> >>...
> >>
> >>}
> >>
> >thanks, I have removed it in above snippet.
> >
> >Wei
> >>Thanks
> >>
> >>
> >>>+    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
> >>>+        uint16_t off_wrap = vq->shadow_avail_idx | vq->avail_wrap_counter << 15;
> >>>+
> >>>+        vring_packed_off_wrap_write(vq->vdev, &caches->used, off_wrap);
> >>>+        /* Make sure off_wrap is wrote before flags */
> >>>+        smp_wmb();
> >>>+
> >>>+        e.flags = VRING_PACKED_EVENT_FLAG_DESC;
> >>>+    }
> >>>+
> >>>+update:
> >>>+    vring_packed_flags_write(vq->vdev, &caches->used, e.flags);
> >>>+out:
> >>>+    rcu_read_unlock();
> >>>+}
> >>>+
> >>>+void virtio_queue_set_notification(VirtQueue *vq, int enable)
> >>>+{
> >>>+    vq->notification = enable;
> >>>+
> >>>+    if (!vq->vring.desc) {
> >>>+        return;
> >>>+    }
> >>>+
> >>>+    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
> >>>+        virtio_queue_set_notification_packed(vq, enable);
> >>>+    } else {
> >>>+        virtio_queue_set_notification_split(vq, enable);
> >>>+    }
> >>>+}
> >>>+
> >>>  int virtio_queue_ready(VirtQueue *vq)
> >>>  {
> >>>      return vq->vring.avail != 0;
> >>>@@ -2117,8 +2190,7 @@ static void virtio_set_isr(VirtIODevice *vdev, int value)
> >>>      }
> >>>  }
> >>>-/* Called within rcu_read_lock().  */
> >>>-static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
> >>>+static bool virtio_split_should_notify(VirtIODevice *vdev, VirtQueue *vq)
> >>>  {
> >>>      uint16_t old, new;
> >>>      bool v;
> >>>@@ -2141,6 +2213,53 @@ static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
> >>>      return !v || vring_need_event(vring_get_used_event(vq), new, old);
> >>>  }
> >>>+static bool vring_packed_need_event(VirtQueue *vq, bool wrap,
> >>>+                            uint16_t off_wrap, uint16_t new, uint16_t old)
> >>>+{
> >>>+    int off = off_wrap & ~(1 << 15);
> >>>+
> >>>+    if (wrap != off_wrap >> 15) {
> >>>+        off -= vq->vring.num;
> >>>+    }
> >>>+
> >>>+    return vring_need_event(off, new, old);
> >>>+}
> >>>+
> >>>+static bool virtio_packed_should_notify(VirtIODevice *vdev, VirtQueue *vq)
> >>>+{
> >>>+    VRingPackedDescEvent e;
> >>>+    uint16_t old, new;
> >>>+    bool v;
> >>>+    VRingMemoryRegionCaches *caches;
> >>>+
> >>>+    caches  = vring_get_region_caches(vq);
> >>>+    vring_packed_event_read(vdev, &caches->avail, &e);
> >>>+
> >>>+    old = vq->signalled_used;
> >>>+    new = vq->signalled_used = vq->used_idx;
> >>>+    v = vq->signalled_used_valid;
> >>>+    vq->signalled_used_valid = true;
> >>>+
> >>>+    if (e.flags == VRING_PACKED_EVENT_FLAG_DISABLE) {
> >>>+        return false;
> >>>+    } else if (e.flags == VRING_PACKED_EVENT_FLAG_ENABLE) {
> >>>+        return true;
> >>>+    }
> >>>+
> >>>+    return !v || vring_packed_need_event(vq,
> >>>+        vq->used_wrap_counter, e.off_wrap, new, old);
> >>>+}
> >>>+
> >>>+/* Called within rcu_read_lock().  */
> >>>+static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
> >>>+{
> >>>+    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
> >>>+        return virtio_packed_should_notify(vdev, vq);
> >>>+    } else {
> >>>+        return virtio_split_should_notify(vdev, vq);
> >>>+    }
> >>>+}
> >>>+
> >>>  void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq)
> >>>  {
> >>>      bool should_notify;
diff mbox series

Patch

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 7e276b4..8cfc7b6 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -234,6 +234,34 @@  static void vring_desc_read(VirtIODevice *vdev, VRingDesc *desc,
     virtio_tswap16s(vdev, &desc->next);
 }
 
+static void vring_packed_event_read(VirtIODevice *vdev,
+                            MemoryRegionCache *cache, VRingPackedDescEvent *e)
+{
+    address_space_read_cached(cache, 0, e, sizeof(*e));
+    virtio_tswap16s(vdev, &e->off_wrap);
+    virtio_tswap16s(vdev, &e->flags);
+}
+
+static void vring_packed_off_wrap_write(VirtIODevice *vdev,
+                            MemoryRegionCache *cache, uint16_t off_wrap)
+{
+    virtio_tswap16s(vdev, &off_wrap);
+    address_space_write_cached(cache, offsetof(VRingPackedDescEvent, off_wrap),
+                            &off_wrap, sizeof(off_wrap));
+    address_space_cache_invalidate(cache,
+                offsetof(VRingPackedDescEvent, off_wrap), sizeof(off_wrap));
+}
+
+static void vring_packed_flags_write(VirtIODevice *vdev,
+                            MemoryRegionCache *cache, uint16_t flags)
+{
+    virtio_tswap16s(vdev, &flags);
+    address_space_write_cached(cache, offsetof(VRingPackedDescEvent, flags),
+                            &flags, sizeof(flags));
+    address_space_cache_invalidate(cache,
+                        offsetof(VRingPackedDescEvent, flags), sizeof(flags));
+}
+
 static VRingMemoryRegionCaches *vring_get_region_caches(struct VirtQueue *vq)
 {
     VRingMemoryRegionCaches *caches = atomic_rcu_read(&vq->vring.caches);
@@ -340,14 +368,8 @@  static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val)
     address_space_cache_invalidate(&caches->used, pa, sizeof(val));
 }
 
-void virtio_queue_set_notification(VirtQueue *vq, int enable)
+static void virtio_queue_set_notification_split(VirtQueue *vq, int enable)
 {
-    vq->notification = enable;
-
-    if (!vq->vring.desc) {
-        return;
-    }
-
     rcu_read_lock();
     if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
         vring_set_avail_event(vq, vring_avail_idx(vq));
@@ -363,6 +385,57 @@  void virtio_queue_set_notification(VirtQueue *vq, int enable)
     rcu_read_unlock();
 }
 
+static void virtio_queue_set_notification_packed(VirtQueue *vq, int enable)
+{
+    VRingPackedDescEvent e;
+    VRingMemoryRegionCaches *caches;
+
+    rcu_read_lock();
+    caches  = vring_get_region_caches(vq);
+    vring_packed_event_read(vq->vdev, &caches->used, &e);
+
+    if (!enable) {
+        if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
+            /* no need to write device area since this is outdated. */
+            goto out;
+        }
+
+        e.flags = VRING_PACKED_EVENT_FLAG_DISABLE;
+        goto update;
+    }
+
+    e.flags = VRING_PACKED_EVENT_FLAG_ENABLE;
+    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
+        uint16_t off_wrap = vq->shadow_avail_idx | vq->avail_wrap_counter << 15;
+
+        vring_packed_off_wrap_write(vq->vdev, &caches->used, off_wrap);
+        /* Make sure off_wrap is wrote before flags */
+        smp_wmb();
+
+        e.flags = VRING_PACKED_EVENT_FLAG_DESC;
+    }
+
+update:
+    vring_packed_flags_write(vq->vdev, &caches->used, e.flags);
+out:
+    rcu_read_unlock();
+}
+
+void virtio_queue_set_notification(VirtQueue *vq, int enable)
+{
+    vq->notification = enable;
+
+    if (!vq->vring.desc) {
+        return;
+    }
+
+    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
+        virtio_queue_set_notification_packed(vq, enable);
+    } else {
+        virtio_queue_set_notification_split(vq, enable);
+    }
+}
+
 int virtio_queue_ready(VirtQueue *vq)
 {
     return vq->vring.avail != 0;
@@ -2117,8 +2190,7 @@  static void virtio_set_isr(VirtIODevice *vdev, int value)
     }
 }
 
-/* Called within rcu_read_lock().  */
-static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
+static bool virtio_split_should_notify(VirtIODevice *vdev, VirtQueue *vq)
 {
     uint16_t old, new;
     bool v;
@@ -2141,6 +2213,53 @@  static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
     return !v || vring_need_event(vring_get_used_event(vq), new, old);
 }
 
+static bool vring_packed_need_event(VirtQueue *vq, bool wrap,
+                            uint16_t off_wrap, uint16_t new, uint16_t old)
+{
+    int off = off_wrap & ~(1 << 15);
+
+    if (wrap != off_wrap >> 15) {
+        off -= vq->vring.num;
+    }
+
+    return vring_need_event(off, new, old);
+}
+
+static bool virtio_packed_should_notify(VirtIODevice *vdev, VirtQueue *vq)
+{
+    VRingPackedDescEvent e;
+    uint16_t old, new;
+    bool v;
+    VRingMemoryRegionCaches *caches;
+
+    caches  = vring_get_region_caches(vq);
+    vring_packed_event_read(vdev, &caches->avail, &e);
+
+    old = vq->signalled_used;
+    new = vq->signalled_used = vq->used_idx;
+    v = vq->signalled_used_valid;
+    vq->signalled_used_valid = true;
+
+    if (e.flags == VRING_PACKED_EVENT_FLAG_DISABLE) {
+        return false;
+    } else if (e.flags == VRING_PACKED_EVENT_FLAG_ENABLE) {
+        return true;
+    }
+
+    return !v || vring_packed_need_event(vq,
+        vq->used_wrap_counter, e.off_wrap, new, old);
+}
+
+/* Called within rcu_read_lock().  */
+static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
+{
+    if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
+        return virtio_packed_should_notify(vdev, vq);
+    } else {
+        return virtio_split_should_notify(vdev, vq);
+    }
+}
+
 void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq)
 {
     bool should_notify;