Message ID | 20190723075718.6275-6-jasowang@redhat.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Fixes for meta data acceleration | expand |
On Tue, Jul 23, 2019 at 03:57:17AM -0400, Jason Wang wrote: > We don't mark dirty pages if the map was teared down outside MMU > notifier. This will lead untracked dirty pages. Fixing by marking > dirty pages during map uninit. > > Reported-by: Michael S. Tsirkin <mst@redhat.com> > Fixes: 7f466032dc9e ("vhost: access vq metadata through kernel virtual address") > Signed-off-by: Jason Wang <jasowang@redhat.com> > --- > drivers/vhost/vhost.c | 22 ++++++++++++++++------ > 1 file changed, 16 insertions(+), 6 deletions(-) > > diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c > index 89c9f08b5146..5b8821d00fe4 100644 > --- a/drivers/vhost/vhost.c > +++ b/drivers/vhost/vhost.c > @@ -306,6 +306,18 @@ static void vhost_map_unprefetch(struct vhost_map *map) > kfree(map); > } > > +static void vhost_set_map_dirty(struct vhost_virtqueue *vq, > + struct vhost_map *map, int index) > +{ > + struct vhost_uaddr *uaddr = &vq->uaddrs[index]; > + int i; > + > + if (uaddr->write) { > + for (i = 0; i < map->npages; i++) > + set_page_dirty(map->pages[i]); > + } > +} > + > static void vhost_uninit_vq_maps(struct vhost_virtqueue *vq) > { > struct vhost_map *map[VHOST_NUM_ADDRS]; > @@ -315,8 +327,10 @@ static void vhost_uninit_vq_maps(struct vhost_virtqueue *vq) > for (i = 0; i < VHOST_NUM_ADDRS; i++) { > map[i] = rcu_dereference_protected(vq->maps[i], > lockdep_is_held(&vq->mmu_lock)); > - if (map[i]) > + if (map[i]) { > + vhost_set_map_dirty(vq, map[i], i); > rcu_assign_pointer(vq->maps[i], NULL); > + } > } > spin_unlock(&vq->mmu_lock); > > @@ -354,7 +368,6 @@ static void vhost_invalidate_vq_start(struct vhost_virtqueue *vq, > { > struct vhost_uaddr *uaddr = &vq->uaddrs[index]; > struct vhost_map *map; > - int i; > > if (!vhost_map_range_overlap(uaddr, start, end)) > return; > @@ -365,10 +378,7 @@ static void vhost_invalidate_vq_start(struct vhost_virtqueue *vq, > map = rcu_dereference_protected(vq->maps[index], > lockdep_is_held(&vq->mmu_lock)); > if (map) { > - if (uaddr->write) { > - for (i = 0; i < map->npages; i++) > - set_page_dirty(map->pages[i]); > - } > + vhost_set_map_dirty(vq, map, index); > rcu_assign_pointer(vq->maps[index], NULL); > } > spin_unlock(&vq->mmu_lock); OK and the reason it's safe is because the invalidate counter got incremented so we know page will not get mapped again. But we *do* need to wait for page not to be mapped. And if that means waiting for VQ processing to finish, then I worry that is a very log time. > -- > 2.18.1
On 2019/7/23 下午5:17, Michael S. Tsirkin wrote: > On Tue, Jul 23, 2019 at 03:57:17AM -0400, Jason Wang wrote: >> We don't mark dirty pages if the map was teared down outside MMU >> notifier. This will lead untracked dirty pages. Fixing by marking >> dirty pages during map uninit. >> >> Reported-by: Michael S. Tsirkin<mst@redhat.com> >> Fixes: 7f466032dc9e ("vhost: access vq metadata through kernel virtual address") >> Signed-off-by: Jason Wang<jasowang@redhat.com> >> --- >> drivers/vhost/vhost.c | 22 ++++++++++++++++------ >> 1 file changed, 16 insertions(+), 6 deletions(-) >> >> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c >> index 89c9f08b5146..5b8821d00fe4 100644 >> --- a/drivers/vhost/vhost.c >> +++ b/drivers/vhost/vhost.c >> @@ -306,6 +306,18 @@ static void vhost_map_unprefetch(struct vhost_map *map) >> kfree(map); >> } >> >> +static void vhost_set_map_dirty(struct vhost_virtqueue *vq, >> + struct vhost_map *map, int index) >> +{ >> + struct vhost_uaddr *uaddr = &vq->uaddrs[index]; >> + int i; >> + >> + if (uaddr->write) { >> + for (i = 0; i < map->npages; i++) >> + set_page_dirty(map->pages[i]); >> + } >> +} >> + >> static void vhost_uninit_vq_maps(struct vhost_virtqueue *vq) >> { >> struct vhost_map *map[VHOST_NUM_ADDRS]; >> @@ -315,8 +327,10 @@ static void vhost_uninit_vq_maps(struct vhost_virtqueue *vq) >> for (i = 0; i < VHOST_NUM_ADDRS; i++) { >> map[i] = rcu_dereference_protected(vq->maps[i], >> lockdep_is_held(&vq->mmu_lock)); >> - if (map[i]) >> + if (map[i]) { >> + vhost_set_map_dirty(vq, map[i], i); >> rcu_assign_pointer(vq->maps[i], NULL); >> + } >> } >> spin_unlock(&vq->mmu_lock); >> >> @@ -354,7 +368,6 @@ static void vhost_invalidate_vq_start(struct vhost_virtqueue *vq, >> { >> struct vhost_uaddr *uaddr = &vq->uaddrs[index]; >> struct vhost_map *map; >> - int i; >> >> if (!vhost_map_range_overlap(uaddr, start, end)) >> return; >> @@ -365,10 +378,7 @@ static void vhost_invalidate_vq_start(struct vhost_virtqueue *vq, >> map = rcu_dereference_protected(vq->maps[index], >> lockdep_is_held(&vq->mmu_lock)); >> if (map) { >> - if (uaddr->write) { >> - for (i = 0; i < map->npages; i++) >> - set_page_dirty(map->pages[i]); >> - } >> + vhost_set_map_dirty(vq, map, index); >> rcu_assign_pointer(vq->maps[index], NULL); >> } >> spin_unlock(&vq->mmu_lock); > OK and the reason it's safe is because the invalidate counter > got incremented so we know page will not get mapped again. > > But we*do* need to wait for page not to be mapped. > And if that means waiting for VQ processing to finish, > then I worry that is a very log time. > I'm not sure I get you here. If we don't have such map, we will fall back to normal uaccess helper. And in the memory accessor, the rcu critical section is pretty small. Thanks
On Tue, Jul 23, 2019 at 09:19:33PM +0800, Jason Wang wrote: > > On 2019/7/23 下午5:17, Michael S. Tsirkin wrote: > > On Tue, Jul 23, 2019 at 03:57:17AM -0400, Jason Wang wrote: > > > We don't mark dirty pages if the map was teared down outside MMU > > > notifier. This will lead untracked dirty pages. Fixing by marking > > > dirty pages during map uninit. > > > > > > Reported-by: Michael S. Tsirkin<mst@redhat.com> > > > Fixes: 7f466032dc9e ("vhost: access vq metadata through kernel virtual address") > > > Signed-off-by: Jason Wang<jasowang@redhat.com> > > > --- > > > drivers/vhost/vhost.c | 22 ++++++++++++++++------ > > > 1 file changed, 16 insertions(+), 6 deletions(-) > > > > > > diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c > > > index 89c9f08b5146..5b8821d00fe4 100644 > > > --- a/drivers/vhost/vhost.c > > > +++ b/drivers/vhost/vhost.c > > > @@ -306,6 +306,18 @@ static void vhost_map_unprefetch(struct vhost_map *map) > > > kfree(map); > > > } > > > +static void vhost_set_map_dirty(struct vhost_virtqueue *vq, > > > + struct vhost_map *map, int index) > > > +{ > > > + struct vhost_uaddr *uaddr = &vq->uaddrs[index]; > > > + int i; > > > + > > > + if (uaddr->write) { > > > + for (i = 0; i < map->npages; i++) > > > + set_page_dirty(map->pages[i]); > > > + } > > > +} > > > + > > > static void vhost_uninit_vq_maps(struct vhost_virtqueue *vq) > > > { > > > struct vhost_map *map[VHOST_NUM_ADDRS]; > > > @@ -315,8 +327,10 @@ static void vhost_uninit_vq_maps(struct vhost_virtqueue *vq) > > > for (i = 0; i < VHOST_NUM_ADDRS; i++) { > > > map[i] = rcu_dereference_protected(vq->maps[i], > > > lockdep_is_held(&vq->mmu_lock)); > > > - if (map[i]) > > > + if (map[i]) { > > > + vhost_set_map_dirty(vq, map[i], i); > > > rcu_assign_pointer(vq->maps[i], NULL); > > > + } > > > } > > > spin_unlock(&vq->mmu_lock); > > > @@ -354,7 +368,6 @@ static void vhost_invalidate_vq_start(struct vhost_virtqueue *vq, > > > { > > > struct vhost_uaddr *uaddr = &vq->uaddrs[index]; > > > struct vhost_map *map; > > > - int i; > > > if (!vhost_map_range_overlap(uaddr, start, end)) > > > return; > > > @@ -365,10 +378,7 @@ static void vhost_invalidate_vq_start(struct vhost_virtqueue *vq, > > > map = rcu_dereference_protected(vq->maps[index], > > > lockdep_is_held(&vq->mmu_lock)); > > > if (map) { > > > - if (uaddr->write) { > > > - for (i = 0; i < map->npages; i++) > > > - set_page_dirty(map->pages[i]); > > > - } > > > + vhost_set_map_dirty(vq, map, index); > > > rcu_assign_pointer(vq->maps[index], NULL); > > > } > > > spin_unlock(&vq->mmu_lock); > > OK and the reason it's safe is because the invalidate counter > > got incremented so we know page will not get mapped again. > > > > But we*do* need to wait for page not to be mapped. > > And if that means waiting for VQ processing to finish, > > then I worry that is a very log time. > > > > I'm not sure I get you here. If we don't have such map, we will fall back to > normal uaccess helper. And in the memory accessor, the rcu critical section > is pretty small. > > Thanks > OK. So the trick is that page_mkclean invokes mmu notifiers.
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 89c9f08b5146..5b8821d00fe4 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -306,6 +306,18 @@ static void vhost_map_unprefetch(struct vhost_map *map) kfree(map); } +static void vhost_set_map_dirty(struct vhost_virtqueue *vq, + struct vhost_map *map, int index) +{ + struct vhost_uaddr *uaddr = &vq->uaddrs[index]; + int i; + + if (uaddr->write) { + for (i = 0; i < map->npages; i++) + set_page_dirty(map->pages[i]); + } +} + static void vhost_uninit_vq_maps(struct vhost_virtqueue *vq) { struct vhost_map *map[VHOST_NUM_ADDRS]; @@ -315,8 +327,10 @@ static void vhost_uninit_vq_maps(struct vhost_virtqueue *vq) for (i = 0; i < VHOST_NUM_ADDRS; i++) { map[i] = rcu_dereference_protected(vq->maps[i], lockdep_is_held(&vq->mmu_lock)); - if (map[i]) + if (map[i]) { + vhost_set_map_dirty(vq, map[i], i); rcu_assign_pointer(vq->maps[i], NULL); + } } spin_unlock(&vq->mmu_lock); @@ -354,7 +368,6 @@ static void vhost_invalidate_vq_start(struct vhost_virtqueue *vq, { struct vhost_uaddr *uaddr = &vq->uaddrs[index]; struct vhost_map *map; - int i; if (!vhost_map_range_overlap(uaddr, start, end)) return; @@ -365,10 +378,7 @@ static void vhost_invalidate_vq_start(struct vhost_virtqueue *vq, map = rcu_dereference_protected(vq->maps[index], lockdep_is_held(&vq->mmu_lock)); if (map) { - if (uaddr->write) { - for (i = 0; i < map->npages; i++) - set_page_dirty(map->pages[i]); - } + vhost_set_map_dirty(vq, map, index); rcu_assign_pointer(vq->maps[index], NULL); } spin_unlock(&vq->mmu_lock);
We don't mark dirty pages if the map was teared down outside MMU notifier. This will lead untracked dirty pages. Fixing by marking dirty pages during map uninit. Reported-by: Michael S. Tsirkin <mst@redhat.com> Fixes: 7f466032dc9e ("vhost: access vq metadata through kernel virtual address") Signed-off-by: Jason Wang <jasowang@redhat.com> --- drivers/vhost/vhost.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-)