Message ID | 20221214163025.103075-7-sgarzare@redhat.com (mailing list archive) |
---|---|
State | RFC |
Headers | show |
Series | vdpa_sim: add support for user VA | expand |
Context | Check | Description |
---|---|---|
netdev/tree_selection | success | Not a local patch |
On Thu, Dec 15, 2022 at 12:31 AM Stefano Garzarella <sgarzare@redhat.com> wrote: > > The new "use_va" module parameter (default: false) is used in > vdpa_alloc_device() to inform the vDPA framework that the device > supports VA. > > vringh is initialized to use VA only when "use_va" is true and the > user's mm has been bound. So, only when the bus supports user VA > (e.g. vhost-vdpa). > > vdpasim_mm_work_fn work is used to attach the kthread to the user > address space when the .bind_mm callback is invoked, and to detach > it when the device is reset. One thing in my mind is that the current datapath is running under spinlock which prevents us from using iov_iter (which may have page faults). We need to get rid of the spinlock first. > > Signed-off-by: Stefano Garzarella <sgarzare@redhat.com> > --- > drivers/vdpa/vdpa_sim/vdpa_sim.h | 1 + > drivers/vdpa/vdpa_sim/vdpa_sim.c | 104 ++++++++++++++++++++++++++++++- > 2 files changed, 103 insertions(+), 2 deletions(-) > > diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.h b/drivers/vdpa/vdpa_sim/vdpa_sim.h > index 07ef53ea375e..1b010e5c0445 100644 > --- a/drivers/vdpa/vdpa_sim/vdpa_sim.h > +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.h > @@ -55,6 +55,7 @@ struct vdpasim { > struct vdpasim_virtqueue *vqs; > struct kthread_worker *worker; > struct kthread_work work; > + struct mm_struct *mm_bound; > struct vdpasim_dev_attr dev_attr; > /* spinlock to synchronize virtqueue state */ > spinlock_t lock; > diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c > index 36a1d2e0a6ba..6e07cedef30c 100644 > --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c > +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c > @@ -36,10 +36,90 @@ module_param(max_iotlb_entries, int, 0444); > MODULE_PARM_DESC(max_iotlb_entries, > "Maximum number of iotlb entries for each address space. 0 means unlimited. (default: 2048)"); > > +static bool use_va; > +module_param(use_va, bool, 0444); > +MODULE_PARM_DESC(use_va, "Enable the device's ability to use VA"); > + > #define VDPASIM_QUEUE_ALIGN PAGE_SIZE > #define VDPASIM_QUEUE_MAX 256 > #define VDPASIM_VENDOR_ID 0 > > +struct vdpasim_mm_work { > + struct kthread_work work; > + struct task_struct *owner; > + struct mm_struct *mm; > + bool bind; > + int ret; > +}; > + > +static void vdpasim_mm_work_fn(struct kthread_work *work) > +{ > + struct vdpasim_mm_work *mm_work = > + container_of(work, struct vdpasim_mm_work, work); > + > + mm_work->ret = 0; > + > + if (mm_work->bind) { > + kthread_use_mm(mm_work->mm); > +#if 0 > + if (mm_work->owner) > + mm_work->ret = cgroup_attach_task_all(mm_work->owner, > + current); > +#endif > + } else { > +#if 0 > + //TODO: check it > + cgroup_release(current); > +#endif > + kthread_unuse_mm(mm_work->mm); > + } > +} > + > +static void vdpasim_worker_queue_mm(struct vdpasim *vdpasim, > + struct vdpasim_mm_work *mm_work) > +{ > + struct kthread_work *work = &mm_work->work; > + > + kthread_init_work(work, vdpasim_mm_work_fn); > + kthread_queue_work(vdpasim->worker, work); > + > + spin_unlock(&vdpasim->lock); > + kthread_flush_work(work); > + spin_lock(&vdpasim->lock); > +} > + > +static int vdpasim_worker_bind_mm(struct vdpasim *vdpasim, > + struct mm_struct *new_mm, > + struct task_struct *owner) > +{ > + struct vdpasim_mm_work mm_work; > + > + mm_work.owner = owner; > + mm_work.mm = new_mm; > + mm_work.bind = true; > + > + vdpasim_worker_queue_mm(vdpasim, &mm_work); > + Should we wait for the work to be finished? > + if (!mm_work.ret) > + vdpasim->mm_bound = new_mm; > + > + return mm_work.ret; > +} > + > +static void vdpasim_worker_unbind_mm(struct vdpasim *vdpasim) > +{ > + struct vdpasim_mm_work mm_work; > + > + if (!vdpasim->mm_bound) > + return; > + > + mm_work.mm = vdpasim->mm_bound; > + mm_work.bind = false; > + > + vdpasim_worker_queue_mm(vdpasim, &mm_work); > + > + vdpasim->mm_bound = NULL; > +} > static struct vdpasim *vdpa_to_sim(struct vdpa_device *vdpa) > { > return container_of(vdpa, struct vdpasim, vdpa); > @@ -66,8 +146,10 @@ static void vdpasim_vq_notify(struct vringh *vring) > static void vdpasim_queue_ready(struct vdpasim *vdpasim, unsigned int idx) > { > struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx]; > + bool va_enabled = use_va && vdpasim->mm_bound; > > - vringh_init_iotlb(&vq->vring, vdpasim->features, vq->num, false, false, > + vringh_init_iotlb(&vq->vring, vdpasim->features, vq->num, false, > + va_enabled, > (struct vring_desc *)(uintptr_t)vq->desc_addr, > (struct vring_avail *) > (uintptr_t)vq->driver_addr, > @@ -96,6 +178,9 @@ static void vdpasim_do_reset(struct vdpasim *vdpasim) > { > int i; > > + //TODO: should we cancel the works? > + vdpasim_worker_unbind_mm(vdpasim); We probably don't need this since it's the virtio level reset so we need to keep the mm bound in this case. Otherwise we may break the guest. It should be the responsibility of the driver to call config_ops->unbind if it needs to do that. Thanks > + > spin_lock(&vdpasim->iommu_lock); > > for (i = 0; i < vdpasim->dev_attr.nvqs; i++) { > @@ -275,7 +360,7 @@ struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr, > > vdpasim = vdpa_alloc_device(struct vdpasim, vdpa, NULL, ops, > dev_attr->ngroups, dev_attr->nas, > - dev_attr->name, false); > + dev_attr->name, use_va); > if (IS_ERR(vdpasim)) { > ret = PTR_ERR(vdpasim); > goto err_alloc; > @@ -657,6 +742,19 @@ static int vdpasim_set_map(struct vdpa_device *vdpa, unsigned int asid, > return ret; > } > > +static int vdpasim_bind_mm(struct vdpa_device *vdpa, struct mm_struct *mm, > + struct task_struct *owner) > +{ > + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); > + int ret; > + > + spin_lock(&vdpasim->lock); > + ret = vdpasim_worker_bind_mm(vdpasim, mm, owner); > + spin_unlock(&vdpasim->lock); > + > + return ret; > +} > + > static int vdpasim_dma_map(struct vdpa_device *vdpa, unsigned int asid, > u64 iova, u64 size, > u64 pa, u32 perm, void *opaque) > @@ -744,6 +842,7 @@ static const struct vdpa_config_ops vdpasim_config_ops = { > .set_group_asid = vdpasim_set_group_asid, > .dma_map = vdpasim_dma_map, > .dma_unmap = vdpasim_dma_unmap, > + .bind_mm = vdpasim_bind_mm, > .free = vdpasim_free, > }; > > @@ -776,6 +875,7 @@ static const struct vdpa_config_ops vdpasim_batch_config_ops = { > .get_iova_range = vdpasim_get_iova_range, > .set_group_asid = vdpasim_set_group_asid, > .set_map = vdpasim_set_map, > + .bind_mm = vdpasim_bind_mm, > .free = vdpasim_free, > }; > > -- > 2.38.1 >
On Fri, Dec 16, 2022 at 03:26:46PM +0800, Jason Wang wrote: >On Thu, Dec 15, 2022 at 12:31 AM Stefano Garzarella <sgarzare@redhat.com> wrote: >> >> The new "use_va" module parameter (default: false) is used in >> vdpa_alloc_device() to inform the vDPA framework that the device >> supports VA. >> >> vringh is initialized to use VA only when "use_va" is true and the >> user's mm has been bound. So, only when the bus supports user VA >> (e.g. vhost-vdpa). >> >> vdpasim_mm_work_fn work is used to attach the kthread to the user >> address space when the .bind_mm callback is invoked, and to detach >> it when the device is reset. > >One thing in my mind is that the current datapath is running under >spinlock which prevents us from using iov_iter (which may have page >faults). > >We need to get rid of the spinlock first. Right! I already have a patch for that since I used for the vdpa-blk software device in-kernel PoC where I had the same issue. I'll add it to the series! > >> >> Signed-off-by: Stefano Garzarella <sgarzare@redhat.com> >> --- >> drivers/vdpa/vdpa_sim/vdpa_sim.h | 1 + >> drivers/vdpa/vdpa_sim/vdpa_sim.c | 104 ++++++++++++++++++++++++++++++- >> 2 files changed, 103 insertions(+), 2 deletions(-) >> >> diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.h b/drivers/vdpa/vdpa_sim/vdpa_sim.h >> index 07ef53ea375e..1b010e5c0445 100644 >> --- a/drivers/vdpa/vdpa_sim/vdpa_sim.h >> +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.h >> @@ -55,6 +55,7 @@ struct vdpasim { >> struct vdpasim_virtqueue *vqs; >> struct kthread_worker *worker; >> struct kthread_work work; >> + struct mm_struct *mm_bound; >> struct vdpasim_dev_attr dev_attr; >> /* spinlock to synchronize virtqueue state */ >> spinlock_t lock; >> diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c >> index 36a1d2e0a6ba..6e07cedef30c 100644 >> --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c >> +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c >> @@ -36,10 +36,90 @@ module_param(max_iotlb_entries, int, 0444); >> MODULE_PARM_DESC(max_iotlb_entries, >> "Maximum number of iotlb entries for each address space. 0 means unlimited. (default: 2048)"); >> >> +static bool use_va; >> +module_param(use_va, bool, 0444); >> +MODULE_PARM_DESC(use_va, "Enable the device's ability to use VA"); >> + >> #define VDPASIM_QUEUE_ALIGN PAGE_SIZE >> #define VDPASIM_QUEUE_MAX 256 >> #define VDPASIM_VENDOR_ID 0 >> >> +struct vdpasim_mm_work { >> + struct kthread_work work; >> + struct task_struct *owner; >> + struct mm_struct *mm; >> + bool bind; >> + int ret; >> +}; >> + >> +static void vdpasim_mm_work_fn(struct kthread_work *work) >> +{ >> + struct vdpasim_mm_work *mm_work = >> + container_of(work, struct vdpasim_mm_work, work); >> + >> + mm_work->ret = 0; >> + >> + if (mm_work->bind) { >> + kthread_use_mm(mm_work->mm); >> +#if 0 >> + if (mm_work->owner) >> + mm_work->ret = cgroup_attach_task_all(mm_work->owner, >> + current); >> +#endif >> + } else { >> +#if 0 >> + //TODO: check it >> + cgroup_release(current); >> +#endif >> + kthread_unuse_mm(mm_work->mm); >> + } >> +} >> + >> +static void vdpasim_worker_queue_mm(struct vdpasim *vdpasim, >> + struct vdpasim_mm_work *mm_work) >> +{ >> + struct kthread_work *work = &mm_work->work; >> + >> + kthread_init_work(work, vdpasim_mm_work_fn); >> + kthread_queue_work(vdpasim->worker, work); >> + >> + spin_unlock(&vdpasim->lock); >> + kthread_flush_work(work); >> + spin_lock(&vdpasim->lock); >> +} >> + >> +static int vdpasim_worker_bind_mm(struct vdpasim *vdpasim, >> + struct mm_struct *new_mm, >> + struct task_struct *owner) >> +{ >> + struct vdpasim_mm_work mm_work; >> + >> + mm_work.owner = owner; >> + mm_work.mm = new_mm; >> + mm_work.bind = true; >> + >> + vdpasim_worker_queue_mm(vdpasim, &mm_work); >> + > >Should we wait for the work to be finished? Yep, I'm waiting inside vdpasim_worker_queue_mm() calling kthread_flush_work(). If we will use mutex, I think we can avoid the lock release around that call. > >> + if (!mm_work.ret) >> + vdpasim->mm_bound = new_mm; >> + >> + return mm_work.ret; >> +} >> + >> +static void vdpasim_worker_unbind_mm(struct vdpasim *vdpasim) >> +{ >> + struct vdpasim_mm_work mm_work; >> + >> + if (!vdpasim->mm_bound) >> + return; >> + >> + mm_work.mm = vdpasim->mm_bound; >> + mm_work.bind = false; >> + >> + vdpasim_worker_queue_mm(vdpasim, &mm_work); >> + >> + vdpasim->mm_bound = NULL; >> +} >> static struct vdpasim *vdpa_to_sim(struct vdpa_device *vdpa) >> { >> return container_of(vdpa, struct vdpasim, vdpa); >> @@ -66,8 +146,10 @@ static void vdpasim_vq_notify(struct vringh *vring) >> static void vdpasim_queue_ready(struct vdpasim *vdpasim, unsigned int idx) >> { >> struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx]; >> + bool va_enabled = use_va && vdpasim->mm_bound; >> >> - vringh_init_iotlb(&vq->vring, vdpasim->features, vq->num, false, false, >> + vringh_init_iotlb(&vq->vring, vdpasim->features, vq->num, false, >> + va_enabled, >> (struct vring_desc *)(uintptr_t)vq->desc_addr, >> (struct vring_avail *) >> (uintptr_t)vq->driver_addr, >> @@ -96,6 +178,9 @@ static void vdpasim_do_reset(struct vdpasim *vdpasim) >> { >> int i; >> >> + //TODO: should we cancel the works? >> + vdpasim_worker_unbind_mm(vdpasim); > >We probably don't need this since it's the virtio level reset so we >need to keep the mm bound in this case. Otherwise we may break the >guest. It should be the responsibility of the driver to call >config_ops->unbind if it needs to do that. Got it, my biggest concern was when we go from a vhost-vdpa virtio-vdpa, but as you said, in vhost-vdpa I can call unbind before releasing the device. Thanks, Stefano
On Wed, Dec 14, 2022 at 5:31 PM Stefano Garzarella <sgarzare@redhat.com> wrote: > > The new "use_va" module parameter (default: false) is used in Why not true by default? I'd say it makes more sense for the simulator to use va mode and only use pa for testing it. > vdpa_alloc_device() to inform the vDPA framework that the device > supports VA. > > vringh is initialized to use VA only when "use_va" is true and the > user's mm has been bound. So, only when the bus supports user VA > (e.g. vhost-vdpa). > > vdpasim_mm_work_fn work is used to attach the kthread to the user > address space when the .bind_mm callback is invoked, and to detach > it when the device is reset. > > Signed-off-by: Stefano Garzarella <sgarzare@redhat.com> > --- > drivers/vdpa/vdpa_sim/vdpa_sim.h | 1 + > drivers/vdpa/vdpa_sim/vdpa_sim.c | 104 ++++++++++++++++++++++++++++++- > 2 files changed, 103 insertions(+), 2 deletions(-) > > diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.h b/drivers/vdpa/vdpa_sim/vdpa_sim.h > index 07ef53ea375e..1b010e5c0445 100644 > --- a/drivers/vdpa/vdpa_sim/vdpa_sim.h > +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.h > @@ -55,6 +55,7 @@ struct vdpasim { > struct vdpasim_virtqueue *vqs; > struct kthread_worker *worker; > struct kthread_work work; > + struct mm_struct *mm_bound; > struct vdpasim_dev_attr dev_attr; > /* spinlock to synchronize virtqueue state */ > spinlock_t lock; > diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c > index 36a1d2e0a6ba..6e07cedef30c 100644 > --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c > +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c > @@ -36,10 +36,90 @@ module_param(max_iotlb_entries, int, 0444); > MODULE_PARM_DESC(max_iotlb_entries, > "Maximum number of iotlb entries for each address space. 0 means unlimited. (default: 2048)"); > > +static bool use_va; > +module_param(use_va, bool, 0444); > +MODULE_PARM_DESC(use_va, "Enable the device's ability to use VA"); > + > #define VDPASIM_QUEUE_ALIGN PAGE_SIZE > #define VDPASIM_QUEUE_MAX 256 > #define VDPASIM_VENDOR_ID 0 > > +struct vdpasim_mm_work { > + struct kthread_work work; > + struct task_struct *owner; > + struct mm_struct *mm; > + bool bind; > + int ret; > +}; > + > +static void vdpasim_mm_work_fn(struct kthread_work *work) > +{ > + struct vdpasim_mm_work *mm_work = > + container_of(work, struct vdpasim_mm_work, work); > + > + mm_work->ret = 0; > + > + if (mm_work->bind) { > + kthread_use_mm(mm_work->mm); > +#if 0 > + if (mm_work->owner) > + mm_work->ret = cgroup_attach_task_all(mm_work->owner, > + current); > +#endif > + } else { > +#if 0 > + //TODO: check it > + cgroup_release(current); > +#endif > + kthread_unuse_mm(mm_work->mm); > + } > +} > + > +static void vdpasim_worker_queue_mm(struct vdpasim *vdpasim, > + struct vdpasim_mm_work *mm_work) > +{ > + struct kthread_work *work = &mm_work->work; > + > + kthread_init_work(work, vdpasim_mm_work_fn); > + kthread_queue_work(vdpasim->worker, work); > + > + spin_unlock(&vdpasim->lock); > + kthread_flush_work(work); > + spin_lock(&vdpasim->lock); > +} > + > +static int vdpasim_worker_bind_mm(struct vdpasim *vdpasim, > + struct mm_struct *new_mm, > + struct task_struct *owner) > +{ > + struct vdpasim_mm_work mm_work; > + > + mm_work.owner = owner; > + mm_work.mm = new_mm; > + mm_work.bind = true; > + > + vdpasim_worker_queue_mm(vdpasim, &mm_work); > + > + if (!mm_work.ret) > + vdpasim->mm_bound = new_mm; > + > + return mm_work.ret; > +} > + > +static void vdpasim_worker_unbind_mm(struct vdpasim *vdpasim) > +{ > + struct vdpasim_mm_work mm_work; > + > + if (!vdpasim->mm_bound) > + return; > + > + mm_work.mm = vdpasim->mm_bound; > + mm_work.bind = false; > + > + vdpasim_worker_queue_mm(vdpasim, &mm_work); > + > + vdpasim->mm_bound = NULL; > +} > static struct vdpasim *vdpa_to_sim(struct vdpa_device *vdpa) > { > return container_of(vdpa, struct vdpasim, vdpa); > @@ -66,8 +146,10 @@ static void vdpasim_vq_notify(struct vringh *vring) > static void vdpasim_queue_ready(struct vdpasim *vdpasim, unsigned int idx) > { > struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx]; > + bool va_enabled = use_va && vdpasim->mm_bound; > > - vringh_init_iotlb(&vq->vring, vdpasim->features, vq->num, false, false, > + vringh_init_iotlb(&vq->vring, vdpasim->features, vq->num, false, > + va_enabled, > (struct vring_desc *)(uintptr_t)vq->desc_addr, > (struct vring_avail *) > (uintptr_t)vq->driver_addr, > @@ -96,6 +178,9 @@ static void vdpasim_do_reset(struct vdpasim *vdpasim) > { > int i; > > + //TODO: should we cancel the works? > + vdpasim_worker_unbind_mm(vdpasim); > + > spin_lock(&vdpasim->iommu_lock); > > for (i = 0; i < vdpasim->dev_attr.nvqs; i++) { > @@ -275,7 +360,7 @@ struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr, > > vdpasim = vdpa_alloc_device(struct vdpasim, vdpa, NULL, ops, > dev_attr->ngroups, dev_attr->nas, > - dev_attr->name, false); > + dev_attr->name, use_va); > if (IS_ERR(vdpasim)) { > ret = PTR_ERR(vdpasim); > goto err_alloc; > @@ -657,6 +742,19 @@ static int vdpasim_set_map(struct vdpa_device *vdpa, unsigned int asid, > return ret; > } > > +static int vdpasim_bind_mm(struct vdpa_device *vdpa, struct mm_struct *mm, > + struct task_struct *owner) > +{ > + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); > + int ret; > + > + spin_lock(&vdpasim->lock); > + ret = vdpasim_worker_bind_mm(vdpasim, mm, owner); > + spin_unlock(&vdpasim->lock); > + > + return ret; > +} > + > static int vdpasim_dma_map(struct vdpa_device *vdpa, unsigned int asid, > u64 iova, u64 size, > u64 pa, u32 perm, void *opaque) > @@ -744,6 +842,7 @@ static const struct vdpa_config_ops vdpasim_config_ops = { > .set_group_asid = vdpasim_set_group_asid, > .dma_map = vdpasim_dma_map, > .dma_unmap = vdpasim_dma_unmap, > + .bind_mm = vdpasim_bind_mm, > .free = vdpasim_free, > }; > > @@ -776,6 +875,7 @@ static const struct vdpa_config_ops vdpasim_batch_config_ops = { > .get_iova_range = vdpasim_get_iova_range, > .set_group_asid = vdpasim_set_group_asid, > .set_map = vdpasim_set_map, > + .bind_mm = vdpasim_bind_mm, > .free = vdpasim_free, > }; > > -- > 2.38.1 >
On Wed, Dec 21, 2022 at 08:17:41AM +0100, Eugenio Perez Martin wrote: >On Wed, Dec 14, 2022 at 5:31 PM Stefano Garzarella <sgarzare@redhat.com> wrote: >> >> The new "use_va" module parameter (default: false) is used in > >Why not true by default? I'd say it makes more sense for the simulator >to use va mode and only use pa for testing it. Yep, you are right. I'll change it in the version. I initially left it at false because we usually use the simulator to test the paths that would be used for the real hardware. Thanks, Stefano
diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.h b/drivers/vdpa/vdpa_sim/vdpa_sim.h index 07ef53ea375e..1b010e5c0445 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.h +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.h @@ -55,6 +55,7 @@ struct vdpasim { struct vdpasim_virtqueue *vqs; struct kthread_worker *worker; struct kthread_work work; + struct mm_struct *mm_bound; struct vdpasim_dev_attr dev_attr; /* spinlock to synchronize virtqueue state */ spinlock_t lock; diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index 36a1d2e0a6ba..6e07cedef30c 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -36,10 +36,90 @@ module_param(max_iotlb_entries, int, 0444); MODULE_PARM_DESC(max_iotlb_entries, "Maximum number of iotlb entries for each address space. 0 means unlimited. (default: 2048)"); +static bool use_va; +module_param(use_va, bool, 0444); +MODULE_PARM_DESC(use_va, "Enable the device's ability to use VA"); + #define VDPASIM_QUEUE_ALIGN PAGE_SIZE #define VDPASIM_QUEUE_MAX 256 #define VDPASIM_VENDOR_ID 0 +struct vdpasim_mm_work { + struct kthread_work work; + struct task_struct *owner; + struct mm_struct *mm; + bool bind; + int ret; +}; + +static void vdpasim_mm_work_fn(struct kthread_work *work) +{ + struct vdpasim_mm_work *mm_work = + container_of(work, struct vdpasim_mm_work, work); + + mm_work->ret = 0; + + if (mm_work->bind) { + kthread_use_mm(mm_work->mm); +#if 0 + if (mm_work->owner) + mm_work->ret = cgroup_attach_task_all(mm_work->owner, + current); +#endif + } else { +#if 0 + //TODO: check it + cgroup_release(current); +#endif + kthread_unuse_mm(mm_work->mm); + } +} + +static void vdpasim_worker_queue_mm(struct vdpasim *vdpasim, + struct vdpasim_mm_work *mm_work) +{ + struct kthread_work *work = &mm_work->work; + + kthread_init_work(work, vdpasim_mm_work_fn); + kthread_queue_work(vdpasim->worker, work); + + spin_unlock(&vdpasim->lock); + kthread_flush_work(work); + spin_lock(&vdpasim->lock); +} + +static int vdpasim_worker_bind_mm(struct vdpasim *vdpasim, + struct mm_struct *new_mm, + struct task_struct *owner) +{ + struct vdpasim_mm_work mm_work; + + mm_work.owner = owner; + mm_work.mm = new_mm; + mm_work.bind = true; + + vdpasim_worker_queue_mm(vdpasim, &mm_work); + + if (!mm_work.ret) + vdpasim->mm_bound = new_mm; + + return mm_work.ret; +} + +static void vdpasim_worker_unbind_mm(struct vdpasim *vdpasim) +{ + struct vdpasim_mm_work mm_work; + + if (!vdpasim->mm_bound) + return; + + mm_work.mm = vdpasim->mm_bound; + mm_work.bind = false; + + vdpasim_worker_queue_mm(vdpasim, &mm_work); + + vdpasim->mm_bound = NULL; +} static struct vdpasim *vdpa_to_sim(struct vdpa_device *vdpa) { return container_of(vdpa, struct vdpasim, vdpa); @@ -66,8 +146,10 @@ static void vdpasim_vq_notify(struct vringh *vring) static void vdpasim_queue_ready(struct vdpasim *vdpasim, unsigned int idx) { struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx]; + bool va_enabled = use_va && vdpasim->mm_bound; - vringh_init_iotlb(&vq->vring, vdpasim->features, vq->num, false, false, + vringh_init_iotlb(&vq->vring, vdpasim->features, vq->num, false, + va_enabled, (struct vring_desc *)(uintptr_t)vq->desc_addr, (struct vring_avail *) (uintptr_t)vq->driver_addr, @@ -96,6 +178,9 @@ static void vdpasim_do_reset(struct vdpasim *vdpasim) { int i; + //TODO: should we cancel the works? + vdpasim_worker_unbind_mm(vdpasim); + spin_lock(&vdpasim->iommu_lock); for (i = 0; i < vdpasim->dev_attr.nvqs; i++) { @@ -275,7 +360,7 @@ struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr, vdpasim = vdpa_alloc_device(struct vdpasim, vdpa, NULL, ops, dev_attr->ngroups, dev_attr->nas, - dev_attr->name, false); + dev_attr->name, use_va); if (IS_ERR(vdpasim)) { ret = PTR_ERR(vdpasim); goto err_alloc; @@ -657,6 +742,19 @@ static int vdpasim_set_map(struct vdpa_device *vdpa, unsigned int asid, return ret; } +static int vdpasim_bind_mm(struct vdpa_device *vdpa, struct mm_struct *mm, + struct task_struct *owner) +{ + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + int ret; + + spin_lock(&vdpasim->lock); + ret = vdpasim_worker_bind_mm(vdpasim, mm, owner); + spin_unlock(&vdpasim->lock); + + return ret; +} + static int vdpasim_dma_map(struct vdpa_device *vdpa, unsigned int asid, u64 iova, u64 size, u64 pa, u32 perm, void *opaque) @@ -744,6 +842,7 @@ static const struct vdpa_config_ops vdpasim_config_ops = { .set_group_asid = vdpasim_set_group_asid, .dma_map = vdpasim_dma_map, .dma_unmap = vdpasim_dma_unmap, + .bind_mm = vdpasim_bind_mm, .free = vdpasim_free, }; @@ -776,6 +875,7 @@ static const struct vdpa_config_ops vdpasim_batch_config_ops = { .get_iova_range = vdpasim_get_iova_range, .set_group_asid = vdpasim_set_group_asid, .set_map = vdpasim_set_map, + .bind_mm = vdpasim_bind_mm, .free = vdpasim_free, };
The new "use_va" module parameter (default: false) is used in vdpa_alloc_device() to inform the vDPA framework that the device supports VA. vringh is initialized to use VA only when "use_va" is true and the user's mm has been bound. So, only when the bus supports user VA (e.g. vhost-vdpa). vdpasim_mm_work_fn work is used to attach the kthread to the user address space when the .bind_mm callback is invoked, and to detach it when the device is reset. Signed-off-by: Stefano Garzarella <sgarzare@redhat.com> --- drivers/vdpa/vdpa_sim/vdpa_sim.h | 1 + drivers/vdpa/vdpa_sim/vdpa_sim.c | 104 ++++++++++++++++++++++++++++++- 2 files changed, 103 insertions(+), 2 deletions(-)