diff mbox

drm/ttm: Remove set_need_resched from the ttm fault handler

Message ID 1384454946-3429-1-git-send-email-thellstrom@vmware.com (mailing list archive)
State New, archived
Headers show

Commit Message

Thomas Hellstrom Nov. 14, 2013, 6:49 p.m. UTC
Addresses
"[BUG] completely bonkers use of set_need_resched + VM_FAULT_NOPAGE".

In the first occurence it was used to try to be nice while releasing the
mmap_sem and retrying the fault to work around a locking inversion.
The second occurence was never used.

There has been some discussion whether we should change the locking order to
mmap_sem -> bo_reserve. This patch doesn't address that issue, and leaves
that locking order undefined. The solution that we release the mmap_sem if
tryreserve fails and wait for the buffer to become unreserved is something
we want in any case, and follows how the core vm system waits for pages
to be come unlocked while releasing the mmap_sem.

The code also outlines what needs to be changed if we want to establish the
locking order as mmap_sem -> bo::reserve.

One slight issue that remains with this code is that the fault handler might
be prone to starvation if another thread countinously reserves the buffer.
IMO that usage pattern is highly unlikely.

Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c    |   35 ++++++++++++++++++++++++++++++++++-
 drivers/gpu/drm/ttm/ttm_bo_vm.c |   26 ++++++++++++++++++++------
 include/drm/ttm/ttm_bo_api.h    |    4 +++-
 3 files changed, 57 insertions(+), 8 deletions(-)

Comments

Jakob Bornecrantz Nov. 18, 2013, 3:07 p.m. UTC | #1
On Thu, Nov 14, 2013 at 7:49 PM, Thomas Hellstrom <thellstrom@vmware.com> wrote:
> Addresses
> "[BUG] completely bonkers use of set_need_resched + VM_FAULT_NOPAGE".
>
> In the first occurence it was used to try to be nice while releasing the
> mmap_sem and retrying the fault to work around a locking inversion.
> The second occurence was never used.
>
> There has been some discussion whether we should change the locking order to
> mmap_sem -> bo_reserve. This patch doesn't address that issue, and leaves
> that locking order undefined. The solution that we release the mmap_sem if
> tryreserve fails and wait for the buffer to become unreserved is something
> we want in any case, and follows how the core vm system waits for pages
> to be come unlocked while releasing the mmap_sem.
>
> The code also outlines what needs to be changed if we want to establish the
> locking order as mmap_sem -> bo::reserve.
>
> One slight issue that remains with this code is that the fault handler might
> be prone to starvation if another thread countinously reserves the buffer.
> IMO that usage pattern is highly unlikely.
>
> Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>

Reviewed-by: Jakob Bornecrantz <jakob@vmware.com>

> ---
>  drivers/gpu/drm/ttm/ttm_bo.c    |   35 ++++++++++++++++++++++++++++++++++-
>  drivers/gpu/drm/ttm/ttm_bo_vm.c |   26 ++++++++++++++++++++------
>  include/drm/ttm/ttm_bo_api.h    |    4 +++-
>  3 files changed, 57 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index 8d5a646..07e02c4 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -151,7 +151,7 @@ static void ttm_bo_release_list(struct kref *list_kref)
>         atomic_dec(&bo->glob->bo_count);
>         if (bo->resv == &bo->ttm_resv)
>                 reservation_object_fini(&bo->ttm_resv);
> -
> +       mutex_destroy(&bo->wu_mutex);
>         if (bo->destroy)
>                 bo->destroy(bo);
>         else {
> @@ -1123,6 +1123,7 @@ int ttm_bo_init(struct ttm_bo_device *bdev,
>         INIT_LIST_HEAD(&bo->ddestroy);
>         INIT_LIST_HEAD(&bo->swap);
>         INIT_LIST_HEAD(&bo->io_reserve_lru);
> +       mutex_init(&bo->wu_mutex);
>         bo->bdev = bdev;
>         bo->glob = bdev->glob;
>         bo->type = type;
> @@ -1704,3 +1705,35 @@ void ttm_bo_swapout_all(struct ttm_bo_device *bdev)
>                 ;
>  }
>  EXPORT_SYMBOL(ttm_bo_swapout_all);
> +
> +/**
> + * ttm_bo_wait_unreserved - interruptible wait for a buffer object to become
> + * unreserved
> + *
> + * @bo: Pointer to buffer
> + */
> +int ttm_bo_wait_unreserved(struct ttm_buffer_object *bo)
> +{
> +       int ret;
> +
> +       /*
> +        * In the absense of a wait_unlocked API,
> +        * Use the bo::wu_mutex to avoid triggering livelocks due to
> +        * concurrent use of this function. Note that this use of
> +        * bo::wu_mutex can go away if we change locking order to
> +        * mmap_sem -> bo::reserve.
> +        */
> +       ret = mutex_lock_interruptible(&bo->wu_mutex);
> +       if (unlikely(ret != 0))
> +               return -ERESTARTSYS;
> +       if (!ww_mutex_is_locked(&bo->resv->lock))
> +               goto out_unlock;
> +       ret = ttm_bo_reserve_nolru(bo, true, false, false, NULL);
> +       if (unlikely(ret != 0))
> +               goto out_unlock;
> +       ww_mutex_unlock(&bo->resv->lock);
> +
> +out_unlock:
> +       mutex_unlock(&bo->wu_mutex);
> +       return ret;
> +}
> diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
> index ac617f3..b249ab9 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
> @@ -107,13 +107,28 @@ static int ttm_bo_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
>         /*
>          * Work around locking order reversal in fault / nopfn
>          * between mmap_sem and bo_reserve: Perform a trylock operation
> -        * for reserve, and if it fails, retry the fault after scheduling.
> +        * for reserve, and if it fails, retry the fault after waiting
> +        * for the buffer to become unreserved.
>          */
> -
> -       ret = ttm_bo_reserve(bo, true, true, false, 0);
> +       ret = ttm_bo_reserve(bo, true, true, false, NULL);
>         if (unlikely(ret != 0)) {
> -               if (ret == -EBUSY)
> -                       set_need_resched();
> +               if (ret != -EBUSY)
> +                       return VM_FAULT_NOPAGE;
> +
> +               if (vmf->flags & FAULT_FLAG_ALLOW_RETRY) {
> +                       if (!(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
> +                               up_read(&vma->vm_mm->mmap_sem);
> +                               (void) ttm_bo_wait_unreserved(bo);
> +                       }
> +
> +                       return VM_FAULT_RETRY;
> +               }
> +
> +               /*
> +                * If we'd want to change locking order to
> +                * mmap_sem -> bo::reserve, we'd use a blocking reserve here
> +                * instead of retrying the fault...
> +                */
>                 return VM_FAULT_NOPAGE;
>         }
>
> @@ -123,7 +138,6 @@ static int ttm_bo_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
>                 case 0:
>                         break;
>                 case -EBUSY:
> -                       set_need_resched();
>                 case -ERESTARTSYS:
>                         retval = VM_FAULT_NOPAGE;
>                         goto out_unlock;
> diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
> index 751eaff..ee127ec 100644
> --- a/include/drm/ttm/ttm_bo_api.h
> +++ b/include/drm/ttm/ttm_bo_api.h
> @@ -169,6 +169,7 @@ struct ttm_tt;
>   * @offset: The current GPU offset, which can have different meanings
>   * depending on the memory type. For SYSTEM type memory, it should be 0.
>   * @cur_placement: Hint of current placement.
> + * @wu_mutex: Wait unreserved mutex.
>   *
>   * Base class for TTM buffer object, that deals with data placement and CPU
>   * mappings. GPU mappings are really up to the driver, but for simpler GPUs
> @@ -250,6 +251,7 @@ struct ttm_buffer_object {
>
>         struct reservation_object *resv;
>         struct reservation_object ttm_resv;
> +       struct mutex wu_mutex;
>  };
>
>  /**
> @@ -702,5 +704,5 @@ extern ssize_t ttm_bo_io(struct ttm_bo_device *bdev, struct file *filp,
>                          size_t count, loff_t *f_pos, bool write);
>
>  extern void ttm_bo_swapout_all(struct ttm_bo_device *bdev);
> -
> +extern int ttm_bo_wait_unreserved(struct ttm_buffer_object *bo);
>  #endif
> --
> 1.7.10.4
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/dri-devel
diff mbox

Patch

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 8d5a646..07e02c4 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -151,7 +151,7 @@  static void ttm_bo_release_list(struct kref *list_kref)
 	atomic_dec(&bo->glob->bo_count);
 	if (bo->resv == &bo->ttm_resv)
 		reservation_object_fini(&bo->ttm_resv);
-
+	mutex_destroy(&bo->wu_mutex);
 	if (bo->destroy)
 		bo->destroy(bo);
 	else {
@@ -1123,6 +1123,7 @@  int ttm_bo_init(struct ttm_bo_device *bdev,
 	INIT_LIST_HEAD(&bo->ddestroy);
 	INIT_LIST_HEAD(&bo->swap);
 	INIT_LIST_HEAD(&bo->io_reserve_lru);
+	mutex_init(&bo->wu_mutex);
 	bo->bdev = bdev;
 	bo->glob = bdev->glob;
 	bo->type = type;
@@ -1704,3 +1705,35 @@  void ttm_bo_swapout_all(struct ttm_bo_device *bdev)
 		;
 }
 EXPORT_SYMBOL(ttm_bo_swapout_all);
+
+/**
+ * ttm_bo_wait_unreserved - interruptible wait for a buffer object to become
+ * unreserved
+ *
+ * @bo: Pointer to buffer
+ */
+int ttm_bo_wait_unreserved(struct ttm_buffer_object *bo)
+{
+	int ret;
+
+	/*
+	 * In the absense of a wait_unlocked API,
+	 * Use the bo::wu_mutex to avoid triggering livelocks due to
+	 * concurrent use of this function. Note that this use of
+	 * bo::wu_mutex can go away if we change locking order to
+	 * mmap_sem -> bo::reserve.
+	 */
+	ret = mutex_lock_interruptible(&bo->wu_mutex);
+	if (unlikely(ret != 0))
+		return -ERESTARTSYS;
+	if (!ww_mutex_is_locked(&bo->resv->lock))
+		goto out_unlock;
+	ret = ttm_bo_reserve_nolru(bo, true, false, false, NULL);
+	if (unlikely(ret != 0))
+		goto out_unlock;
+	ww_mutex_unlock(&bo->resv->lock);
+
+out_unlock:
+	mutex_unlock(&bo->wu_mutex);
+	return ret;
+}
diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index ac617f3..b249ab9 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -107,13 +107,28 @@  static int ttm_bo_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	/*
 	 * Work around locking order reversal in fault / nopfn
 	 * between mmap_sem and bo_reserve: Perform a trylock operation
-	 * for reserve, and if it fails, retry the fault after scheduling.
+	 * for reserve, and if it fails, retry the fault after waiting
+	 * for the buffer to become unreserved.
 	 */
-
-	ret = ttm_bo_reserve(bo, true, true, false, 0);
+	ret = ttm_bo_reserve(bo, true, true, false, NULL);
 	if (unlikely(ret != 0)) {
-		if (ret == -EBUSY)
-			set_need_resched();
+		if (ret != -EBUSY)
+			return VM_FAULT_NOPAGE;
+
+		if (vmf->flags & FAULT_FLAG_ALLOW_RETRY) {
+			if (!(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
+				up_read(&vma->vm_mm->mmap_sem);
+				(void) ttm_bo_wait_unreserved(bo);
+			}
+
+			return VM_FAULT_RETRY;
+		}
+
+		/*
+		 * If we'd want to change locking order to
+		 * mmap_sem -> bo::reserve, we'd use a blocking reserve here
+		 * instead of retrying the fault...
+		 */
 		return VM_FAULT_NOPAGE;
 	}
 
@@ -123,7 +138,6 @@  static int ttm_bo_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 		case 0:
 			break;
 		case -EBUSY:
-			set_need_resched();
 		case -ERESTARTSYS:
 			retval = VM_FAULT_NOPAGE;
 			goto out_unlock;
diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
index 751eaff..ee127ec 100644
--- a/include/drm/ttm/ttm_bo_api.h
+++ b/include/drm/ttm/ttm_bo_api.h
@@ -169,6 +169,7 @@  struct ttm_tt;
  * @offset: The current GPU offset, which can have different meanings
  * depending on the memory type. For SYSTEM type memory, it should be 0.
  * @cur_placement: Hint of current placement.
+ * @wu_mutex: Wait unreserved mutex.
  *
  * Base class for TTM buffer object, that deals with data placement and CPU
  * mappings. GPU mappings are really up to the driver, but for simpler GPUs
@@ -250,6 +251,7 @@  struct ttm_buffer_object {
 
 	struct reservation_object *resv;
 	struct reservation_object ttm_resv;
+	struct mutex wu_mutex;
 };
 
 /**
@@ -702,5 +704,5 @@  extern ssize_t ttm_bo_io(struct ttm_bo_device *bdev, struct file *filp,
 			 size_t count, loff_t *f_pos, bool write);
 
 extern void ttm_bo_swapout_all(struct ttm_bo_device *bdev);
-
+extern int ttm_bo_wait_unreserved(struct ttm_buffer_object *bo);
 #endif