diff mbox series

[v3,4/6] drm/xe: Use ttm_bo_access in xe_vm_snapshot_capture_delayed

Message ID 20241019192030.1505020-5-matthew.brost@intel.com (mailing list archive)
State New, archived
Headers show
Series Fix non-contiguous VRAM BO access in Xe | expand

Commit Message

Matthew Brost Oct. 19, 2024, 7:20 p.m. UTC
Mapping a non-contiguous BO in VRAM with ttm_bo_vmap doesn't work; use
ttm_bo_access instead.

v2:
 - Fix error handling

Fixes: 0eb2a18a8fad ("drm/xe: Implement VM snapshot support for BO's and userptr")
Suggested-by: Matthew Auld <matthew.auld@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
---
 drivers/gpu/drm/xe/xe_vm.c | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

Comments

Matthew Auld Oct. 21, 2024, 9:07 a.m. UTC | #1
On 19/10/2024 20:20, Matthew Brost wrote:
> Non-contiguous mapping of BO in VRAM doesn't work, use ttm_bo_access
> instead.
> 
> v2:
>   - Fix error handling
> 
> Fixes: 0eb2a18a8fad ("drm/xe: Implement VM snapshot support for BO's and userptr")
> Suggested-by: Matthew Auld <matthew.auld@intel.com>
> Signed-off-by: Matthew Brost <matthew.brost@intel.com>

The other user looks to be the clear color stuff for display. See 
intel_bo_read_from_page(). I think that is also potentially busted?

> ---
>   drivers/gpu/drm/xe/xe_vm.c | 17 ++++++-----------
>   1 file changed, 6 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> index c99380271de6..c8782da3a5c3 100644
> --- a/drivers/gpu/drm/xe/xe_vm.c
> +++ b/drivers/gpu/drm/xe/xe_vm.c
> @@ -3303,7 +3303,6 @@ void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap)
>   
>   	for (int i = 0; i < snap->num_snaps; i++) {
>   		struct xe_bo *bo = snap->snap[i].bo;
> -		struct iosys_map src;
>   		int err;
>   
>   		if (IS_ERR(snap->snap[i].data))
> @@ -3316,16 +3315,12 @@ void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap)
>   		}
>   
>   		if (bo) {
> -			xe_bo_lock(bo, false);

I think we still need the lock, or is that grabbed somewhere else? Also 
I guess CI doesn't currently hit this path?

> -			err = ttm_bo_vmap(&bo->ttm, &src);
> -			if (!err) {
> -				xe_map_memcpy_from(xe_bo_device(bo),
> -						   snap->snap[i].data,
> -						   &src, snap->snap[i].bo_ofs,
> -						   snap->snap[i].len);
> -				ttm_bo_vunmap(&bo->ttm, &src);
> -			}
> -			xe_bo_unlock(bo);
> +			err = ttm_bo_access(&bo->ttm, snap->snap[i].bo_ofs,
> +					    snap->snap[i].data, snap->snap[i].len, 0);
> +			if (!(err < 0) && err != snap->snap[i].len)
> +				err = -EIO;
> +			else if (!(err < 0))
> +				err = 0;
>   		} else {
>   			void __user *userptr = (void __user *)(size_t)snap->snap[i].bo_ofs;
>
Matthew Brost Oct. 21, 2024, 4:58 p.m. UTC | #2
On Mon, Oct 21, 2024 at 10:07:31AM +0100, Matthew Auld wrote:
> On 19/10/2024 20:20, Matthew Brost wrote:
> > Non-contiguous mapping of BO in VRAM doesn't work, use ttm_bo_access
> > instead.
> > 
> > v2:
> >   - Fix error handling
> > 
> > Fixes: 0eb2a18a8fad ("drm/xe: Implement VM snapshot support for BO's and userptr")
> > Suggested-by: Matthew Auld <matthew.auld@intel.com>
> > Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> 
> The other user looks to be the clear color stuff for display. See
> intel_bo_read_from_page(). I think that is also potentially busted?
> 

I looked at display but missed this. Will fix in next rev.

> > ---
> >   drivers/gpu/drm/xe/xe_vm.c | 17 ++++++-----------
> >   1 file changed, 6 insertions(+), 11 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> > index c99380271de6..c8782da3a5c3 100644
> > --- a/drivers/gpu/drm/xe/xe_vm.c
> > +++ b/drivers/gpu/drm/xe/xe_vm.c
> > @@ -3303,7 +3303,6 @@ void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap)
> >   	for (int i = 0; i < snap->num_snaps; i++) {
> >   		struct xe_bo *bo = snap->snap[i].bo;
> > -		struct iosys_map src;
> >   		int err;
> >   		if (IS_ERR(snap->snap[i].data))
> > @@ -3316,16 +3315,12 @@ void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap)
> >   		}
> >   		if (bo) {
> > -			xe_bo_lock(bo, false);
> 
> I think we still need the lock, or is that grabbed somewhere else? Also I
> guess CI doesn't currently hit this path?
> 

Yep, the lock is grabbed elsewhere: ttm_bo_access does a ttm_bo_reserve /
ttm_bo_unreserve, which is a lock / unlock.

Matt

> > -			err = ttm_bo_vmap(&bo->ttm, &src);
> > -			if (!err) {
> > -				xe_map_memcpy_from(xe_bo_device(bo),
> > -						   snap->snap[i].data,
> > -						   &src, snap->snap[i].bo_ofs,
> > -						   snap->snap[i].len);
> > -				ttm_bo_vunmap(&bo->ttm, &src);
> > -			}
> > -			xe_bo_unlock(bo);
> > +			err = ttm_bo_access(&bo->ttm, snap->snap[i].bo_ofs,
> > +					    snap->snap[i].data, snap->snap[i].len, 0);
> > +			if (!(err < 0) && err != snap->snap[i].len)
> > +				err = -EIO;
> > +			else if (!(err < 0))
> > +				err = 0;
> >   		} else {
> >   			void __user *userptr = (void __user *)(size_t)snap->snap[i].bo_ofs;
diff mbox series

Patch

diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index c99380271de6..c8782da3a5c3 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -3303,7 +3303,6 @@  void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap)
 
 	for (int i = 0; i < snap->num_snaps; i++) {
 		struct xe_bo *bo = snap->snap[i].bo;
-		struct iosys_map src;
 		int err;
 
 		if (IS_ERR(snap->snap[i].data))
@@ -3316,16 +3315,12 @@  void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap)
 		}
 
 		if (bo) {
-			xe_bo_lock(bo, false);
-			err = ttm_bo_vmap(&bo->ttm, &src);
-			if (!err) {
-				xe_map_memcpy_from(xe_bo_device(bo),
-						   snap->snap[i].data,
-						   &src, snap->snap[i].bo_ofs,
-						   snap->snap[i].len);
-				ttm_bo_vunmap(&bo->ttm, &src);
-			}
-			xe_bo_unlock(bo);
+			err = ttm_bo_access(&bo->ttm, snap->snap[i].bo_ofs,
+					    snap->snap[i].data, snap->snap[i].len, 0);
+			if (!(err < 0) && err != snap->snap[i].len)
+				err = -EIO;
+			else if (!(err < 0))
+				err = 0;
 		} else {
 			void __user *userptr = (void __user *)(size_t)snap->snap[i].bo_ofs;