Message ID | 20241017233941.1047522-2-matthew.brost@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Enable non-contiguous VRAM access in Xe | expand |
On Thu, 2024-10-17 at 16:39 -0700, Matthew Brost wrote: > Non-contiguous VRAM cannot be mapped in Xe nor can non-visible VRAM > easily be accessed. Add ttm_bo_access, which is similar to > ttm_bo_vm_access, to access such memory. > > Visible VRAM access is only supported at the momement but a follow up > can add GPU access to non-visible VRAM. > > Suggested-by: Thomas Hellström <thomas.hellstrom@linux.intel.com> > Signed-off-by: Matthew Brost <matthew.brost@intel.com> > --- > drivers/gpu/drm/ttm/ttm_bo_vm.c | 20 +++++++++----- > drivers/gpu/drm/xe/xe_bo.c | 48 > +++++++++++++++++++++++++++++++++ > include/drm/ttm/ttm_bo.h | 2 ++ > 3 files changed, 64 insertions(+), 6 deletions(-) > > diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c > b/drivers/gpu/drm/ttm/ttm_bo_vm.c > index 2c699ed1963a..b53cc064da44 100644 > --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c > +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c > @@ -405,13 +405,9 @@ static int ttm_bo_vm_access_kmap(struct > ttm_buffer_object *bo, > return len; > } > > -int ttm_bo_vm_access(struct vm_area_struct *vma, unsigned long addr, > - void *buf, int len, int write) > +int ttm_bo_access(struct ttm_buffer_object *bo, unsigned long > offset, > + void *buf, int len, int write) > { > - struct ttm_buffer_object *bo = vma->vm_private_data; > - unsigned long offset = (addr) - vma->vm_start + > - ((vma->vm_pgoff - drm_vma_node_start(&bo- > >base.vma_node)) > - << PAGE_SHIFT); > int ret; > > if (len < 1 || (offset + len) > bo->base.size) > @@ -439,6 +435,18 @@ int ttm_bo_vm_access(struct vm_area_struct *vma, > unsigned long addr, > > return ret; > } > +EXPORT_SYMBOL(ttm_bo_access); > + > +int ttm_bo_vm_access(struct vm_area_struct *vma, unsigned long addr, > + void *buf, int len, int write) > +{ > + struct ttm_buffer_object *bo = vma->vm_private_data; > + unsigned long offset = (addr) - vma->vm_start + > + ((vma->vm_pgoff - drm_vma_node_start(&bo- > >base.vma_node)) > + << PAGE_SHIFT); > + > + return ttm_bo_access(bo, offset, buf, len, write); > 
+} > EXPORT_SYMBOL(ttm_bo_vm_access); > > static const struct vm_operations_struct ttm_bo_vm_ops = { > diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c > index 5b232f2951b1..267f3b03a6d0 100644 > --- a/drivers/gpu/drm/xe/xe_bo.c > +++ b/drivers/gpu/drm/xe/xe_bo.c > @@ -1111,6 +1111,53 @@ static void xe_ttm_bo_swap_notify(struct > ttm_buffer_object *ttm_bo) > } > } > > +static int xe_ttm_access_memory(struct ttm_buffer_object *ttm_bo, > + unsigned long offset, void *buf, int > len, > + int write) > +{ > + struct xe_bo *bo = ttm_to_xe_bo(ttm_bo); > + struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev); > + struct iosys_map vmap; > + struct xe_res_cursor cursor; > + struct xe_mem_region *vram; > + void __iomem *virtual; > + int bytes_left = len; > + > + xe_bo_assert_held(bo); > + > + if (!mem_type_is_vram(ttm_bo->resource->mem_type)) > + return -EIO; > + > + /* FIXME: Use GPU for non-visible VRAM */ > + if (!(bo->flags & XE_BO_FLAG_NEEDS_CPU_ACCESS)) > + return -EINVAL; Is it possible to check whether the bo is actually already in cpu visible space even if the flag is missing? Also is there an error code convention for the access() callback? Should we return -EIO here as well? > + > + vram = res_to_mem_region(ttm_bo->resource); > + xe_res_first(ttm_bo->resource, offset & ~PAGE_MASK, 0, > &cursor); > + > + do { > + int wcount = PAGE_SIZE - (offset & PAGE_MASK) > > bytes_left ? > + bytes_left : PAGE_SIZE - (offset & > PAGE_MASK); > + > + virtual = (u8 __force *)vram->mapping + > cursor.start; Is it possible to use (u8 __iomem *)? 
> + > + iosys_map_set_vaddr_iomem(&vmap, (void __iomem > *)virtual); And no typecast here (virtual should be __iomem already) > + if (write) > + xe_map_memcpy_to(xe, &vmap, offset & > PAGE_MASK, buf, > + wcount); > + else > + xe_map_memcpy_from(xe, buf, &vmap, offset & > PAGE_MASK, > + wcount); > + > + offset += wcount; > + buf += wcount; > + bytes_left -= wcount; > + xe_res_next(&cursor, PAGE_SIZE); > + } while (bytes_left); > + > + return len; > +} > + Otherwise LTGM. /Thomas > const struct ttm_device_funcs xe_ttm_funcs = { > .ttm_tt_create = xe_ttm_tt_create, > .ttm_tt_populate = xe_ttm_tt_populate, > @@ -1120,6 +1167,7 @@ const struct ttm_device_funcs xe_ttm_funcs = { > .move = xe_bo_move, > .io_mem_reserve = xe_ttm_io_mem_reserve, > .io_mem_pfn = xe_ttm_io_mem_pfn, > + .access_memory = xe_ttm_access_memory, > .release_notify = xe_ttm_bo_release_notify, > .eviction_valuable = ttm_bo_eviction_valuable, > .delete_mem_notify = xe_ttm_bo_delete_mem_notify, > diff --git a/include/drm/ttm/ttm_bo.h b/include/drm/ttm/ttm_bo.h > index 5804408815be..8ea11cd8df39 100644 > --- a/include/drm/ttm/ttm_bo.h > +++ b/include/drm/ttm/ttm_bo.h > @@ -421,6 +421,8 @@ void ttm_bo_unpin(struct ttm_buffer_object *bo); > int ttm_bo_evict_first(struct ttm_device *bdev, > struct ttm_resource_manager *man, > struct ttm_operation_ctx *ctx); > +int ttm_bo_access(struct ttm_buffer_object *bo, unsigned long > offset, > + void *buf, int len, int write); > vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo, > struct vm_fault *vmf); > vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf,
On Fri, Oct 18, 2024 at 09:27:48AM +0200, Thomas Hellström wrote: > On Thu, 2024-10-17 at 16:39 -0700, Matthew Brost wrote: > > Non-contiguous VRAM cannot be mapped in Xe nor can non-visible VRAM > > easily be accessed. Add ttm_bo_access, which is similar to > > ttm_bo_vm_access, to access such memory. > > > > Visible VRAM access is only supported at the momement but a follow up > > can add GPU access to non-visible VRAM. > > > > Suggested-by: Thomas Hellström <thomas.hellstrom@linux.intel.com> > > Signed-off-by: Matthew Brost <matthew.brost@intel.com> > > --- > > drivers/gpu/drm/ttm/ttm_bo_vm.c | 20 +++++++++----- > > drivers/gpu/drm/xe/xe_bo.c | 48 > > +++++++++++++++++++++++++++++++++ > > include/drm/ttm/ttm_bo.h | 2 ++ > > 3 files changed, 64 insertions(+), 6 deletions(-) > > > > diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c > > b/drivers/gpu/drm/ttm/ttm_bo_vm.c > > index 2c699ed1963a..b53cc064da44 100644 > > --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c > > +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c > > @@ -405,13 +405,9 @@ static int ttm_bo_vm_access_kmap(struct > > ttm_buffer_object *bo, > > return len; > > } > > > > -int ttm_bo_vm_access(struct vm_area_struct *vma, unsigned long addr, > > - void *buf, int len, int write) > > +int ttm_bo_access(struct ttm_buffer_object *bo, unsigned long > > offset, > > + void *buf, int len, int write) > > { > > - struct ttm_buffer_object *bo = vma->vm_private_data; > > - unsigned long offset = (addr) - vma->vm_start + > > - ((vma->vm_pgoff - drm_vma_node_start(&bo- > > >base.vma_node)) > > - << PAGE_SHIFT); > > int ret; > > > > if (len < 1 || (offset + len) > bo->base.size) > > @@ -439,6 +435,18 @@ int ttm_bo_vm_access(struct vm_area_struct *vma, > > unsigned long addr, > > > > return ret; > > } > > +EXPORT_SYMBOL(ttm_bo_access); > > + > > +int ttm_bo_vm_access(struct vm_area_struct *vma, unsigned long addr, > > + void *buf, int len, int write) > > +{ > > + struct ttm_buffer_object *bo = vma->vm_private_data; > > + unsigned long offset = 
(addr) - vma->vm_start + > > + ((vma->vm_pgoff - drm_vma_node_start(&bo- > > >base.vma_node)) > > + << PAGE_SHIFT); > > + > > + return ttm_bo_access(bo, offset, buf, len, write); > > +} > > EXPORT_SYMBOL(ttm_bo_vm_access); > > > > static const struct vm_operations_struct ttm_bo_vm_ops = { > > diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c > > index 5b232f2951b1..267f3b03a6d0 100644 > > --- a/drivers/gpu/drm/xe/xe_bo.c > > +++ b/drivers/gpu/drm/xe/xe_bo.c > > @@ -1111,6 +1111,53 @@ static void xe_ttm_bo_swap_notify(struct > > ttm_buffer_object *ttm_bo) > > } > > } > > > > +static int xe_ttm_access_memory(struct ttm_buffer_object *ttm_bo, > > + unsigned long offset, void *buf, int > > len, > > + int write) > > +{ > > + struct xe_bo *bo = ttm_to_xe_bo(ttm_bo); > > + struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev); > > + struct iosys_map vmap; > > + struct xe_res_cursor cursor; > > + struct xe_mem_region *vram; > > + void __iomem *virtual; > > + int bytes_left = len; > > + > > + xe_bo_assert_held(bo); > > + > > + if (!mem_type_is_vram(ttm_bo->resource->mem_type)) > > + return -EIO; > > + > > + /* FIXME: Use GPU for non-visible VRAM */ > > + if (!(bo->flags & XE_BO_FLAG_NEEDS_CPU_ACCESS)) > > + return -EINVAL; > > Is it possible to check whether the bo is actually already in cpu > visible space even if the flag is missing? Also is there an error code > convention for the access() callback? Should we return -EIO here as > well? > We should be able to setup of cursor, walk it, and it if all in visible space proceed with the memcpy. Let me look at that. Eventually we will need GPU access based on the eudebug feedback but making this better for now is probably a good idea. Yea, EIO seems like a better return code too. > > + > > + vram = res_to_mem_region(ttm_bo->resource); > > + xe_res_first(ttm_bo->resource, offset & ~PAGE_MASK, 0, > > &cursor); > > + > > + do { > > + int wcount = PAGE_SIZE - (offset & PAGE_MASK) > > > bytes_left ? 
> > + bytes_left : PAGE_SIZE - (offset & > > PAGE_MASK); > > + > > + virtual = (u8 __force *)vram->mapping + > > cursor.start; > > Is it possible to use (u8 __iomem *)? > Yes, this is copy / paste from somewhere else in the driver. > > + > > + iosys_map_set_vaddr_iomem(&vmap, (void __iomem > > *)virtual); > And no typecast here (virtual should be __iomem already) > Will fix. > > + if (write) > > + xe_map_memcpy_to(xe, &vmap, offset & > > PAGE_MASK, buf, > > + wcount); > > + else > > + xe_map_memcpy_from(xe, buf, &vmap, offset & > > PAGE_MASK, > > + wcount); > > + > > + offset += wcount; > > + buf += wcount; > > + bytes_left -= wcount; > > + xe_res_next(&cursor, PAGE_SIZE); > > + } while (bytes_left); > > + > > + return len; > > +} > > + > Otherwise LTGM. > /Thomas > Thanks. Will split the TTM part and Xe parts in individual patches, add kernel doc, and fixup comments in next rev. Matt > > > const struct ttm_device_funcs xe_ttm_funcs = { > > .ttm_tt_create = xe_ttm_tt_create, > > .ttm_tt_populate = xe_ttm_tt_populate, > > @@ -1120,6 +1167,7 @@ const struct ttm_device_funcs xe_ttm_funcs = { > > .move = xe_bo_move, > > .io_mem_reserve = xe_ttm_io_mem_reserve, > > .io_mem_pfn = xe_ttm_io_mem_pfn, > > + .access_memory = xe_ttm_access_memory, > > .release_notify = xe_ttm_bo_release_notify, > > .eviction_valuable = ttm_bo_eviction_valuable, > > .delete_mem_notify = xe_ttm_bo_delete_mem_notify, > > diff --git a/include/drm/ttm/ttm_bo.h b/include/drm/ttm/ttm_bo.h > > index 5804408815be..8ea11cd8df39 100644 > > --- a/include/drm/ttm/ttm_bo.h > > +++ b/include/drm/ttm/ttm_bo.h > > @@ -421,6 +421,8 @@ void ttm_bo_unpin(struct ttm_buffer_object *bo); > > int ttm_bo_evict_first(struct ttm_device *bdev, > > struct ttm_resource_manager *man, > > struct ttm_operation_ctx *ctx); > > +int ttm_bo_access(struct ttm_buffer_object *bo, unsigned long > > offset, > > + void *buf, int len, int write); > > vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo, > > struct vm_fault *vmf); 
> > vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf, >
On 18/10/2024 00:39, Matthew Brost wrote: > Non-contiguous VRAM cannot be mapped in Xe nor can non-visible VRAM > easily be accessed. Add ttm_bo_access, which is similar to > ttm_bo_vm_access, to access such memory. Is the plan to roll this out also to object error capture and clear color access? Those places seem to be using ttm vmap/kmap which only seems to work with contiguous VRAM, but in those cases we are mapping userspace objects which are potentially not contiguous so I assume that stuff is also quite broken atm? > > Visible VRAM access is only supported at the momement but a follow up > can add GPU access to non-visible VRAM. > > Suggested-by: Thomas Hellström <thomas.hellstrom@linux.intel.com> > Signed-off-by: Matthew Brost <matthew.brost@intel.com> > --- > drivers/gpu/drm/ttm/ttm_bo_vm.c | 20 +++++++++----- > drivers/gpu/drm/xe/xe_bo.c | 48 +++++++++++++++++++++++++++++++++ > include/drm/ttm/ttm_bo.h | 2 ++ > 3 files changed, 64 insertions(+), 6 deletions(-) > > diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c > index 2c699ed1963a..b53cc064da44 100644 > --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c > +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c > @@ -405,13 +405,9 @@ static int ttm_bo_vm_access_kmap(struct ttm_buffer_object *bo, > return len; > } > > -int ttm_bo_vm_access(struct vm_area_struct *vma, unsigned long addr, > - void *buf, int len, int write) > +int ttm_bo_access(struct ttm_buffer_object *bo, unsigned long offset, > + void *buf, int len, int write) > { > - struct ttm_buffer_object *bo = vma->vm_private_data; > - unsigned long offset = (addr) - vma->vm_start + > - ((vma->vm_pgoff - drm_vma_node_start(&bo->base.vma_node)) > - << PAGE_SHIFT); > int ret; > > if (len < 1 || (offset + len) > bo->base.size) > @@ -439,6 +435,18 @@ int ttm_bo_vm_access(struct vm_area_struct *vma, unsigned long addr, > > return ret; > } > +EXPORT_SYMBOL(ttm_bo_access); > + > +int ttm_bo_vm_access(struct vm_area_struct *vma, unsigned long addr, > + void 
*buf, int len, int write) > +{ > + struct ttm_buffer_object *bo = vma->vm_private_data; > + unsigned long offset = (addr) - vma->vm_start + > + ((vma->vm_pgoff - drm_vma_node_start(&bo->base.vma_node)) > + << PAGE_SHIFT); > + > + return ttm_bo_access(bo, offset, buf, len, write); > +} > EXPORT_SYMBOL(ttm_bo_vm_access); > > static const struct vm_operations_struct ttm_bo_vm_ops = { > diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c > index 5b232f2951b1..267f3b03a6d0 100644 > --- a/drivers/gpu/drm/xe/xe_bo.c > +++ b/drivers/gpu/drm/xe/xe_bo.c > @@ -1111,6 +1111,53 @@ static void xe_ttm_bo_swap_notify(struct ttm_buffer_object *ttm_bo) > } > } > > +static int xe_ttm_access_memory(struct ttm_buffer_object *ttm_bo, > + unsigned long offset, void *buf, int len, > + int write) > +{ > + struct xe_bo *bo = ttm_to_xe_bo(ttm_bo); > + struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev); > + struct iosys_map vmap; > + struct xe_res_cursor cursor; > + struct xe_mem_region *vram; > + void __iomem *virtual; > + int bytes_left = len; > + > + xe_bo_assert_held(bo); I think we need rpm somewhere? Although bo lock might make this tricky. > + > + if (!mem_type_is_vram(ttm_bo->resource->mem_type)) > + return -EIO; > + > + /* FIXME: Use GPU for non-visible VRAM */ > + if (!(bo->flags & XE_BO_FLAG_NEEDS_CPU_ACCESS)) > + return -EINVAL; > + > + vram = res_to_mem_region(ttm_bo->resource); > + xe_res_first(ttm_bo->resource, offset & ~PAGE_MASK, 0, &cursor); > + > + do { > + int wcount = PAGE_SIZE - (offset & PAGE_MASK) > bytes_left ? 
> + bytes_left : PAGE_SIZE - (offset & PAGE_MASK); > + > + virtual = (u8 __force *)vram->mapping + cursor.start; > + > + iosys_map_set_vaddr_iomem(&vmap, (void __iomem *)virtual); > + if (write) > + xe_map_memcpy_to(xe, &vmap, offset & PAGE_MASK, buf, > + wcount); > + else > + xe_map_memcpy_from(xe, buf, &vmap, offset & PAGE_MASK, > + wcount); > + > + offset += wcount; > + buf += wcount; > + bytes_left -= wcount; > + xe_res_next(&cursor, PAGE_SIZE); > + } while (bytes_left); > + > + return len; > +} > + > const struct ttm_device_funcs xe_ttm_funcs = { > .ttm_tt_create = xe_ttm_tt_create, > .ttm_tt_populate = xe_ttm_tt_populate, > @@ -1120,6 +1167,7 @@ const struct ttm_device_funcs xe_ttm_funcs = { > .move = xe_bo_move, > .io_mem_reserve = xe_ttm_io_mem_reserve, > .io_mem_pfn = xe_ttm_io_mem_pfn, > + .access_memory = xe_ttm_access_memory, > .release_notify = xe_ttm_bo_release_notify, > .eviction_valuable = ttm_bo_eviction_valuable, > .delete_mem_notify = xe_ttm_bo_delete_mem_notify, > diff --git a/include/drm/ttm/ttm_bo.h b/include/drm/ttm/ttm_bo.h > index 5804408815be..8ea11cd8df39 100644 > --- a/include/drm/ttm/ttm_bo.h > +++ b/include/drm/ttm/ttm_bo.h > @@ -421,6 +421,8 @@ void ttm_bo_unpin(struct ttm_buffer_object *bo); > int ttm_bo_evict_first(struct ttm_device *bdev, > struct ttm_resource_manager *man, > struct ttm_operation_ctx *ctx); > +int ttm_bo_access(struct ttm_buffer_object *bo, unsigned long offset, > + void *buf, int len, int write); > vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo, > struct vm_fault *vmf); > vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf,
On Fri, Oct 18, 2024 at 10:08:06AM +0100, Matthew Auld wrote: > On 18/10/2024 00:39, Matthew Brost wrote: > > Non-contiguous VRAM cannot be mapped in Xe nor can non-visible VRAM > > easily be accessed. Add ttm_bo_access, which is similar to > > ttm_bo_vm_access, to access such memory. > > Is the plan to roll this out also to object error capture and clear color > access? Those places seem to be using ttm vmap/kmap which only seems to work > with contiguous VRAM, but in those cases we are mapping userspace objects > which are potentially not contiguous so I assume that stuff is also quite > broken atm? > I quickly sent this out without checking the error capture code, but that seems to be broken but no one is complaining? Seems odd. Let me look into this a bit more before posting a proper series. Will also update the error capture if needed. > > > > Visible VRAM access is only supported at the momement but a follow up > > can add GPU access to non-visible VRAM. > > > > Suggested-by: Thomas Hellström <thomas.hellstrom@linux.intel.com> > > Signed-off-by: Matthew Brost <matthew.brost@intel.com> > > --- > > drivers/gpu/drm/ttm/ttm_bo_vm.c | 20 +++++++++----- > > drivers/gpu/drm/xe/xe_bo.c | 48 +++++++++++++++++++++++++++++++++ > > include/drm/ttm/ttm_bo.h | 2 ++ > > 3 files changed, 64 insertions(+), 6 deletions(-) > > > > diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c > > index 2c699ed1963a..b53cc064da44 100644 > > --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c > > +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c > > @@ -405,13 +405,9 @@ static int ttm_bo_vm_access_kmap(struct ttm_buffer_object *bo, > > return len; > > } > > -int ttm_bo_vm_access(struct vm_area_struct *vma, unsigned long addr, > > - void *buf, int len, int write) > > +int ttm_bo_access(struct ttm_buffer_object *bo, unsigned long offset, > > + void *buf, int len, int write) > > { > > - struct ttm_buffer_object *bo = vma->vm_private_data; > > - unsigned long offset = (addr) - vma->vm_start + 
> > - ((vma->vm_pgoff - drm_vma_node_start(&bo->base.vma_node)) > > - << PAGE_SHIFT); > > int ret; > > if (len < 1 || (offset + len) > bo->base.size) > > @@ -439,6 +435,18 @@ int ttm_bo_vm_access(struct vm_area_struct *vma, unsigned long addr, > > return ret; > > } > > +EXPORT_SYMBOL(ttm_bo_access); > > + > > +int ttm_bo_vm_access(struct vm_area_struct *vma, unsigned long addr, > > + void *buf, int len, int write) > > +{ > > + struct ttm_buffer_object *bo = vma->vm_private_data; > > + unsigned long offset = (addr) - vma->vm_start + > > + ((vma->vm_pgoff - drm_vma_node_start(&bo->base.vma_node)) > > + << PAGE_SHIFT); > > + > > + return ttm_bo_access(bo, offset, buf, len, write); > > +} > > EXPORT_SYMBOL(ttm_bo_vm_access); > > static const struct vm_operations_struct ttm_bo_vm_ops = { > > diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c > > index 5b232f2951b1..267f3b03a6d0 100644 > > --- a/drivers/gpu/drm/xe/xe_bo.c > > +++ b/drivers/gpu/drm/xe/xe_bo.c > > @@ -1111,6 +1111,53 @@ static void xe_ttm_bo_swap_notify(struct ttm_buffer_object *ttm_bo) > > } > > } > > +static int xe_ttm_access_memory(struct ttm_buffer_object *ttm_bo, > > + unsigned long offset, void *buf, int len, > > + int write) > > +{ > > + struct xe_bo *bo = ttm_to_xe_bo(ttm_bo); > > + struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev); > > + struct iosys_map vmap; > > + struct xe_res_cursor cursor; > > + struct xe_mem_region *vram; > > + void __iomem *virtual; > > + int bytes_left = len; > > + > > + xe_bo_assert_held(bo); > > I think we need rpm somewhere? Although bo lock might make this tricky. > Do we? OFC if we interact with hardware we should but also if hardware is powered off a BO shouldn't be in VRAM. Maybe do a get_if_awake and bail otherwise? 
Matt > > + > > + if (!mem_type_is_vram(ttm_bo->resource->mem_type)) > > + return -EIO; > > + > > + /* FIXME: Use GPU for non-visible VRAM */ > > + if (!(bo->flags & XE_BO_FLAG_NEEDS_CPU_ACCESS)) > > + return -EINVAL; > > + > > + vram = res_to_mem_region(ttm_bo->resource); > > + xe_res_first(ttm_bo->resource, offset & ~PAGE_MASK, 0, &cursor); > > + > > + do { > > + int wcount = PAGE_SIZE - (offset & PAGE_MASK) > bytes_left ? > > + bytes_left : PAGE_SIZE - (offset & PAGE_MASK); > > + > > + virtual = (u8 __force *)vram->mapping + cursor.start; > > + > > + iosys_map_set_vaddr_iomem(&vmap, (void __iomem *)virtual); > > + if (write) > > + xe_map_memcpy_to(xe, &vmap, offset & PAGE_MASK, buf, > > + wcount); > > + else > > + xe_map_memcpy_from(xe, buf, &vmap, offset & PAGE_MASK, > > + wcount); > > + > > + offset += wcount; > > + buf += wcount; > > + bytes_left -= wcount; > > + xe_res_next(&cursor, PAGE_SIZE); > > + } while (bytes_left); > > + > > + return len; > > +} > > + > > const struct ttm_device_funcs xe_ttm_funcs = { > > .ttm_tt_create = xe_ttm_tt_create, > > .ttm_tt_populate = xe_ttm_tt_populate, > > @@ -1120,6 +1167,7 @@ const struct ttm_device_funcs xe_ttm_funcs = { > > .move = xe_bo_move, > > .io_mem_reserve = xe_ttm_io_mem_reserve, > > .io_mem_pfn = xe_ttm_io_mem_pfn, > > + .access_memory = xe_ttm_access_memory, > > .release_notify = xe_ttm_bo_release_notify, > > .eviction_valuable = ttm_bo_eviction_valuable, > > .delete_mem_notify = xe_ttm_bo_delete_mem_notify, > > diff --git a/include/drm/ttm/ttm_bo.h b/include/drm/ttm/ttm_bo.h > > index 5804408815be..8ea11cd8df39 100644 > > --- a/include/drm/ttm/ttm_bo.h > > +++ b/include/drm/ttm/ttm_bo.h > > @@ -421,6 +421,8 @@ void ttm_bo_unpin(struct ttm_buffer_object *bo); > > int ttm_bo_evict_first(struct ttm_device *bdev, > > struct ttm_resource_manager *man, > > struct ttm_operation_ctx *ctx); > > +int ttm_bo_access(struct ttm_buffer_object *bo, unsigned long offset, > > + void *buf, int len, int write); > > vm_fault_t 
ttm_bo_vm_reserve(struct ttm_buffer_object *bo, > > struct vm_fault *vmf); > > vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf,
On 18/10/2024 17:25, Matthew Brost wrote: > On Fri, Oct 18, 2024 at 10:08:06AM +0100, Matthew Auld wrote: >> On 18/10/2024 00:39, Matthew Brost wrote: >>> Non-contiguous VRAM cannot be mapped in Xe nor can non-visible VRAM >>> easily be accessed. Add ttm_bo_access, which is similar to >>> ttm_bo_vm_access, to access such memory. >> >> Is the plan to roll this out also to object error capture and clear color >> access? Those places seem to be using ttm vmap/kmap which only seems to work >> with contiguous VRAM, but in those cases we are mapping userspace objects >> which are potentially not contiguous so I assume that stuff is also quite >> broken atm? >> > > I quickly sent this out without checking the error capture code, but > that seems to be broken but no one is complaining? Seems odd. It looks like we are also missing some IGTs for the vma dumpable stuff. > > Let me look into this a bit more before posting a proper series. Will > also update the error capture if needed. > >>> >>> Visible VRAM access is only supported at the momement but a follow up >>> can add GPU access to non-visible VRAM. 
>>> >>> Suggested-by: Thomas Hellström <thomas.hellstrom@linux.intel.com> >>> Signed-off-by: Matthew Brost <matthew.brost@intel.com> >>> --- >>> drivers/gpu/drm/ttm/ttm_bo_vm.c | 20 +++++++++----- >>> drivers/gpu/drm/xe/xe_bo.c | 48 +++++++++++++++++++++++++++++++++ >>> include/drm/ttm/ttm_bo.h | 2 ++ >>> 3 files changed, 64 insertions(+), 6 deletions(-) >>> >>> diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c >>> index 2c699ed1963a..b53cc064da44 100644 >>> --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c >>> +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c >>> @@ -405,13 +405,9 @@ static int ttm_bo_vm_access_kmap(struct ttm_buffer_object *bo, >>> return len; >>> } >>> -int ttm_bo_vm_access(struct vm_area_struct *vma, unsigned long addr, >>> - void *buf, int len, int write) >>> +int ttm_bo_access(struct ttm_buffer_object *bo, unsigned long offset, >>> + void *buf, int len, int write) >>> { >>> - struct ttm_buffer_object *bo = vma->vm_private_data; >>> - unsigned long offset = (addr) - vma->vm_start + >>> - ((vma->vm_pgoff - drm_vma_node_start(&bo->base.vma_node)) >>> - << PAGE_SHIFT); >>> int ret; >>> if (len < 1 || (offset + len) > bo->base.size) >>> @@ -439,6 +435,18 @@ int ttm_bo_vm_access(struct vm_area_struct *vma, unsigned long addr, >>> return ret; >>> } >>> +EXPORT_SYMBOL(ttm_bo_access); >>> + >>> +int ttm_bo_vm_access(struct vm_area_struct *vma, unsigned long addr, >>> + void *buf, int len, int write) >>> +{ >>> + struct ttm_buffer_object *bo = vma->vm_private_data; >>> + unsigned long offset = (addr) - vma->vm_start + >>> + ((vma->vm_pgoff - drm_vma_node_start(&bo->base.vma_node)) >>> + << PAGE_SHIFT); >>> + >>> + return ttm_bo_access(bo, offset, buf, len, write); >>> +} >>> EXPORT_SYMBOL(ttm_bo_vm_access); >>> static const struct vm_operations_struct ttm_bo_vm_ops = { >>> diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c >>> index 5b232f2951b1..267f3b03a6d0 100644 >>> --- a/drivers/gpu/drm/xe/xe_bo.c >>> +++ 
b/drivers/gpu/drm/xe/xe_bo.c >>> @@ -1111,6 +1111,53 @@ static void xe_ttm_bo_swap_notify(struct ttm_buffer_object *ttm_bo) >>> } >>> } >>> +static int xe_ttm_access_memory(struct ttm_buffer_object *ttm_bo, >>> + unsigned long offset, void *buf, int len, >>> + int write) >>> +{ >>> + struct xe_bo *bo = ttm_to_xe_bo(ttm_bo); >>> + struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev); >>> + struct iosys_map vmap; >>> + struct xe_res_cursor cursor; >>> + struct xe_mem_region *vram; >>> + void __iomem *virtual; >>> + int bytes_left = len; >>> + >>> + xe_bo_assert_held(bo); >> >> I think we need rpm somewhere? Although bo lock might make this tricky. >> > > Do we? OFC if we interact with hardware we should but also if hardware > is powered off a BO shouldn't be in VRAM. There is also d3hot which doesn't kick stuff out to system memory, but VRAM is still inaccessible in that mode. > > Maybe do a get_if_awake and bail otherwise? > > Matt > >>> + >>> + if (!mem_type_is_vram(ttm_bo->resource->mem_type)) >>> + return -EIO; >>> + >>> + /* FIXME: Use GPU for non-visible VRAM */ >>> + if (!(bo->flags & XE_BO_FLAG_NEEDS_CPU_ACCESS)) >>> + return -EINVAL; >>> + >>> + vram = res_to_mem_region(ttm_bo->resource); >>> + xe_res_first(ttm_bo->resource, offset & ~PAGE_MASK, 0, &cursor); >>> + >>> + do { >>> + int wcount = PAGE_SIZE - (offset & PAGE_MASK) > bytes_left ? 
>>> + bytes_left : PAGE_SIZE - (offset & PAGE_MASK); >>> + >>> + virtual = (u8 __force *)vram->mapping + cursor.start; >>> + >>> + iosys_map_set_vaddr_iomem(&vmap, (void __iomem *)virtual); >>> + if (write) >>> + xe_map_memcpy_to(xe, &vmap, offset & PAGE_MASK, buf, >>> + wcount); >>> + else >>> + xe_map_memcpy_from(xe, buf, &vmap, offset & PAGE_MASK, >>> + wcount); >>> + >>> + offset += wcount; >>> + buf += wcount; >>> + bytes_left -= wcount; >>> + xe_res_next(&cursor, PAGE_SIZE); >>> + } while (bytes_left); >>> + >>> + return len; >>> +} >>> + >>> const struct ttm_device_funcs xe_ttm_funcs = { >>> .ttm_tt_create = xe_ttm_tt_create, >>> .ttm_tt_populate = xe_ttm_tt_populate, >>> @@ -1120,6 +1167,7 @@ const struct ttm_device_funcs xe_ttm_funcs = { >>> .move = xe_bo_move, >>> .io_mem_reserve = xe_ttm_io_mem_reserve, >>> .io_mem_pfn = xe_ttm_io_mem_pfn, >>> + .access_memory = xe_ttm_access_memory, >>> .release_notify = xe_ttm_bo_release_notify, >>> .eviction_valuable = ttm_bo_eviction_valuable, >>> .delete_mem_notify = xe_ttm_bo_delete_mem_notify, >>> diff --git a/include/drm/ttm/ttm_bo.h b/include/drm/ttm/ttm_bo.h >>> index 5804408815be..8ea11cd8df39 100644 >>> --- a/include/drm/ttm/ttm_bo.h >>> +++ b/include/drm/ttm/ttm_bo.h >>> @@ -421,6 +421,8 @@ void ttm_bo_unpin(struct ttm_buffer_object *bo); >>> int ttm_bo_evict_first(struct ttm_device *bdev, >>> struct ttm_resource_manager *man, >>> struct ttm_operation_ctx *ctx); >>> +int ttm_bo_access(struct ttm_buffer_object *bo, unsigned long offset, >>> + void *buf, int len, int write); >>> vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo, >>> struct vm_fault *vmf); >>> vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf,
diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c index 2c699ed1963a..b53cc064da44 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c @@ -405,13 +405,9 @@ static int ttm_bo_vm_access_kmap(struct ttm_buffer_object *bo, return len; } -int ttm_bo_vm_access(struct vm_area_struct *vma, unsigned long addr, - void *buf, int len, int write) +int ttm_bo_access(struct ttm_buffer_object *bo, unsigned long offset, + void *buf, int len, int write) { - struct ttm_buffer_object *bo = vma->vm_private_data; - unsigned long offset = (addr) - vma->vm_start + - ((vma->vm_pgoff - drm_vma_node_start(&bo->base.vma_node)) - << PAGE_SHIFT); int ret; if (len < 1 || (offset + len) > bo->base.size) @@ -439,6 +435,18 @@ int ttm_bo_vm_access(struct vm_area_struct *vma, unsigned long addr, return ret; } +EXPORT_SYMBOL(ttm_bo_access); + +int ttm_bo_vm_access(struct vm_area_struct *vma, unsigned long addr, + void *buf, int len, int write) +{ + struct ttm_buffer_object *bo = vma->vm_private_data; + unsigned long offset = (addr) - vma->vm_start + + ((vma->vm_pgoff - drm_vma_node_start(&bo->base.vma_node)) + << PAGE_SHIFT); + + return ttm_bo_access(bo, offset, buf, len, write); +} EXPORT_SYMBOL(ttm_bo_vm_access); static const struct vm_operations_struct ttm_bo_vm_ops = { diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 5b232f2951b1..267f3b03a6d0 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -1111,6 +1111,53 @@ static void xe_ttm_bo_swap_notify(struct ttm_buffer_object *ttm_bo) } } +static int xe_ttm_access_memory(struct ttm_buffer_object *ttm_bo, + unsigned long offset, void *buf, int len, + int write) +{ + struct xe_bo *bo = ttm_to_xe_bo(ttm_bo); + struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev); + struct iosys_map vmap; + struct xe_res_cursor cursor; + struct xe_mem_region *vram; + void __iomem *virtual; + int bytes_left = len; + + xe_bo_assert_held(bo); + + if 
(!mem_type_is_vram(ttm_bo->resource->mem_type)) + return -EIO; + + /* FIXME: Use GPU for non-visible VRAM */ + if (!(bo->flags & XE_BO_FLAG_NEEDS_CPU_ACCESS)) + return -EINVAL; + + vram = res_to_mem_region(ttm_bo->resource); + xe_res_first(ttm_bo->resource, offset & ~PAGE_MASK, 0, &cursor); + + do { + int wcount = PAGE_SIZE - (offset & PAGE_MASK) > bytes_left ? + bytes_left : PAGE_SIZE - (offset & PAGE_MASK); + + virtual = (u8 __force *)vram->mapping + cursor.start; + + iosys_map_set_vaddr_iomem(&vmap, (void __iomem *)virtual); + if (write) + xe_map_memcpy_to(xe, &vmap, offset & PAGE_MASK, buf, + wcount); + else + xe_map_memcpy_from(xe, buf, &vmap, offset & PAGE_MASK, + wcount); + + offset += wcount; + buf += wcount; + bytes_left -= wcount; + xe_res_next(&cursor, PAGE_SIZE); + } while (bytes_left); + + return len; +} + const struct ttm_device_funcs xe_ttm_funcs = { .ttm_tt_create = xe_ttm_tt_create, .ttm_tt_populate = xe_ttm_tt_populate, @@ -1120,6 +1167,7 @@ const struct ttm_device_funcs xe_ttm_funcs = { .move = xe_bo_move, .io_mem_reserve = xe_ttm_io_mem_reserve, .io_mem_pfn = xe_ttm_io_mem_pfn, + .access_memory = xe_ttm_access_memory, .release_notify = xe_ttm_bo_release_notify, .eviction_valuable = ttm_bo_eviction_valuable, .delete_mem_notify = xe_ttm_bo_delete_mem_notify, diff --git a/include/drm/ttm/ttm_bo.h b/include/drm/ttm/ttm_bo.h index 5804408815be..8ea11cd8df39 100644 --- a/include/drm/ttm/ttm_bo.h +++ b/include/drm/ttm/ttm_bo.h @@ -421,6 +421,8 @@ void ttm_bo_unpin(struct ttm_buffer_object *bo); int ttm_bo_evict_first(struct ttm_device *bdev, struct ttm_resource_manager *man, struct ttm_operation_ctx *ctx); +int ttm_bo_access(struct ttm_buffer_object *bo, unsigned long offset, + void *buf, int len, int write); vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo, struct vm_fault *vmf); vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf,
Non-contiguous VRAM cannot be mapped in Xe nor can non-visible VRAM easily be accessed. Add ttm_bo_access, which is similar to ttm_bo_vm_access, to access such memory. Visible VRAM access is only supported at the moment but a follow up can add GPU access to non-visible VRAM. Suggested-by: Thomas Hellström <thomas.hellstrom@linux.intel.com> Signed-off-by: Matthew Brost <matthew.brost@intel.com> --- drivers/gpu/drm/ttm/ttm_bo_vm.c | 20 +++++++++----- drivers/gpu/drm/xe/xe_bo.c | 48 +++++++++++++++++++++++++++++++++ include/drm/ttm/ttm_bo.h | 2 ++ 3 files changed, 64 insertions(+), 6 deletions(-)