diff mbox series

[v2,17/29] drm/xe: Add SVM device memory mirroring

Message ID 20241016032518.539495-18-matthew.brost@intel.com (mailing list archive)
State New, archived
Headers show
Series Introduce GPU SVM and Xe SVM implementation | expand

Commit Message

Matthew Brost Oct. 16, 2024, 3:25 a.m. UTC
Add SVM device memory mirroring which enables device pages for
migration.

TODO: Hide this behind Kconfig

Signed-off-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com
Signed-off-by: Oak Zeng <oak.zeng@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
---
 drivers/gpu/drm/xe/xe_device_types.h |  8 ++++
 drivers/gpu/drm/xe/xe_svm.c          | 56 +++++++++++++++++++++++++++-
 drivers/gpu/drm/xe/xe_svm.h          |  3 ++
 drivers/gpu/drm/xe/xe_tile.c         |  5 +++
 4 files changed, 70 insertions(+), 2 deletions(-)

Comments

Thomas Hellstrom Nov. 19, 2024, 4:50 p.m. UTC | #1
On Tue, 2024-10-15 at 20:25 -0700, Matthew Brost wrote:
> Add SVM device memory mirroring which enables device pages for
> migration.
> 
> TODO: Hide this behind Kconfig
> 
> Signed-off-by: Niranjana Vishwanathapura
> <niranjana.vishwanathapura@intel.com
> Signed-off-by: Oak Zeng <oak.zeng@intel.com>
> Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> ---
>  drivers/gpu/drm/xe/xe_device_types.h |  8 ++++
>  drivers/gpu/drm/xe/xe_svm.c          | 56
> +++++++++++++++++++++++++++-
>  drivers/gpu/drm/xe/xe_svm.h          |  3 ++
>  drivers/gpu/drm/xe/xe_tile.c         |  5 +++
>  4 files changed, 70 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_device_types.h
> b/drivers/gpu/drm/xe/xe_device_types.h
> index 85bede4dd646..2ac5de7751c9 100644
> --- a/drivers/gpu/drm/xe/xe_device_types.h
> +++ b/drivers/gpu/drm/xe/xe_device_types.h
> @@ -104,6 +104,14 @@ struct xe_mem_region {
>  	resource_size_t actual_physical_size;
>  	/** @mapping: pointer to VRAM mappable space */
>  	void __iomem *mapping;
> +	/** @pagemap: Used to remap device memory as ZONE_DEVICE */
> +	struct dev_pagemap pagemap;
> +	/**
> +	 * @hpa_base: base host physical address
> +	 *
> +	 * This is generated when remap device memory as ZONE_DEVICE
> +	 */
> +	resource_size_t hpa_base;
>  };
>  
>  /**
> diff --git a/drivers/gpu/drm/xe/xe_svm.c
> b/drivers/gpu/drm/xe/xe_svm.c
> index 1d8021b4e2f0..22e6341117bd 100644
> --- a/drivers/gpu/drm/xe/xe_svm.c
> +++ b/drivers/gpu/drm/xe/xe_svm.c
> @@ -21,6 +21,11 @@ static struct xe_vm *range_to_vm(struct
> drm_gpusvm_range *r)
>  	return gpusvm_to_vm(r->gpusvm);
>  }
>  
> +static void *xe_svm_devm_owner(struct xe_device *xe)
> +{
> +	return xe;
> +}
> +
>  static struct drm_gpusvm_range *
>  xe_svm_range_alloc(struct drm_gpusvm *gpusvm)
>  {
> @@ -284,8 +289,9 @@ int xe_svm_init(struct xe_vm *vm)
>  		  xe_svm_garbage_collector_work_func);
>  
>  	return drm_gpusvm_init(&vm->svm.gpusvm, "Xe SVM", &vm->xe-
> >drm,
> -			       current->mm, NULL, 0, vm->size,
> -			       SZ_512M, &gpusvm_ops,
> fault_chunk_sizes,
> +			       current->mm, xe_svm_devm_owner(vm-
> >xe), 0,
> +			       vm->size, SZ_512M, &gpusvm_ops,
> +			       fault_chunk_sizes,
>  			       ARRAY_SIZE(fault_chunk_sizes));
>  }
>  
> @@ -383,3 +389,49 @@ bool xe_svm_has_mapping(struct xe_vm *vm, u64
> start, u64 end)
>  {
>  	return drm_gpusvm_has_mapping(&vm->svm.gpusvm, start, end);
>  }
> +
> +/**
> + * xe_devm_add: Remap and provide memmap backing for device memory
xe_devm_add():

Otherwise LGTM.
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com<


> + * @tile: tile that the memory region belongs to
> + * @mr: memory region to remap
> + *
> + * This remap device memory to host physical address space and
> create
> + * struct page to back device memory
> + *
> + * Return: 0 on success standard error code otherwise
> + */
> +int xe_devm_add(struct xe_tile *tile, struct xe_mem_region *mr)

> +{
> +	struct xe_device *xe = tile_to_xe(tile);
> +	struct device *dev = &to_pci_dev(xe->drm.dev)->dev;
> +	struct resource *res;
> +	void *addr;
> +	int ret;
> +
> +	res = devm_request_free_mem_region(dev, &iomem_resource,
> +					   mr->usable_size);
> +	if (IS_ERR(res)) {
> +		ret = PTR_ERR(res);
> +		return ret;
> +	}
> +
> +	mr->pagemap.type = MEMORY_DEVICE_PRIVATE;
> +	mr->pagemap.range.start = res->start;
> +	mr->pagemap.range.end = res->end;
> +	mr->pagemap.nr_range = 1;
> +	mr->pagemap.ops = drm_gpusvm_pagemap_ops_get();
> +	mr->pagemap.owner = xe_svm_devm_owner(xe);
> +	addr = devm_memremap_pages(dev, &mr->pagemap);
> +	if (IS_ERR(addr)) {
> +		devm_release_mem_region(dev, res->start,
> resource_size(res));
> +		ret = PTR_ERR(addr);
> +		drm_err(&xe->drm, "Failed to remap tile %d memory,
> errno %d\n",
> +				tile->id, ret);
> +		return ret;
> +	}
> +	mr->hpa_base = res->start;
> +
> +	drm_info(&xe->drm, "Added tile %d memory [%llx-%llx] to
> devm, remapped to %pr\n",
> +		 tile->id, mr->io_start, mr->io_start + mr-
> >usable_size, res);
> +	return 0;
> +}
> diff --git a/drivers/gpu/drm/xe/xe_svm.h
> b/drivers/gpu/drm/xe/xe_svm.h
> index 472fbc51f30e..760d22cefb1e 100644
> --- a/drivers/gpu/drm/xe/xe_svm.h
> +++ b/drivers/gpu/drm/xe/xe_svm.h
> @@ -11,6 +11,7 @@
>  
>  #define XE_INTERCONNECT_VRAM DRM_INTERCONNECT_DRIVER
>  
> +struct xe_mem_region;
>  struct xe_tile;
>  struct xe_vm;
>  struct xe_vma;
> @@ -22,6 +23,8 @@ struct xe_svm_range {
>  	u8 tile_invalidated;
>  };
>  
> +int xe_devm_add(struct xe_tile *tile, struct xe_mem_region *mr);
> +
>  int xe_svm_init(struct xe_vm *vm);
>  void xe_svm_fini(struct xe_vm *vm);
>  void xe_svm_close(struct xe_vm *vm);
> diff --git a/drivers/gpu/drm/xe/xe_tile.c
> b/drivers/gpu/drm/xe/xe_tile.c
> index 07cf7cfe4abd..bbb430392dfb 100644
> --- a/drivers/gpu/drm/xe/xe_tile.c
> +++ b/drivers/gpu/drm/xe/xe_tile.c
> @@ -13,6 +13,7 @@
>  #include "xe_migrate.h"
>  #include "xe_pcode.h"
>  #include "xe_sa.h"
> +#include "xe_svm.h"
>  #include "xe_tile.h"
>  #include "xe_tile_sysfs.h"
>  #include "xe_ttm_vram_mgr.h"
> @@ -164,6 +165,7 @@ static int tile_ttm_mgr_init(struct xe_tile
> *tile)
>   */
>  int xe_tile_init_noalloc(struct xe_tile *tile)
>  {
> +	struct xe_device *xe = tile_to_xe(tile);
>  	int err;
>  
>  	err = tile_ttm_mgr_init(tile);
> @@ -176,6 +178,9 @@ int xe_tile_init_noalloc(struct xe_tile *tile)
>  
>  	xe_wa_apply_tile_workarounds(tile);
>  
> +	if (xe->info.has_usm && IS_DGFX(xe))
> +		xe_devm_add(tile, &tile->mem.vram);
> +
>  	err = xe_tile_sysfs_init(tile);
>  
>  	return 0;
Gwan-gyeong Mun Nov. 20, 2024, 3:05 a.m. UTC | #2
On 10/16/24 6:25 AM, Matthew Brost wrote:
> +/**
> + * xe_devm_add: Remap and provide memmap backing for device memory
> + * @tile: tile that the memory region belongs to
> + * @mr: memory region to remap
> + *
> + * This remap device memory to host physical address space and create
> + * struct page to back device memory
> + *
> + * Return: 0 on success standard error code otherwise
> + */
> +int xe_devm_add(struct xe_tile *tile, struct xe_mem_region *mr)
> +{
> +	struct xe_device *xe = tile_to_xe(tile);
> +	struct device *dev = &to_pci_dev(xe->drm.dev)->dev;
> +	struct resource *res;
> +	void *addr;
> +	int ret;
> +
> +	res = devm_request_free_mem_region(dev, &iomem_resource,
> +					   mr->usable_size);
To use the devm_request_free_mem_region() function, 
CONFIG_GET_FREE_REGION=y in config.
xe's kconfig need to have CONFIG_GET_FREE_REGION dependency.
> +	if (IS_ERR(res)) {
> +		ret = PTR_ERR(res);
> +		return ret;
> +	}
> +
> +	mr->pagemap.type = MEMORY_DEVICE_PRIVATE;
> +	mr->pagemap.range.start = res->start;
> +	mr->pagemap.range.end = res->end;
> +	mr->pagemap.nr_range = 1;
> +	mr->pagemap.ops = drm_gpusvm_pagemap_ops_get();
> +	mr->pagemap.owner = xe_svm_devm_owner(xe);
> +	addr = devm_memremap_pages(dev, &mr->pagemap);
> +	if (IS_ERR(addr)) {
> +		devm_release_mem_region(dev, res->start, resource_size(res));
> +		ret = PTR_ERR(addr);
> +		drm_err(&xe->drm, "Failed to remap tile %d memory, errno %d\n",
> +				tile->id, ret);
> +		return ret;
> +	}
> +	mr->hpa_base = res->start;
> +
> +	drm_info(&xe->drm, "Added tile %d memory [%llx-%llx] to devm, remapped to %pr\n",
> +		 tile->id, mr->io_start, mr->io_start + mr->usable_size, res);
> +	return 0;
> +}
diff mbox series

Patch

diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 85bede4dd646..2ac5de7751c9 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -104,6 +104,14 @@  struct xe_mem_region {
 	resource_size_t actual_physical_size;
 	/** @mapping: pointer to VRAM mappable space */
 	void __iomem *mapping;
+	/** @pagemap: Used to remap device memory as ZONE_DEVICE */
+	struct dev_pagemap pagemap;
+	/**
+	 * @hpa_base: base host physical address
+	 *
+	 * This is generated when remap device memory as ZONE_DEVICE
+	 */
+	resource_size_t hpa_base;
 };
 
 /**
diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c
index 1d8021b4e2f0..22e6341117bd 100644
--- a/drivers/gpu/drm/xe/xe_svm.c
+++ b/drivers/gpu/drm/xe/xe_svm.c
@@ -21,6 +21,11 @@  static struct xe_vm *range_to_vm(struct drm_gpusvm_range *r)
 	return gpusvm_to_vm(r->gpusvm);
 }
 
+static void *xe_svm_devm_owner(struct xe_device *xe)
+{
+	return xe;
+}
+
 static struct drm_gpusvm_range *
 xe_svm_range_alloc(struct drm_gpusvm *gpusvm)
 {
@@ -284,8 +289,9 @@  int xe_svm_init(struct xe_vm *vm)
 		  xe_svm_garbage_collector_work_func);
 
 	return drm_gpusvm_init(&vm->svm.gpusvm, "Xe SVM", &vm->xe->drm,
-			       current->mm, NULL, 0, vm->size,
-			       SZ_512M, &gpusvm_ops, fault_chunk_sizes,
+			       current->mm, xe_svm_devm_owner(vm->xe), 0,
+			       vm->size, SZ_512M, &gpusvm_ops,
+			       fault_chunk_sizes,
 			       ARRAY_SIZE(fault_chunk_sizes));
 }
 
@@ -383,3 +389,49 @@  bool xe_svm_has_mapping(struct xe_vm *vm, u64 start, u64 end)
 {
 	return drm_gpusvm_has_mapping(&vm->svm.gpusvm, start, end);
 }
+
+/**
+ * xe_devm_add: Remap and provide memmap backing for device memory
+ * @tile: tile that the memory region belongs to
+ * @mr: memory region to remap
+ *
+ * This remap device memory to host physical address space and create
+ * struct page to back device memory
+ *
+ * Return: 0 on success standard error code otherwise
+ */
+int xe_devm_add(struct xe_tile *tile, struct xe_mem_region *mr)
+{
+	struct xe_device *xe = tile_to_xe(tile);
+	struct device *dev = &to_pci_dev(xe->drm.dev)->dev;
+	struct resource *res;
+	void *addr;
+	int ret;
+
+	res = devm_request_free_mem_region(dev, &iomem_resource,
+					   mr->usable_size);
+	if (IS_ERR(res)) {
+		ret = PTR_ERR(res);
+		return ret;
+	}
+
+	mr->pagemap.type = MEMORY_DEVICE_PRIVATE;
+	mr->pagemap.range.start = res->start;
+	mr->pagemap.range.end = res->end;
+	mr->pagemap.nr_range = 1;
+	mr->pagemap.ops = drm_gpusvm_pagemap_ops_get();
+	mr->pagemap.owner = xe_svm_devm_owner(xe);
+	addr = devm_memremap_pages(dev, &mr->pagemap);
+	if (IS_ERR(addr)) {
+		devm_release_mem_region(dev, res->start, resource_size(res));
+		ret = PTR_ERR(addr);
+		drm_err(&xe->drm, "Failed to remap tile %d memory, errno %d\n",
+				tile->id, ret);
+		return ret;
+	}
+	mr->hpa_base = res->start;
+
+	drm_info(&xe->drm, "Added tile %d memory [%llx-%llx] to devm, remapped to %pr\n",
+		 tile->id, mr->io_start, mr->io_start + mr->usable_size, res);
+	return 0;
+}
diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h
index 472fbc51f30e..760d22cefb1e 100644
--- a/drivers/gpu/drm/xe/xe_svm.h
+++ b/drivers/gpu/drm/xe/xe_svm.h
@@ -11,6 +11,7 @@ 
 
 #define XE_INTERCONNECT_VRAM DRM_INTERCONNECT_DRIVER
 
+struct xe_mem_region;
 struct xe_tile;
 struct xe_vm;
 struct xe_vma;
@@ -22,6 +23,8 @@  struct xe_svm_range {
 	u8 tile_invalidated;
 };
 
+int xe_devm_add(struct xe_tile *tile, struct xe_mem_region *mr);
+
 int xe_svm_init(struct xe_vm *vm);
 void xe_svm_fini(struct xe_vm *vm);
 void xe_svm_close(struct xe_vm *vm);
diff --git a/drivers/gpu/drm/xe/xe_tile.c b/drivers/gpu/drm/xe/xe_tile.c
index 07cf7cfe4abd..bbb430392dfb 100644
--- a/drivers/gpu/drm/xe/xe_tile.c
+++ b/drivers/gpu/drm/xe/xe_tile.c
@@ -13,6 +13,7 @@ 
 #include "xe_migrate.h"
 #include "xe_pcode.h"
 #include "xe_sa.h"
+#include "xe_svm.h"
 #include "xe_tile.h"
 #include "xe_tile_sysfs.h"
 #include "xe_ttm_vram_mgr.h"
@@ -164,6 +165,7 @@  static int tile_ttm_mgr_init(struct xe_tile *tile)
  */
 int xe_tile_init_noalloc(struct xe_tile *tile)
 {
+	struct xe_device *xe = tile_to_xe(tile);
 	int err;
 
 	err = tile_ttm_mgr_init(tile);
@@ -176,6 +178,9 @@  int xe_tile_init_noalloc(struct xe_tile *tile)
 
 	xe_wa_apply_tile_workarounds(tile);
 
+	if (xe->info.has_usm && IS_DGFX(xe))
+		xe_devm_add(tile, &tile->mem.vram);
+
 	err = xe_tile_sysfs_init(tile);
 
 	return 0;