diff mbox series

[v8,3/6] drm/ttm: Use fault-injection to test error paths

Message ID 20240816133717.3102-4-thomas.hellstrom@linux.intel.com (mailing list archive)
State New, archived
Headers show
Series TTM shrinker helpers and xe buffer object shrinker | expand

Commit Message

Thomas Hellstrom Aug. 16, 2024, 1:37 p.m. UTC
Use fault-injection to test partial TTM swapout and interrupted swapin.
Return -EINTR for swapin to test the caller's ability to handle and
restart the swapin, and on swapout perform a partial swapout to test
the swapin and release_shrunken functionality.

v8:
- Use the core fault-injection system.

Cc: Christian König <christian.koenig@amd.com>
Cc: Somalapuram Amaranath <Amaranath.Somalapuram@amd.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: <dri-devel@lists.freedesktop.org>
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com> #v7
---
 drivers/gpu/drm/ttm/ttm_pool.c | 25 ++++++++++++++++++++++++-
 1 file changed, 24 insertions(+), 1 deletion(-)

Comments

kernel test robot Aug. 16, 2024, 4:32 p.m. UTC | #1
Hi Thomas,

kernel test robot noticed the following build errors:

[auto build test ERROR on next-20240816]
[also build test ERROR on v6.11-rc3]
[cannot apply to drm-xe/drm-xe-next linus/master v6.11-rc3 v6.11-rc2 v6.11-rc1]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Thomas-Hellstr-m/drm-ttm-Add-a-virtual-base-class-for-graphics-memory-backup/20240816-213947
base:   next-20240816
patch link:    https://lore.kernel.org/r/20240816133717.3102-4-thomas.hellstrom%40linux.intel.com
patch subject: [PATCH v8 3/6] drm/ttm: Use fault-injection to test error paths
config: x86_64-defconfig (https://download.01.org/0day-ci/archive/20240817/202408170041.l5SO7IpQ-lkp@intel.com/config)
compiler: gcc-11 (Debian 11.3.0-12) 11.3.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240817/202408170041.l5SO7IpQ-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202408170041.l5SO7IpQ-lkp@intel.com/

All errors (new ones prefixed by >>):

   drivers/gpu/drm/ttm/ttm_pool.c: In function 'ttm_pool_restore_tt':
>> drivers/gpu/drm/ttm/ttm_pool.c:456:29: error: implicit declaration of function 'should_fail'; did you mean 'schedule_tail'? [-Werror=implicit-function-declaration]
     456 |                             should_fail(&backup_fault_inject, 1)) {
         |                             ^~~~~~~~~~~
         |                             schedule_tail
>> drivers/gpu/drm/ttm/ttm_pool.c:456:42: error: 'backup_fault_inject' undeclared (first use in this function)
     456 |                             should_fail(&backup_fault_inject, 1)) {
         |                                          ^~~~~~~~~~~~~~~~~~~
   drivers/gpu/drm/ttm/ttm_pool.c:456:42: note: each undeclared identifier is reported only once for each function it appears in
   drivers/gpu/drm/ttm/ttm_pool.c: In function 'ttm_pool_backup_tt':
   drivers/gpu/drm/ttm/ttm_pool.c:908:64: error: 'backup_fault_inject' undeclared (first use in this function)
     908 |         if (IS_ENABLED(CONFIG_FAULT_INJECTION) && should_fail(&backup_fault_inject, 1))
         |                                                                ^~~~~~~~~~~~~~~~~~~
   cc1: some warnings being treated as errors


vim +456 drivers/gpu/drm/ttm/ttm_pool.c

   434	
   435	static int ttm_pool_restore_tt(struct ttm_pool_tt_restore *restore,
   436				       struct ttm_backup *backup,
   437				       struct ttm_operation_ctx *ctx)
   438	{
   439		static unsigned long __maybe_unused swappedin;
   440		unsigned int i, nr = 1 << restore->order;
   441		int ret = 0;
   442	
   443		if (!ttm_pool_restore_valid(restore))
   444			return 0;
   445	
   446		for (i = restore->restored_pages; i < nr; ++i) {
   447			struct page *p = restore->old_pages[i];
   448	
   449			if (ttm_backup_page_ptr_is_handle(p)) {
   450				unsigned long handle = ttm_backup_page_ptr_to_handle(p);
   451	
   452				if (handle == 0)
   453					continue;
   454	
   455				if (IS_ENABLED(CONFIG_FAULT_INJECTION) && ctx->interruptible &&
 > 456				    should_fail(&backup_fault_inject, 1)) {
   457					ret = -EINTR;
   458					break;
   459				}
   460	
   461				ret = backup->ops->copy_backed_up_page
   462					(backup, restore->first_page[i],
   463					 handle, ctx->interruptible);
   464				if (ret)
   465					break;
   466	
   467				backup->ops->drop(backup, handle);
   468			} else if (p) {
   469				/*
   470				 * We could probably avoid splitting the old page
   471				 * using clever logic, but ATM we don't care.
   472				 */
   473				ttm_pool_split_for_swap(restore->pool, p);
   474				copy_highpage(restore->first_page[i], p);
   475				__free_pages(p, 0);
   476			}
   477	
   478			restore->restored_pages++;
   479			restore->old_pages[i] = NULL;
   480			cond_resched();
   481		}
   482	
   483		return ret;
   484	}
   485
kernel test robot Aug. 16, 2024, 5:35 p.m. UTC | #2
Hi Thomas,

kernel test robot noticed the following build errors:

[auto build test ERROR on next-20240816]
[also build test ERROR on v6.11-rc3]
[cannot apply to drm-xe/drm-xe-next linus/master v6.11-rc3 v6.11-rc2 v6.11-rc1]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Thomas-Hellstr-m/drm-ttm-Add-a-virtual-base-class-for-graphics-memory-backup/20240816-213947
base:   next-20240816
patch link:    https://lore.kernel.org/r/20240816133717.3102-4-thomas.hellstrom%40linux.intel.com
patch subject: [PATCH v8 3/6] drm/ttm: Use fault-injection to test error paths
config: x86_64-rhel-8.3-rust (https://download.01.org/0day-ci/archive/20240817/202408170144.N7YoWTCX-lkp@intel.com/config)
compiler: clang version 18.1.5 (https://github.com/llvm/llvm-project 617a15a9eac96088ae5e9134248d8236e34b91b1)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240817/202408170144.N7YoWTCX-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202408170144.N7YoWTCX-lkp@intel.com/

All errors (new ones prefixed by >>):

>> drivers/gpu/drm/ttm/ttm_pool.c:456:8: error: call to undeclared function 'should_fail'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
     456 |                             should_fail(&backup_fault_inject, 1)) {
         |                             ^
>> drivers/gpu/drm/ttm/ttm_pool.c:456:21: error: use of undeclared identifier 'backup_fault_inject'
     456 |                             should_fail(&backup_fault_inject, 1)) {
         |                                          ^
   drivers/gpu/drm/ttm/ttm_pool.c:908:44: error: call to undeclared function 'should_fail'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
     908 |         if (IS_ENABLED(CONFIG_FAULT_INJECTION) && should_fail(&backup_fault_inject, 1))
         |                                                   ^
   drivers/gpu/drm/ttm/ttm_pool.c:908:57: error: use of undeclared identifier 'backup_fault_inject'
     908 |         if (IS_ENABLED(CONFIG_FAULT_INJECTION) && should_fail(&backup_fault_inject, 1))
         |                                                                ^
   4 errors generated.


vim +/should_fail +456 drivers/gpu/drm/ttm/ttm_pool.c

   434	
   435	static int ttm_pool_restore_tt(struct ttm_pool_tt_restore *restore,
   436				       struct ttm_backup *backup,
   437				       struct ttm_operation_ctx *ctx)
   438	{
   439		static unsigned long __maybe_unused swappedin;
   440		unsigned int i, nr = 1 << restore->order;
   441		int ret = 0;
   442	
   443		if (!ttm_pool_restore_valid(restore))
   444			return 0;
   445	
   446		for (i = restore->restored_pages; i < nr; ++i) {
   447			struct page *p = restore->old_pages[i];
   448	
   449			if (ttm_backup_page_ptr_is_handle(p)) {
   450				unsigned long handle = ttm_backup_page_ptr_to_handle(p);
   451	
   452				if (handle == 0)
   453					continue;
   454	
   455				if (IS_ENABLED(CONFIG_FAULT_INJECTION) && ctx->interruptible &&
 > 456				    should_fail(&backup_fault_inject, 1)) {
   457					ret = -EINTR;
   458					break;
   459				}
   460	
   461				ret = backup->ops->copy_backed_up_page
   462					(backup, restore->first_page[i],
   463					 handle, ctx->interruptible);
   464				if (ret)
   465					break;
   466	
   467				backup->ops->drop(backup, handle);
   468			} else if (p) {
   469				/*
   470				 * We could probably avoid splitting the old page
   471				 * using clever logic, but ATM we don't care.
   472				 */
   473				ttm_pool_split_for_swap(restore->pool, p);
   474				copy_highpage(restore->first_page[i], p);
   475				__free_pages(p, 0);
   476			}
   477	
   478			restore->restored_pages++;
   479			restore->old_pages[i] = NULL;
   480			cond_resched();
   481		}
   482	
   483		return ret;
   484	}
   485
diff mbox series

Patch

diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
index 0d224cd9f8eb..0824b66a9aac 100644
--- a/drivers/gpu/drm/ttm/ttm_pool.c
+++ b/drivers/gpu/drm/ttm/ttm_pool.c
@@ -48,6 +48,11 @@ 
 
 #include "ttm_module.h"
 
+#ifdef CONFIG_FAULT_INJECTION
+#include <linux/fault-inject.h>
+static DECLARE_FAULT_ATTR(backup_fault_inject);
+#endif
+
 /**
  * struct ttm_pool_dma - Helper object for coherent DMA mappings
  *
@@ -431,6 +436,7 @@  static int ttm_pool_restore_tt(struct ttm_pool_tt_restore *restore,
 			       struct ttm_backup *backup,
 			       struct ttm_operation_ctx *ctx)
 {
+	static unsigned long __maybe_unused swappedin;
 	unsigned int i, nr = 1 << restore->order;
 	int ret = 0;
 
@@ -446,6 +452,12 @@  static int ttm_pool_restore_tt(struct ttm_pool_tt_restore *restore,
 			if (handle == 0)
 				continue;
 
+			if (IS_ENABLED(CONFIG_FAULT_INJECTION) && ctx->interruptible &&
+			    should_fail(&backup_fault_inject, 1)) {
+				ret = -EINTR;
+				break;
+			}
+
 			ret = backup->ops->copy_backed_up_page
 				(backup, restore->first_page[i],
 				 handle, ctx->interruptible);
@@ -889,7 +901,14 @@  long ttm_pool_backup_tt(struct ttm_pool *pool, struct ttm_tt *ttm,
 
 	alloc_gfp = GFP_KERNEL | __GFP_HIGH | __GFP_NOWARN | __GFP_RETRY_MAYFAIL;
 
-	for (i = 0; i < ttm->num_pages; ++i) {
+	num_pages = ttm->num_pages;
+
+	/* Pretend doing fault injection by shrinking only half of the pages. */
+
+	if (IS_ENABLED(CONFIG_FAULT_INJECTION) && should_fail(&backup_fault_inject, 1))
+		num_pages = DIV_ROUND_UP(num_pages, 2);
+
+	for (i = 0; i < num_pages; ++i) {
 		page = ttm->pages[i];
 		if (unlikely(!page))
 			continue;
@@ -1178,6 +1197,10 @@  int ttm_pool_mgr_init(unsigned long num_pages)
 			    &ttm_pool_debugfs_globals_fops);
 	debugfs_create_file("page_pool_shrink", 0400, ttm_debugfs_root, NULL,
 			    &ttm_pool_debugfs_shrink_fops);
+#ifdef CONFIG_FAULT_INJECTION
+	fault_create_debugfs_attr("backup_fault_inject", ttm_debugfs_root,
+				  &backup_fault_inject);
+#endif
 #endif
 
 	mm_shrinker = shrinker_alloc(0, "drm-ttm_pool");