diff mbox series

[1/2] drm/ttm: prevent moving of pinned BOs

Message ID 20230111114256.72669-1-christian.koenig@amd.com (mailing list archive)
State New, archived
Headers show
Series [1/2] drm/ttm: prevent moving of pinned BOs | expand

Commit Message

Christian König Jan. 11, 2023, 11:42 a.m. UTC
We have checks for this in the individual drivers move callback, but
it's probably better to generally forbit that on a higher level.

Also stops exporting ttm_resource_compat() since that's not necessary
any more after removing the extra checks in vmwgfx.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c |  4 ----
 drivers/gpu/drm/nouveau/nouveau_bo.c    |  3 ---
 drivers/gpu/drm/radeon/radeon_ttm.c     |  4 ----
 drivers/gpu/drm/ttm/ttm_bo.c            | 20 ++++++++++++--------
 drivers/gpu/drm/ttm/ttm_resource.c      |  1 -
 drivers/gpu/drm/vmwgfx/vmwgfx_bo.c      | 19 ++-----------------
 6 files changed, 14 insertions(+), 37 deletions(-)

Comments

Matthew Auld Jan. 11, 2023, 1:17 p.m. UTC | #1
On Wed, 11 Jan 2023 at 11:43, Christian König
<ckoenig.leichtzumerken@gmail.com> wrote:
>
> We have checks for this in the individual drivers move callback, but
> it's probably better to generally forbit that on a higher level.
>
> Also stops exporting ttm_resource_compat() since that's not necessary
> any more after removing the extra checks in vmwgfx.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c |  4 ----
>  drivers/gpu/drm/nouveau/nouveau_bo.c    |  3 ---
>  drivers/gpu/drm/radeon/radeon_ttm.c     |  4 ----
>  drivers/gpu/drm/ttm/ttm_bo.c            | 20 ++++++++++++--------
>  drivers/gpu/drm/ttm/ttm_resource.c      |  1 -
>  drivers/gpu/drm/vmwgfx/vmwgfx_bo.c      | 19 ++-----------------
>  6 files changed, 14 insertions(+), 37 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 068c2d8495fd..677cd7d91687 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -466,11 +466,7 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
>                         return r;
>         }
>
> -       /* Can't move a pinned BO */
>         abo = ttm_to_amdgpu_bo(bo);
> -       if (WARN_ON_ONCE(abo->tbo.pin_count > 0))
> -               return -EINVAL;
> -
>         adev = amdgpu_ttm_adev(bo->bdev);
>
>         if (!old_mem || (old_mem->mem_type == TTM_PL_SYSTEM &&
> diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
> index 288eebc70a67..c2ec91cc845d 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_bo.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
> @@ -1015,9 +1015,6 @@ nouveau_bo_move(struct ttm_buffer_object *bo, bool evict,
>         if (ret)
>                 goto out_ntfy;
>
> -       if (nvbo->bo.pin_count)
> -               NV_WARN(drm, "Moving pinned object %p!\n", nvbo);
> -
>         if (drm->client.device.info.family < NV_DEVICE_INFO_V0_TESLA) {
>                 ret = nouveau_bo_vm_bind(bo, new_reg, &new_tile);
>                 if (ret)
> diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
> index 1e8e287e113c..67075c85f847 100644
> --- a/drivers/gpu/drm/radeon/radeon_ttm.c
> +++ b/drivers/gpu/drm/radeon/radeon_ttm.c
> @@ -211,11 +211,7 @@ static int radeon_bo_move(struct ttm_buffer_object *bo, bool evict,
>         if (r)
>                 return r;
>
> -       /* Can't move a pinned BO */
>         rbo = container_of(bo, struct radeon_bo, tbo);
> -       if (WARN_ON_ONCE(rbo->tbo.pin_count > 0))
> -               return -EINVAL;
> -
>         rdev = radeon_get_rdev(bo->bdev);
>         if (old_mem->mem_type == TTM_PL_SYSTEM && bo->ttm == NULL) {
>                 ttm_bo_move_null(bo, new_mem);
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index 326a3d13a829..9baccb2f6e99 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -894,14 +894,18 @@ int ttm_bo_validate(struct ttm_buffer_object *bo,
>         if (!placement->num_placement && !placement->num_busy_placement)
>                 return ttm_bo_pipeline_gutting(bo);
>
> -       /*
> -        * Check whether we need to move buffer.
> -        */
> -       if (!bo->resource || !ttm_resource_compat(bo->resource, placement)) {
> -               ret = ttm_bo_move_buffer(bo, placement, ctx);
> -               if (ret)
> -                       return ret;
> -       }
> +       /* Check whether we need to move buffer. */
> +       if (bo->resource && ttm_resource_compat(bo->resource, placement))
> +               return 0;

Note this now skips the tt create below (intentional?). I think i915
needed that, since it creates a dummy system resource initially for
all objects, and then relies on ZERO_ALLOC being set for certain
objects to know if the memory needs to be cleared or not when later
doing the dummy -> vram. Thoughts?

> +
> +       /* Moving of pinned BOs is forbidden */
> +       if (bo->pin_count)
> +               return -EINVAL;
> +
> +       ret = ttm_bo_move_buffer(bo, placement, ctx);
> +       if (ret)
> +               return ret;
> +
>         /*
>          * We might need to add a TTM.
>          */
> diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c
> index b8a826a24fb2..7333f7a87a2f 100644
> --- a/drivers/gpu/drm/ttm/ttm_resource.c
> +++ b/drivers/gpu/drm/ttm/ttm_resource.c
> @@ -361,7 +361,6 @@ bool ttm_resource_compat(struct ttm_resource *res,
>
>         return false;
>  }
> -EXPORT_SYMBOL(ttm_resource_compat);
>
>  void ttm_resource_set_bo(struct ttm_resource *res,
>                          struct ttm_buffer_object *bo)
> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
> index 321c551784a1..dbcef460c452 100644
> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
> @@ -87,12 +87,7 @@ int vmw_bo_pin_in_placement(struct vmw_private *dev_priv,
>         if (unlikely(ret != 0))
>                 goto err;
>
> -       if (buf->base.pin_count > 0)
> -               ret = ttm_resource_compat(bo->resource, placement)
> -                       ? 0 : -EINVAL;
> -       else
> -               ret = ttm_bo_validate(bo, placement, &ctx);
> -
> +       ret = ttm_bo_validate(bo, placement, &ctx);
>         if (!ret)
>                 vmw_bo_pin_reserved(buf, true);
>
> @@ -128,12 +123,6 @@ int vmw_bo_pin_in_vram_or_gmr(struct vmw_private *dev_priv,
>         if (unlikely(ret != 0))
>                 goto err;
>
> -       if (buf->base.pin_count > 0) {
> -               ret = ttm_resource_compat(bo->resource, &vmw_vram_gmr_placement)
> -                       ? 0 : -EINVAL;
> -               goto out_unreserve;
> -       }
> -
>         ret = ttm_bo_validate(bo, &vmw_vram_gmr_placement, &ctx);
>         if (likely(ret == 0) || ret == -ERESTARTSYS)
>                 goto out_unreserve;
> @@ -218,11 +207,7 @@ int vmw_bo_pin_in_start_of_vram(struct vmw_private *dev_priv,
>                 (void) ttm_bo_validate(bo, &vmw_sys_placement, &ctx);
>         }
>
> -       if (buf->base.pin_count > 0)
> -               ret = ttm_resource_compat(bo->resource, &placement)
> -                       ? 0 : -EINVAL;
> -       else
> -               ret = ttm_bo_validate(bo, &placement, &ctx);
> +       ret = ttm_bo_validate(bo, &placement, &ctx);
>
>         /* For some reason we didn't end up at the start of vram */
>         WARN_ON(ret == 0 && bo->resource->start != 0);
> --
> 2.34.1
>
kernel test robot Jan. 11, 2023, 2:09 p.m. UTC | #2
Hi Christian,

I love your patch! Perhaps something to improve:

[auto build test WARNING on drm-misc/drm-misc-next]
[also build test WARNING on drm-intel/for-linux-next drm-intel/for-linux-next-fixes linus/master v6.2-rc3 next-20230111]
[cannot apply to drm-tip/drm-tip]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Christian-K-nig/drm-ttm-replace-busy-placement-with-flags/20230111-194408
base:   git://anongit.freedesktop.org/drm/drm-misc drm-misc-next
patch link:    https://lore.kernel.org/r/20230111114256.72669-1-christian.koenig%40amd.com
patch subject: [Intel-gfx] [PATCH 1/2] drm/ttm: prevent moving of pinned BOs
config: ia64-allyesconfig
compiler: ia64-linux-gcc (GCC) 12.1.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/303ff8f52b95005f1da7386c72a3907441e8de28
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Christian-K-nig/drm-ttm-replace-busy-placement-with-flags/20230111-194408
        git checkout 303ff8f52b95005f1da7386c72a3907441e8de28
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=ia64 olddefconfig
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=ia64 SHELL=/bin/bash drivers/gpu/

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

   drivers/gpu/drm/radeon/radeon_ttm.c: In function 'radeon_bo_move':
>> drivers/gpu/drm/radeon/radeon_ttm.c:201:27: warning: variable 'rbo' set but not used [-Wunused-but-set-variable]
     201 |         struct radeon_bo *rbo;
         |                           ^~~


vim +/rbo +201 drivers/gpu/drm/radeon/radeon_ttm.c

771fe6b912fca5 Jerome Glisse   2009-06-05  193  
2823f4f019d888 Christian König 2017-04-26  194  static int radeon_bo_move(struct ttm_buffer_object *bo, bool evict,
2823f4f019d888 Christian König 2017-04-26  195  			  struct ttm_operation_ctx *ctx,
ebdf565169af00 Dave Airlie     2020-10-29  196  			  struct ttm_resource *new_mem,
ebdf565169af00 Dave Airlie     2020-10-29  197  			  struct ttm_place *hop)
771fe6b912fca5 Jerome Glisse   2009-06-05  198  {
d3116756a710e3 Christian König 2021-04-12  199  	struct ttm_resource *old_mem = bo->resource;
771fe6b912fca5 Jerome Glisse   2009-06-05  200  	struct radeon_device *rdev;
e1a575ada8d2a3 Michel Dänzer   2016-03-28 @201  	struct radeon_bo *rbo;
a32ba6bdca21fd Christian König 2021-07-12  202  	int r;
771fe6b912fca5 Jerome Glisse   2009-06-05  203  
bfe5e585b44fb8 Dave Airlie     2020-10-20  204  	if (new_mem->mem_type == TTM_PL_TT) {
bfe5e585b44fb8 Dave Airlie     2020-10-20  205  		r = radeon_ttm_tt_bind(bo->bdev, bo->ttm, new_mem);
bfe5e585b44fb8 Dave Airlie     2020-10-20  206  		if (r)
bfe5e585b44fb8 Dave Airlie     2020-10-20  207  			return r;
bfe5e585b44fb8 Dave Airlie     2020-10-20  208  	}
6d820003295977 Dave Airlie     2020-10-20  209  
0ef1ed813e6b13 Dave Airlie     2020-09-23  210  	r = ttm_bo_wait_ctx(bo, ctx);
88932a7be27d89 Christian König 2016-06-06  211  	if (r)
9afdda82ee7f69 Christian König 2020-11-25  212  		return r;
88932a7be27d89 Christian König 2016-06-06  213  
e1a575ada8d2a3 Michel Dänzer   2016-03-28  214  	rbo = container_of(bo, struct radeon_bo, tbo);
771fe6b912fca5 Jerome Glisse   2009-06-05  215  	rdev = radeon_get_rdev(bo->bdev);
771fe6b912fca5 Jerome Glisse   2009-06-05  216  	if (old_mem->mem_type == TTM_PL_SYSTEM && bo->ttm == NULL) {
ecfe6953fa0011 Dave Airlie     2020-09-08  217  		ttm_bo_move_null(bo, new_mem);
9afdda82ee7f69 Christian König 2020-11-25  218  		goto out;
771fe6b912fca5 Jerome Glisse   2009-06-05  219  	}
51e50e54220432 Dave Airlie     2020-09-24  220  	if (old_mem->mem_type == TTM_PL_SYSTEM &&
51e50e54220432 Dave Airlie     2020-09-24  221  	    new_mem->mem_type == TTM_PL_TT) {
ecfe6953fa0011 Dave Airlie     2020-09-08  222  		ttm_bo_move_null(bo, new_mem);
9afdda82ee7f69 Christian König 2020-11-25  223  		goto out;
771fe6b912fca5 Jerome Glisse   2009-06-05  224  	}
51e50e54220432 Dave Airlie     2020-09-24  225  
51e50e54220432 Dave Airlie     2020-09-24  226  	if (old_mem->mem_type == TTM_PL_TT &&
c37d951cb42aa3 Dave Airlie     2020-10-19  227  	    new_mem->mem_type == TTM_PL_SYSTEM) {
29a1d482e4044a Dave Airlie     2020-10-20  228  		radeon_ttm_tt_unbind(bo->bdev, bo->ttm);
bfa3357ef9abc9 Christian König 2021-04-15  229  		ttm_resource_free(bo, &bo->resource);
c37d951cb42aa3 Dave Airlie     2020-10-19  230  		ttm_bo_assign_mem(bo, new_mem);
9afdda82ee7f69 Christian König 2020-11-25  231  		goto out;
c37d951cb42aa3 Dave Airlie     2020-10-19  232  	}
9afdda82ee7f69 Christian König 2020-11-25  233  	if (rdev->ring[radeon_copy_ring_index(rdev)].ready &&
9afdda82ee7f69 Christian König 2020-11-25  234  	    rdev->asic->copy.copy != NULL) {
9afdda82ee7f69 Christian König 2020-11-25  235  		if ((old_mem->mem_type == TTM_PL_SYSTEM &&
9afdda82ee7f69 Christian König 2020-11-25  236  		     new_mem->mem_type == TTM_PL_VRAM) ||
9afdda82ee7f69 Christian König 2020-11-25  237  		    (old_mem->mem_type == TTM_PL_VRAM &&
9afdda82ee7f69 Christian König 2020-11-25  238  		     new_mem->mem_type == TTM_PL_SYSTEM)) {
9afdda82ee7f69 Christian König 2020-11-25  239  			hop->fpfn = 0;
9afdda82ee7f69 Christian König 2020-11-25  240  			hop->lpfn = 0;
9afdda82ee7f69 Christian König 2020-11-25  241  			hop->mem_type = TTM_PL_TT;
9afdda82ee7f69 Christian König 2020-11-25  242  			hop->flags = 0;
9afdda82ee7f69 Christian König 2020-11-25  243  			return -EMULTIHOP;
771fe6b912fca5 Jerome Glisse   2009-06-05  244  		}
771fe6b912fca5 Jerome Glisse   2009-06-05  245  
28a68f82826675 Dave Airlie     2020-10-29  246  		r = radeon_move_blit(bo, evict, new_mem, old_mem);
9afdda82ee7f69 Christian König 2020-11-25  247  	} else {
9afdda82ee7f69 Christian König 2020-11-25  248  		r = -ENODEV;
9afdda82ee7f69 Christian König 2020-11-25  249  	}
9afdda82ee7f69 Christian König 2020-11-25  250  
1ab2e1059916b9 Michel Dänzer   2009-07-28  251  	if (r) {
3e98d829ad0a59 Roger He        2017-12-08  252  		r = ttm_bo_move_memcpy(bo, ctx, new_mem);
9afdda82ee7f69 Christian König 2020-11-25  253  		if (r)
9afdda82ee7f69 Christian König 2020-11-25  254  			return r;
67e8e3f970ad74 Marek Olšák     2014-03-02  255  	}
67e8e3f970ad74 Marek Olšák     2014-03-02  256  
9afdda82ee7f69 Christian König 2020-11-25  257  out:
67e8e3f970ad74 Marek Olšák     2014-03-02  258  	/* update statistics */
e11bfb99d6ece2 Christian König 2020-12-09  259  	atomic64_add(bo->base.size, &rdev->num_bytes_moved);
a32ba6bdca21fd Christian König 2021-07-12  260  	radeon_bo_move_notify(bo);
67e8e3f970ad74 Marek Olšák     2014-03-02  261  	return 0;
67e8e3f970ad74 Marek Olšák     2014-03-02  262  }
771fe6b912fca5 Jerome Glisse   2009-06-05  263
Yujie Liu Jan. 15, 2023, 4:14 p.m. UTC | #3
Greeting,

FYI, we noticed BUG:kernel_NULL_pointer_dereference,address due to commit (built with gcc-11):

commit: 303ff8f52b95005f1da7386c72a3907441e8de28 ("[Intel-gfx] [PATCH 1/2] drm/ttm: prevent moving of pinned BOs")
url: https://github.com/intel-lab-lkp/linux/commits/Christian-K-nig/drm-ttm-replace-busy-placement-with-flags/20230111-194408
base: git://anongit.freedesktop.org/drm/drm-misc drm-misc-next
patch link: https://lore.kernel.org/all/20230111114256.72669-1-christian.koenig@amd.com/
patch subject: [Intel-gfx] [PATCH 1/2] drm/ttm: prevent moving of pinned BOs

in testcase: boot

on test machine: qemu-system-x86_64 -enable-kvm -cpu SandyBridge -smp 2 -m 16G

caused below changes (please refer to attached dmesg/kmsg for entire log/backtrace):


[    9.020473][  T186] BUG: kernel NULL pointer dereference, address: 000000000000000c
[    9.020476][  T186] #PF: supervisor read access in kernel mode
[    9.020478][  T186] #PF: error_code(0x0000) - not-present page
[    9.020479][  T186] PGD 0 P4D 0
[    9.020482][  T186] Oops: 0000 [#1] SMP PTI
[    9.020485][  T186] CPU: 1 PID: 186 Comm: systemd-udevd Not tainted 6.1.0-rc6-01478-g303ff8f52b95 #1
[    9.020488][  T186] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.0-debian-1.16.0-5 04/01/2014
[ 9.020489][ T186] RIP: 0010:ttm_bo_move_memcpy (drivers/gpu/drm/ttm/ttm_bo_util.c:186) ttm
[ 9.020499][ T186] Code: e8 1a 34 00 00 48 89 c3 48 83 f8 ea 0f 84 ad 00 00 00 48 81 fb 00 f0 ff ff 0f 87 e3 00 00 00 48 8b 03 80 78 10 00 75 57 31 ff <8b> 75 0c 48 8b 14 24 48 89 d9 e8 26 fc ff ff 48 8b 03 80 78 10 00
All code
========
   0:	e8 1a 34 00 00       	callq  0x341f
   5:	48 89 c3             	mov    %rax,%rbx
   8:	48 83 f8 ea          	cmp    $0xffffffffffffffea,%rax
   c:	0f 84 ad 00 00 00    	je     0xbf
  12:	48 81 fb 00 f0 ff ff 	cmp    $0xfffffffffffff000,%rbx
  19:	0f 87 e3 00 00 00    	ja     0x102
  1f:	48 8b 03             	mov    (%rbx),%rax
  22:	80 78 10 00          	cmpb   $0x0,0x10(%rax)
  26:	75 57                	jne    0x7f
  28:	31 ff                	xor    %edi,%edi
  2a:*	8b 75 0c             	mov    0xc(%rbp),%esi		<-- trapping instruction
  2d:	48 8b 14 24          	mov    (%rsp),%rdx
  31:	48 89 d9             	mov    %rbx,%rcx
  34:	e8 26 fc ff ff       	callq  0xfffffffffffffc5f
  39:	48 8b 03             	mov    (%rbx),%rax
  3c:	80 78 10 00          	cmpb   $0x0,0x10(%rax)

Code starting with the faulting instruction
===========================================
   0:	8b 75 0c             	mov    0xc(%rbp),%esi
   3:	48 8b 14 24          	mov    (%rsp),%rdx
   7:	48 89 d9             	mov    %rbx,%rcx
   a:	e8 26 fc ff ff       	callq  0xfffffffffffffc35
   f:	48 8b 03             	mov    (%rbx),%rax
  12:	80 78 10 00          	cmpb   $0x0,0x10(%rax)
[    9.020501][  T186] RSP: 0018:ffffc9000053f658 EFLAGS: 00010246
[    9.020503][  T186] RAX: ffffffffa04d65c0 RBX: ffffc9000053f688 RCX: ffff888139697028
[    9.020505][  T186] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000001
[    9.020506][  T186] RBP: 0000000000000000 R08: 0000000000000000 R09: 00000000fd3e7fff
[    9.020507][  T186] R10: ffffc8ff053fffff R11: 0000000000000000 R12: ffff888138912800
[    9.020509][  T186] R13: ffff888139536420 R14: ffff888139697010 R15: ffff8881397dcc00
[    9.020510][  T186] FS:  00007f122420a8c0(0000) GS:ffff88842fd00000(0000) knlGS:0000000000000000
[    9.020518][  T186] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[    9.020520][  T186] CR2: 000000000000000c CR3: 0000000139642000 CR4: 00000000000406e0
[    9.020521][  T186] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[    9.020522][  T186] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[    9.020523][  T186] Call Trace:
[    9.020527][  T186]  <TASK>
[ 9.020529][ T186] ? unmap_mapping_range (mm/memory.c:3602) 
[ 9.020535][ T186] ttm_bo_handle_move_mem (drivers/gpu/drm/ttm/ttm_bo.c:153) ttm
[ 9.020543][ T186] ttm_bo_validate (drivers/gpu/drm/ttm/ttm_bo.c:855 drivers/gpu/drm/ttm/ttm_bo.c:905) ttm
[ 9.020551][ T186] drm_gem_vram_pin_locked (drivers/gpu/drm/drm_gem_vram_helper.c:295) drm_vram_helper
[ 9.020557][ T186] drm_gem_vram_pin (drivers/gpu/drm/drm_gem_vram_helper.c:334) drm_vram_helper
[ 9.020562][ T186] drm_gem_vram_plane_helper_prepare_fb (drivers/gpu/drm/drm_gem_vram_helper.c:675) drm_vram_helper
[ 9.020571][ T186] drm_atomic_helper_prepare_planes (drivers/gpu/drm/drm_atomic_helper.c:2522 (discriminator 6)) drm_kms_helper
[ 9.020591][ T186] drm_atomic_helper_commit (drivers/gpu/drm/drm_atomic_helper.c:1965 drivers/gpu/drm/drm_atomic_helper.c:1942) drm_kms_helper
[ 9.020604][ T186] drm_atomic_commit (drivers/gpu/drm/drm_atomic.c:1443) drm
[ 9.020639][ T186] ? drm_plane_get_damage_clips.cold (drivers/gpu/drm/drm_atomic.c:109) drm
[ 9.020667][ T186] drm_client_modeset_commit_atomic (drivers/gpu/drm/drm_client_modeset.c:1045) drm
[ 9.020692][ T186] drm_client_modeset_commit_locked (drivers/gpu/drm/drm_client_modeset.c:1148) drm
[ 9.020715][ T186] drm_client_modeset_commit (drivers/gpu/drm/drm_client_modeset.c:1176) drm
[ 9.020738][ T186] drm_fb_helper_set_par (drivers/gpu/drm/drm_fb_helper.c:252 drivers/gpu/drm/drm_fb_helper.c:227 drivers/gpu/drm/drm_fb_helper.c:1631) drm_kms_helper
[ 9.020752][ T186] fbcon_init (drivers/video/fbdev/core/fbcon.c:1097) 
[ 9.020782][ T186] visual_init (drivers/tty/vt/vt.c:1075) 
[ 9.020785][ T186] do_bind_con_driver+0x1d0/0x300 
[ 9.020789][ T186] do_take_over_console (drivers/tty/vt/vt.c:4273) 
[ 9.020791][ T186] do_fbcon_takeover (drivers/video/fbdev/core/fbcon.c:533) 
[ 9.020793][ T186] fbcon_fb_registered (drivers/video/fbdev/core/fbcon.c:3008 drivers/video/fbdev/core/fbcon.c:3028) 
[ 9.020795][ T186] do_register_framebuffer (drivers/video/fbdev/core/fbmem.c:1598) 
[ 9.020800][ T186] register_framebuffer (drivers/video/fbdev/core/fbmem.c:1704) 
[ 9.020803][ T186] __drm_fb_helper_initial_config_and_unlock (drivers/gpu/drm/drm_fb_helper.c:2165) drm_kms_helper
[ 9.020815][ T186] drm_fbdev_client_hotplug (drivers/gpu/drm/drm_fbdev_generic.c:407) drm_kms_helper
[ 9.020827][ T186] drm_fbdev_generic_setup (drivers/gpu/drm/drm_fbdev_generic.c:492) drm_kms_helper
[ 9.020840][ T186] bochs_pci_probe (drivers/gpu/drm/tiny/bochs.c:675 drivers/gpu/drm/tiny/bochs.c:640) bochs
[ 9.020847][ T186] local_pci_probe (drivers/pci/pci-driver.c:324) 
[ 9.020852][ T186] pci_call_probe (drivers/pci/pci-driver.c:392) 
[ 9.020855][ T186] ? kernfs_create_link (fs/kernfs/symlink.c:48) 
[ 9.020860][ T186] pci_device_probe (drivers/pci/pci-driver.c:461) 
[ 9.020862][ T186] really_probe (drivers/base/dd.c:560 drivers/base/dd.c:639) 
[ 9.020865][ T186] ? kvm_clock_get_cycles (arch/x86/include/asm/preempt.h:85 arch/x86/kernel/kvmclock.c:80 arch/x86/kernel/kvmclock.c:86) 
[ 9.020869][ T186] __driver_probe_device (drivers/base/dd.c:719 drivers/base/dd.c:776) 
[ 9.020871][ T186] driver_probe_device (drivers/base/dd.c:808) 
[ 9.020873][ T186] __driver_attach (drivers/base/dd.c:1191) 
[ 9.020875][ T186] ? __device_attach_driver (drivers/base/dd.c:1135) 
[ 9.020876][ T186] ? __device_attach_driver (drivers/base/dd.c:1135) 
[ 9.020878][ T186] bus_for_each_dev (drivers/base/bus.c:301) 
[ 9.020883][ T186] bus_add_driver (drivers/base/bus.c:618) 
[ 9.020885][ T186] driver_register (drivers/base/driver.c:246) 
[    9.020888][  T186]  ? 0xffffffffa0413000
[ 9.020889][ T186] do_one_initcall (init/main.c:1303) 
[ 9.020894][ T186] ? kmalloc_trace (mm/slab_common.c:1048) 
[ 9.020897][ T186] do_init_module (kernel/module/main.c:2455) 
[ 9.020901][ T186] __do_sys_finit_module (kernel/module/main.c:2949) 
[ 9.020904][ T186] do_syscall_64 (arch/x86/entry/common.c:50 arch/x86/entry/common.c:80) 
[ 9.020909][ T186] entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:120) 
[    9.020912][  T186] RIP: 0033:0x7f12246c39b9
[ 9.020914][ T186] Code: 00 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d a7 54 0c 00 f7 d8 64 89 01 48
All code
========
   0:	00 c3                	add    %al,%bl
   2:	66 2e 0f 1f 84 00 00 	nopw   %cs:0x0(%rax,%rax,1)
   9:	00 00 00 
   c:	0f 1f 44 00 00       	nopl   0x0(%rax,%rax,1)
  11:	48 89 f8             	mov    %rdi,%rax
  14:	48 89 f7             	mov    %rsi,%rdi
  17:	48 89 d6             	mov    %rdx,%rsi
  1a:	48 89 ca             	mov    %rcx,%rdx
  1d:	4d 89 c2             	mov    %r8,%r10
  20:	4d 89 c8             	mov    %r9,%r8
  23:	4c 8b 4c 24 08       	mov    0x8(%rsp),%r9
  28:	0f 05                	syscall 
  2a:*	48 3d 01 f0 ff ff    	cmp    $0xfffffffffffff001,%rax		<-- trapping instruction
  30:	73 01                	jae    0x33
  32:	c3                   	retq   
  33:	48 8b 0d a7 54 0c 00 	mov    0xc54a7(%rip),%rcx        # 0xc54e1
  3a:	f7 d8                	neg    %eax
  3c:	64 89 01             	mov    %eax,%fs:(%rcx)
  3f:	48                   	rex.W

Code starting with the faulting instruction
===========================================
   0:	48 3d 01 f0 ff ff    	cmp    $0xfffffffffffff001,%rax
   6:	73 01                	jae    0x9
   8:	c3                   	retq   
   9:	48 8b 0d a7 54 0c 00 	mov    0xc54a7(%rip),%rcx        # 0xc54b7
  10:	f7 d8                	neg    %eax
  12:	64 89 01             	mov    %eax,%fs:(%rcx)
  15:	48                   	rex.W


If you fix the issue, kindly add following tag
| Reported-by: kernel test robot <yujie.liu@intel.com>
| Link: https://lore.kernel.org/oe-lkp/202301152355.43107953-yujie.liu@intel.com


To reproduce:

        # build kernel
	cd linux
	cp config-6.1.0-rc6-01478-g303ff8f52b95 .config
	make HOSTCC=gcc-11 CC=gcc-11 ARCH=x86_64 olddefconfig prepare modules_prepare bzImage modules
	make HOSTCC=gcc-11 CC=gcc-11 ARCH=x86_64 INSTALL_MOD_PATH=<mod-install-dir> modules_install
	cd <mod-install-dir>
	find lib/ | cpio -o -H newc --quiet | gzip > modules.cgz


        git clone https://github.com/intel/lkp-tests.git
        cd lkp-tests
        bin/lkp qemu -k <bzImage> -m modules.cgz job-script # job-script is attached in this email

        # if come across any failure that blocks the test,
        # please remove ~/.lkp and /lkp dir to run from a clean state.
Christian König Jan. 24, 2023, 9:51 a.m. UTC | #4
Am 11.01.23 um 14:17 schrieb Matthew Auld:
> On Wed, 11 Jan 2023 at 11:43, Christian König
> <ckoenig.leichtzumerken@gmail.com> wrote:
>> We have checks for this in the individual drivers move callback, but
>> it's probably better to generally forbit that on a higher level.
>>
>> Also stops exporting ttm_resource_compat() since that's not necessary
>> any more after removing the extra checks in vmwgfx.
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c |  4 ----
>>   drivers/gpu/drm/nouveau/nouveau_bo.c    |  3 ---
>>   drivers/gpu/drm/radeon/radeon_ttm.c     |  4 ----
>>   drivers/gpu/drm/ttm/ttm_bo.c            | 20 ++++++++++++--------
>>   drivers/gpu/drm/ttm/ttm_resource.c      |  1 -
>>   drivers/gpu/drm/vmwgfx/vmwgfx_bo.c      | 19 ++-----------------
>>   6 files changed, 14 insertions(+), 37 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> index 068c2d8495fd..677cd7d91687 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> @@ -466,11 +466,7 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
>>                          return r;
>>          }
>>
>> -       /* Can't move a pinned BO */
>>          abo = ttm_to_amdgpu_bo(bo);
>> -       if (WARN_ON_ONCE(abo->tbo.pin_count > 0))
>> -               return -EINVAL;
>> -
>>          adev = amdgpu_ttm_adev(bo->bdev);
>>
>>          if (!old_mem || (old_mem->mem_type == TTM_PL_SYSTEM &&
>> diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
>> index 288eebc70a67..c2ec91cc845d 100644
>> --- a/drivers/gpu/drm/nouveau/nouveau_bo.c
>> +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
>> @@ -1015,9 +1015,6 @@ nouveau_bo_move(struct ttm_buffer_object *bo, bool evict,
>>          if (ret)
>>                  goto out_ntfy;
>>
>> -       if (nvbo->bo.pin_count)
>> -               NV_WARN(drm, "Moving pinned object %p!\n", nvbo);
>> -
>>          if (drm->client.device.info.family < NV_DEVICE_INFO_V0_TESLA) {
>>                  ret = nouveau_bo_vm_bind(bo, new_reg, &new_tile);
>>                  if (ret)
>> diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
>> index 1e8e287e113c..67075c85f847 100644
>> --- a/drivers/gpu/drm/radeon/radeon_ttm.c
>> +++ b/drivers/gpu/drm/radeon/radeon_ttm.c
>> @@ -211,11 +211,7 @@ static int radeon_bo_move(struct ttm_buffer_object *bo, bool evict,
>>          if (r)
>>                  return r;
>>
>> -       /* Can't move a pinned BO */
>>          rbo = container_of(bo, struct radeon_bo, tbo);
>> -       if (WARN_ON_ONCE(rbo->tbo.pin_count > 0))
>> -               return -EINVAL;
>> -
>>          rdev = radeon_get_rdev(bo->bdev);
>>          if (old_mem->mem_type == TTM_PL_SYSTEM && bo->ttm == NULL) {
>>                  ttm_bo_move_null(bo, new_mem);
>> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
>> index 326a3d13a829..9baccb2f6e99 100644
>> --- a/drivers/gpu/drm/ttm/ttm_bo.c
>> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
>> @@ -894,14 +894,18 @@ int ttm_bo_validate(struct ttm_buffer_object *bo,
>>          if (!placement->num_placement && !placement->num_busy_placement)
>>                  return ttm_bo_pipeline_gutting(bo);
>>
>> -       /*
>> -        * Check whether we need to move buffer.
>> -        */
>> -       if (!bo->resource || !ttm_resource_compat(bo->resource, placement)) {
>> -               ret = ttm_bo_move_buffer(bo, placement, ctx);
>> -               if (ret)
>> -                       return ret;
>> -       }
>> +       /* Check whether we need to move buffer. */
>> +       if (bo->resource && ttm_resource_compat(bo->resource, placement))
>> +               return 0;
> Note this now skips the tt create below (intentional?). I think i915
> needed that, since it creates a dummy system resource initially for
> all objects, and then relies on ZERO_ALLOC being set for certain
> objects to know if the memory needs to be cleared or not when later
> doing the dummy -> vram. Thoughts?

That's unproblematic. On initial allocation bo->resource is NULL so we 
never branch out here.

Christian.

>
>> +
>> +       /* Moving of pinned BOs is forbidden */
>> +       if (bo->pin_count)
>> +               return -EINVAL;
>> +
>> +       ret = ttm_bo_move_buffer(bo, placement, ctx);
>> +       if (ret)
>> +               return ret;
>> +
>>          /*
>>           * We might need to add a TTM.
>>           */
>> diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c
>> index b8a826a24fb2..7333f7a87a2f 100644
>> --- a/drivers/gpu/drm/ttm/ttm_resource.c
>> +++ b/drivers/gpu/drm/ttm/ttm_resource.c
>> @@ -361,7 +361,6 @@ bool ttm_resource_compat(struct ttm_resource *res,
>>
>>          return false;
>>   }
>> -EXPORT_SYMBOL(ttm_resource_compat);
>>
>>   void ttm_resource_set_bo(struct ttm_resource *res,
>>                           struct ttm_buffer_object *bo)
>> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
>> index 321c551784a1..dbcef460c452 100644
>> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
>> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
>> @@ -87,12 +87,7 @@ int vmw_bo_pin_in_placement(struct vmw_private *dev_priv,
>>          if (unlikely(ret != 0))
>>                  goto err;
>>
>> -       if (buf->base.pin_count > 0)
>> -               ret = ttm_resource_compat(bo->resource, placement)
>> -                       ? 0 : -EINVAL;
>> -       else
>> -               ret = ttm_bo_validate(bo, placement, &ctx);
>> -
>> +       ret = ttm_bo_validate(bo, placement, &ctx);
>>          if (!ret)
>>                  vmw_bo_pin_reserved(buf, true);
>>
>> @@ -128,12 +123,6 @@ int vmw_bo_pin_in_vram_or_gmr(struct vmw_private *dev_priv,
>>          if (unlikely(ret != 0))
>>                  goto err;
>>
>> -       if (buf->base.pin_count > 0) {
>> -               ret = ttm_resource_compat(bo->resource, &vmw_vram_gmr_placement)
>> -                       ? 0 : -EINVAL;
>> -               goto out_unreserve;
>> -       }
>> -
>>          ret = ttm_bo_validate(bo, &vmw_vram_gmr_placement, &ctx);
>>          if (likely(ret == 0) || ret == -ERESTARTSYS)
>>                  goto out_unreserve;
>> @@ -218,11 +207,7 @@ int vmw_bo_pin_in_start_of_vram(struct vmw_private *dev_priv,
>>                  (void) ttm_bo_validate(bo, &vmw_sys_placement, &ctx);
>>          }
>>
>> -       if (buf->base.pin_count > 0)
>> -               ret = ttm_resource_compat(bo->resource, &placement)
>> -                       ? 0 : -EINVAL;
>> -       else
>> -               ret = ttm_bo_validate(bo, &placement, &ctx);
>> +       ret = ttm_bo_validate(bo, &placement, &ctx);
>>
>>          /* For some reason we didn't end up at the start of vram */
>>          WARN_ON(ret == 0 && bo->resource->start != 0);
>> --
>> 2.34.1
>>
Matthew Auld Jan. 24, 2023, 10:12 a.m. UTC | #5
On Tue, 24 Jan 2023 at 09:51, Christian König
<ckoenig.leichtzumerken@gmail.com> wrote:
>
> Am 11.01.23 um 14:17 schrieb Matthew Auld:
> > On Wed, 11 Jan 2023 at 11:43, Christian König
> > <ckoenig.leichtzumerken@gmail.com> wrote:
> >> We have checks for this in the individual drivers move callback, but
> >> it's probably better to generally forbit that on a higher level.
> >>
> >> Also stops exporting ttm_resource_compat() since that's not necessary
> >> any more after removing the extra checks in vmwgfx.
> >>
> >> Signed-off-by: Christian König <christian.koenig@amd.com>
> >> ---
> >>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c |  4 ----
> >>   drivers/gpu/drm/nouveau/nouveau_bo.c    |  3 ---
> >>   drivers/gpu/drm/radeon/radeon_ttm.c     |  4 ----
> >>   drivers/gpu/drm/ttm/ttm_bo.c            | 20 ++++++++++++--------
> >>   drivers/gpu/drm/ttm/ttm_resource.c      |  1 -
> >>   drivers/gpu/drm/vmwgfx/vmwgfx_bo.c      | 19 ++-----------------
> >>   6 files changed, 14 insertions(+), 37 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> >> index 068c2d8495fd..677cd7d91687 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> >> @@ -466,11 +466,7 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
> >>                          return r;
> >>          }
> >>
> >> -       /* Can't move a pinned BO */
> >>          abo = ttm_to_amdgpu_bo(bo);
> >> -       if (WARN_ON_ONCE(abo->tbo.pin_count > 0))
> >> -               return -EINVAL;
> >> -
> >>          adev = amdgpu_ttm_adev(bo->bdev);
> >>
> >>          if (!old_mem || (old_mem->mem_type == TTM_PL_SYSTEM &&
> >> diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
> >> index 288eebc70a67..c2ec91cc845d 100644
> >> --- a/drivers/gpu/drm/nouveau/nouveau_bo.c
> >> +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
> >> @@ -1015,9 +1015,6 @@ nouveau_bo_move(struct ttm_buffer_object *bo, bool evict,
> >>          if (ret)
> >>                  goto out_ntfy;
> >>
> >> -       if (nvbo->bo.pin_count)
> >> -               NV_WARN(drm, "Moving pinned object %p!\n", nvbo);
> >> -
> >>          if (drm->client.device.info.family < NV_DEVICE_INFO_V0_TESLA) {
> >>                  ret = nouveau_bo_vm_bind(bo, new_reg, &new_tile);
> >>                  if (ret)
> >> diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
> >> index 1e8e287e113c..67075c85f847 100644
> >> --- a/drivers/gpu/drm/radeon/radeon_ttm.c
> >> +++ b/drivers/gpu/drm/radeon/radeon_ttm.c
> >> @@ -211,11 +211,7 @@ static int radeon_bo_move(struct ttm_buffer_object *bo, bool evict,
> >>          if (r)
> >>                  return r;
> >>
> >> -       /* Can't move a pinned BO */
> >>          rbo = container_of(bo, struct radeon_bo, tbo);
> >> -       if (WARN_ON_ONCE(rbo->tbo.pin_count > 0))
> >> -               return -EINVAL;
> >> -
> >>          rdev = radeon_get_rdev(bo->bdev);
> >>          if (old_mem->mem_type == TTM_PL_SYSTEM && bo->ttm == NULL) {
> >>                  ttm_bo_move_null(bo, new_mem);
> >> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> >> index 326a3d13a829..9baccb2f6e99 100644
> >> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> >> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> >> @@ -894,14 +894,18 @@ int ttm_bo_validate(struct ttm_buffer_object *bo,
> >>          if (!placement->num_placement && !placement->num_busy_placement)
> >>                  return ttm_bo_pipeline_gutting(bo);
> >>
> >> -       /*
> >> -        * Check whether we need to move buffer.
> >> -        */
> >> -       if (!bo->resource || !ttm_resource_compat(bo->resource, placement)) {
> >> -               ret = ttm_bo_move_buffer(bo, placement, ctx);
> >> -               if (ret)
> >> -                       return ret;
> >> -       }
> >> +       /* Check whether we need to move buffer. */
> >> +       if (bo->resource && ttm_resource_compat(bo->resource, placement))
> >> +               return 0;
> > Note this now skips the tt create below (intentional?). I think i915
> > needed that, since it creates a dummy system resource initially for
> > all objects, and then relies on ZERO_ALLOC being set for certain
> > objects to know if the memory needs to be cleared or not when later
> > doing the dummy -> vram. Thoughts?
>
> That's unproblematic. On initial allocation bo->resource is NULL so we
> never branch out here.

Here is what I see in drm-tip, when first creating an object with ttm:

ttm_bo_init_reserved() -> ttm_resource_alloc(PL_SYSTEM, &bo->resource)
-> ttm_bo_validate()

So bo->resource is for sure not NULL on initial allocation, and is
pointing to PL_SYSTEM. And in i915 we initially stuff everything into
SYSTEM as a dummy placement.

IIRC you had a series that tried to address that (allowing NULL
resource or so), but it never landed:
https://patchwork.freedesktop.org/patch/500698/?series=107680&rev=2

>
> Christian.
>
> >
> >> +
> >> +       /* Moving of pinned BOs is forbidden */
> >> +       if (bo->pin_count)
> >> +               return -EINVAL;
> >> +
> >> +       ret = ttm_bo_move_buffer(bo, placement, ctx);
> >> +       if (ret)
> >> +               return ret;
> >> +
> >>          /*
> >>           * We might need to add a TTM.
> >>           */
> >> diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c
> >> index b8a826a24fb2..7333f7a87a2f 100644
> >> --- a/drivers/gpu/drm/ttm/ttm_resource.c
> >> +++ b/drivers/gpu/drm/ttm/ttm_resource.c
> >> @@ -361,7 +361,6 @@ bool ttm_resource_compat(struct ttm_resource *res,
> >>
> >>          return false;
> >>   }
> >> -EXPORT_SYMBOL(ttm_resource_compat);
> >>
> >>   void ttm_resource_set_bo(struct ttm_resource *res,
> >>                           struct ttm_buffer_object *bo)
> >> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
> >> index 321c551784a1..dbcef460c452 100644
> >> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
> >> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
> >> @@ -87,12 +87,7 @@ int vmw_bo_pin_in_placement(struct vmw_private *dev_priv,
> >>          if (unlikely(ret != 0))
> >>                  goto err;
> >>
> >> -       if (buf->base.pin_count > 0)
> >> -               ret = ttm_resource_compat(bo->resource, placement)
> >> -                       ? 0 : -EINVAL;
> >> -       else
> >> -               ret = ttm_bo_validate(bo, placement, &ctx);
> >> -
> >> +       ret = ttm_bo_validate(bo, placement, &ctx);
> >>          if (!ret)
> >>                  vmw_bo_pin_reserved(buf, true);
> >>
> >> @@ -128,12 +123,6 @@ int vmw_bo_pin_in_vram_or_gmr(struct vmw_private *dev_priv,
> >>          if (unlikely(ret != 0))
> >>                  goto err;
> >>
> >> -       if (buf->base.pin_count > 0) {
> >> -               ret = ttm_resource_compat(bo->resource, &vmw_vram_gmr_placement)
> >> -                       ? 0 : -EINVAL;
> >> -               goto out_unreserve;
> >> -       }
> >> -
> >>          ret = ttm_bo_validate(bo, &vmw_vram_gmr_placement, &ctx);
> >>          if (likely(ret == 0) || ret == -ERESTARTSYS)
> >>                  goto out_unreserve;
> >> @@ -218,11 +207,7 @@ int vmw_bo_pin_in_start_of_vram(struct vmw_private *dev_priv,
> >>                  (void) ttm_bo_validate(bo, &vmw_sys_placement, &ctx);
> >>          }
> >>
> >> -       if (buf->base.pin_count > 0)
> >> -               ret = ttm_resource_compat(bo->resource, &placement)
> >> -                       ? 0 : -EINVAL;
> >> -       else
> >> -               ret = ttm_bo_validate(bo, &placement, &ctx);
> >> +       ret = ttm_bo_validate(bo, &placement, &ctx);
> >>
> >>          /* For some reason we didn't end up at the start of vram */
> >>          WARN_ON(ret == 0 && bo->resource->start != 0);
> >> --
> >> 2.34.1
> >>
>
Christian König Jan. 24, 2023, 10:15 a.m. UTC | #6
Am 24.01.23 um 11:12 schrieb Matthew Auld:
> On Tue, 24 Jan 2023 at 09:51, Christian König
> <ckoenig.leichtzumerken@gmail.com> wrote:
>> Am 11.01.23 um 14:17 schrieb Matthew Auld:
>>> On Wed, 11 Jan 2023 at 11:43, Christian König
>>> <ckoenig.leichtzumerken@gmail.com> wrote:
>>>> We have checks for this in the individual drivers move callback, but
>>>> it's probably better to generally forbit that on a higher level.
>>>>
>>>> Also stops exporting ttm_resource_compat() since that's not necessary
>>>> any more after removing the extra checks in vmwgfx.
>>>>
>>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>>> ---
>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c |  4 ----
>>>>    drivers/gpu/drm/nouveau/nouveau_bo.c    |  3 ---
>>>>    drivers/gpu/drm/radeon/radeon_ttm.c     |  4 ----
>>>>    drivers/gpu/drm/ttm/ttm_bo.c            | 20 ++++++++++++--------
>>>>    drivers/gpu/drm/ttm/ttm_resource.c      |  1 -
>>>>    drivers/gpu/drm/vmwgfx/vmwgfx_bo.c      | 19 ++-----------------
>>>>    6 files changed, 14 insertions(+), 37 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>>> index 068c2d8495fd..677cd7d91687 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>>> @@ -466,11 +466,7 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
>>>>                           return r;
>>>>           }
>>>>
>>>> -       /* Can't move a pinned BO */
>>>>           abo = ttm_to_amdgpu_bo(bo);
>>>> -       if (WARN_ON_ONCE(abo->tbo.pin_count > 0))
>>>> -               return -EINVAL;
>>>> -
>>>>           adev = amdgpu_ttm_adev(bo->bdev);
>>>>
>>>>           if (!old_mem || (old_mem->mem_type == TTM_PL_SYSTEM &&
>>>> diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
>>>> index 288eebc70a67..c2ec91cc845d 100644
>>>> --- a/drivers/gpu/drm/nouveau/nouveau_bo.c
>>>> +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
>>>> @@ -1015,9 +1015,6 @@ nouveau_bo_move(struct ttm_buffer_object *bo, bool evict,
>>>>           if (ret)
>>>>                   goto out_ntfy;
>>>>
>>>> -       if (nvbo->bo.pin_count)
>>>> -               NV_WARN(drm, "Moving pinned object %p!\n", nvbo);
>>>> -
>>>>           if (drm->client.device.info.family < NV_DEVICE_INFO_V0_TESLA) {
>>>>                   ret = nouveau_bo_vm_bind(bo, new_reg, &new_tile);
>>>>                   if (ret)
>>>> diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
>>>> index 1e8e287e113c..67075c85f847 100644
>>>> --- a/drivers/gpu/drm/radeon/radeon_ttm.c
>>>> +++ b/drivers/gpu/drm/radeon/radeon_ttm.c
>>>> @@ -211,11 +211,7 @@ static int radeon_bo_move(struct ttm_buffer_object *bo, bool evict,
>>>>           if (r)
>>>>                   return r;
>>>>
>>>> -       /* Can't move a pinned BO */
>>>>           rbo = container_of(bo, struct radeon_bo, tbo);
>>>> -       if (WARN_ON_ONCE(rbo->tbo.pin_count > 0))
>>>> -               return -EINVAL;
>>>> -
>>>>           rdev = radeon_get_rdev(bo->bdev);
>>>>           if (old_mem->mem_type == TTM_PL_SYSTEM && bo->ttm == NULL) {
>>>>                   ttm_bo_move_null(bo, new_mem);
>>>> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
>>>> index 326a3d13a829..9baccb2f6e99 100644
>>>> --- a/drivers/gpu/drm/ttm/ttm_bo.c
>>>> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
>>>> @@ -894,14 +894,18 @@ int ttm_bo_validate(struct ttm_buffer_object *bo,
>>>>           if (!placement->num_placement && !placement->num_busy_placement)
>>>>                   return ttm_bo_pipeline_gutting(bo);
>>>>
>>>> -       /*
>>>> -        * Check whether we need to move buffer.
>>>> -        */
>>>> -       if (!bo->resource || !ttm_resource_compat(bo->resource, placement)) {
>>>> -               ret = ttm_bo_move_buffer(bo, placement, ctx);
>>>> -               if (ret)
>>>> -                       return ret;
>>>> -       }
>>>> +       /* Check whether we need to move buffer. */
>>>> +       if (bo->resource && ttm_resource_compat(bo->resource, placement))
>>>> +               return 0;
>>> Note this now skips the tt create below (intentional?). I think i915
>>> needed that, since it creates a dummy system resource initially for
>>> all objects, and then relies on ZERO_ALLOC being set for certain
>>> objects to know if the memory needs to be cleared or not when later
>>> doing the dummy -> vram. Thoughts?
>> That's unproblematic. On initial allocation bo->resource is NULL so we
>> never branch out here.
> Here is what I see in drm-tip, when first creating an object with ttm:
>
> ttm_bo_init_reserved() -> ttm_resource_alloc(PL_SYSTEM, &bo->resource)
> -> ttm_bo_validate()
>
> So bo->resource is for sure not NULL on initial allocation, and is
> pointing to PL_SYSTEM. And in i915 we initially stuff everything into
> SYSTEM as a dummy placement.
>
> IIRC you had a series that tried to address that (allowing NULL
> resource or so), but it never landed:
> https://patchwork.freedesktop.org/patch/500698/?series=107680&rev=2

Oh! My recollection was that this was done!

Sorry my memory failed me here, thanks for the notice.

Christian.

>
>> Christian.
>>
>>>> +
>>>> +       /* Moving of pinned BOs is forbidden */
>>>> +       if (bo->pin_count)
>>>> +               return -EINVAL;
>>>> +
>>>> +       ret = ttm_bo_move_buffer(bo, placement, ctx);
>>>> +       if (ret)
>>>> +               return ret;
>>>> +
>>>>           /*
>>>>            * We might need to add a TTM.
>>>>            */
>>>> diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c
>>>> index b8a826a24fb2..7333f7a87a2f 100644
>>>> --- a/drivers/gpu/drm/ttm/ttm_resource.c
>>>> +++ b/drivers/gpu/drm/ttm/ttm_resource.c
>>>> @@ -361,7 +361,6 @@ bool ttm_resource_compat(struct ttm_resource *res,
>>>>
>>>>           return false;
>>>>    }
>>>> -EXPORT_SYMBOL(ttm_resource_compat);
>>>>
>>>>    void ttm_resource_set_bo(struct ttm_resource *res,
>>>>                            struct ttm_buffer_object *bo)
>>>> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
>>>> index 321c551784a1..dbcef460c452 100644
>>>> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
>>>> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
>>>> @@ -87,12 +87,7 @@ int vmw_bo_pin_in_placement(struct vmw_private *dev_priv,
>>>>           if (unlikely(ret != 0))
>>>>                   goto err;
>>>>
>>>> -       if (buf->base.pin_count > 0)
>>>> -               ret = ttm_resource_compat(bo->resource, placement)
>>>> -                       ? 0 : -EINVAL;
>>>> -       else
>>>> -               ret = ttm_bo_validate(bo, placement, &ctx);
>>>> -
>>>> +       ret = ttm_bo_validate(bo, placement, &ctx);
>>>>           if (!ret)
>>>>                   vmw_bo_pin_reserved(buf, true);
>>>>
>>>> @@ -128,12 +123,6 @@ int vmw_bo_pin_in_vram_or_gmr(struct vmw_private *dev_priv,
>>>>           if (unlikely(ret != 0))
>>>>                   goto err;
>>>>
>>>> -       if (buf->base.pin_count > 0) {
>>>> -               ret = ttm_resource_compat(bo->resource, &vmw_vram_gmr_placement)
>>>> -                       ? 0 : -EINVAL;
>>>> -               goto out_unreserve;
>>>> -       }
>>>> -
>>>>           ret = ttm_bo_validate(bo, &vmw_vram_gmr_placement, &ctx);
>>>>           if (likely(ret == 0) || ret == -ERESTARTSYS)
>>>>                   goto out_unreserve;
>>>> @@ -218,11 +207,7 @@ int vmw_bo_pin_in_start_of_vram(struct vmw_private *dev_priv,
>>>>                   (void) ttm_bo_validate(bo, &vmw_sys_placement, &ctx);
>>>>           }
>>>>
>>>> -       if (buf->base.pin_count > 0)
>>>> -               ret = ttm_resource_compat(bo->resource, &placement)
>>>> -                       ? 0 : -EINVAL;
>>>> -       else
>>>> -               ret = ttm_bo_validate(bo, &placement, &ctx);
>>>> +       ret = ttm_bo_validate(bo, &placement, &ctx);
>>>>
>>>>           /* For some reason we didn't end up at the start of vram */
>>>>           WARN_ON(ret == 0 && bo->resource->start != 0);
>>>> --
>>>> 2.34.1
>>>>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 068c2d8495fd..677cd7d91687 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -466,11 +466,7 @@  static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
 			return r;
 	}
 
-	/* Can't move a pinned BO */
 	abo = ttm_to_amdgpu_bo(bo);
-	if (WARN_ON_ONCE(abo->tbo.pin_count > 0))
-		return -EINVAL;
-
 	adev = amdgpu_ttm_adev(bo->bdev);
 
 	if (!old_mem || (old_mem->mem_type == TTM_PL_SYSTEM &&
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 288eebc70a67..c2ec91cc845d 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -1015,9 +1015,6 @@  nouveau_bo_move(struct ttm_buffer_object *bo, bool evict,
 	if (ret)
 		goto out_ntfy;
 
-	if (nvbo->bo.pin_count)
-		NV_WARN(drm, "Moving pinned object %p!\n", nvbo);
-
 	if (drm->client.device.info.family < NV_DEVICE_INFO_V0_TESLA) {
 		ret = nouveau_bo_vm_bind(bo, new_reg, &new_tile);
 		if (ret)
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index 1e8e287e113c..67075c85f847 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -211,11 +211,7 @@  static int radeon_bo_move(struct ttm_buffer_object *bo, bool evict,
 	if (r)
 		return r;
 
-	/* Can't move a pinned BO */
 	rbo = container_of(bo, struct radeon_bo, tbo);
-	if (WARN_ON_ONCE(rbo->tbo.pin_count > 0))
-		return -EINVAL;
-
 	rdev = radeon_get_rdev(bo->bdev);
 	if (old_mem->mem_type == TTM_PL_SYSTEM && bo->ttm == NULL) {
 		ttm_bo_move_null(bo, new_mem);
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 326a3d13a829..9baccb2f6e99 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -894,14 +894,18 @@  int ttm_bo_validate(struct ttm_buffer_object *bo,
 	if (!placement->num_placement && !placement->num_busy_placement)
 		return ttm_bo_pipeline_gutting(bo);
 
-	/*
-	 * Check whether we need to move buffer.
-	 */
-	if (!bo->resource || !ttm_resource_compat(bo->resource, placement)) {
-		ret = ttm_bo_move_buffer(bo, placement, ctx);
-		if (ret)
-			return ret;
-	}
+	/* Check whether we need to move buffer. */
+	if (bo->resource && ttm_resource_compat(bo->resource, placement))
+		return 0;
+
+	/* Moving of pinned BOs is forbidden */
+	if (bo->pin_count)
+		return -EINVAL;
+
+	ret = ttm_bo_move_buffer(bo, placement, ctx);
+	if (ret)
+		return ret;
+
 	/*
 	 * We might need to add a TTM.
 	 */
diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c
index b8a826a24fb2..7333f7a87a2f 100644
--- a/drivers/gpu/drm/ttm/ttm_resource.c
+++ b/drivers/gpu/drm/ttm/ttm_resource.c
@@ -361,7 +361,6 @@  bool ttm_resource_compat(struct ttm_resource *res,
 
 	return false;
 }
-EXPORT_SYMBOL(ttm_resource_compat);
 
 void ttm_resource_set_bo(struct ttm_resource *res,
 			 struct ttm_buffer_object *bo)
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
index 321c551784a1..dbcef460c452 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
@@ -87,12 +87,7 @@  int vmw_bo_pin_in_placement(struct vmw_private *dev_priv,
 	if (unlikely(ret != 0))
 		goto err;
 
-	if (buf->base.pin_count > 0)
-		ret = ttm_resource_compat(bo->resource, placement)
-			? 0 : -EINVAL;
-	else
-		ret = ttm_bo_validate(bo, placement, &ctx);
-
+	ret = ttm_bo_validate(bo, placement, &ctx);
 	if (!ret)
 		vmw_bo_pin_reserved(buf, true);
 
@@ -128,12 +123,6 @@  int vmw_bo_pin_in_vram_or_gmr(struct vmw_private *dev_priv,
 	if (unlikely(ret != 0))
 		goto err;
 
-	if (buf->base.pin_count > 0) {
-		ret = ttm_resource_compat(bo->resource, &vmw_vram_gmr_placement)
-			? 0 : -EINVAL;
-		goto out_unreserve;
-	}
-
 	ret = ttm_bo_validate(bo, &vmw_vram_gmr_placement, &ctx);
 	if (likely(ret == 0) || ret == -ERESTARTSYS)
 		goto out_unreserve;
@@ -218,11 +207,7 @@  int vmw_bo_pin_in_start_of_vram(struct vmw_private *dev_priv,
 		(void) ttm_bo_validate(bo, &vmw_sys_placement, &ctx);
 	}
 
-	if (buf->base.pin_count > 0)
-		ret = ttm_resource_compat(bo->resource, &placement)
-			? 0 : -EINVAL;
-	else
-		ret = ttm_bo_validate(bo, &placement, &ctx);
+	ret = ttm_bo_validate(bo, &placement, &ctx);
 
 	/* For some reason we didn't end up at the start of vram */
 	WARN_ON(ret == 0 && bo->resource->start != 0);