diff mbox series

[drm-misc-next,v9,06/11] drm/nouveau: fence: separate fence alloc and emit

Message ID 20230803165238.8798-7-dakr@redhat.com (mailing list archive)
State New, archived
Headers show
Series Nouveau VM_BIND UAPI & DRM GPUVA Manager (merged) | expand

Commit Message

Danilo Krummrich Aug. 3, 2023, 4:52 p.m. UTC
The new (VM_BIND) UAPI exports DMA fences through DRM syncobjs. Hence,
in order to emit fences within DMA fence signalling critical sections
(e.g. as typically done in the DRM GPU schedulers run_job() callback) we
need to separate fence allocation and fence emitting.

Signed-off-by: Danilo Krummrich <dakr@redhat.com>
---
 drivers/gpu/drm/nouveau/dispnv04/crtc.c |  9 ++++-
 drivers/gpu/drm/nouveau/nouveau_bo.c    | 52 +++++++++++++++----------
 drivers/gpu/drm/nouveau/nouveau_chan.c  |  6 ++-
 drivers/gpu/drm/nouveau/nouveau_dmem.c  |  9 +++--
 drivers/gpu/drm/nouveau/nouveau_fence.c | 16 +++-----
 drivers/gpu/drm/nouveau/nouveau_fence.h |  3 +-
 drivers/gpu/drm/nouveau/nouveau_gem.c   |  5 ++-
 7 files changed, 59 insertions(+), 41 deletions(-)

Comments

Christian König Aug. 7, 2023, 6:07 p.m. UTC | #1
Am 03.08.23 um 18:52 schrieb Danilo Krummrich:
> The new (VM_BIND) UAPI exports DMA fences through DRM syncobjs. Hence,
> in order to emit fences within DMA fence signalling critical sections
> (e.g. as typically done in the DRM GPU schedulers run_job() callback) we
> need to separate fence allocation and fence emitting.

At least from the description that sounds like it might be illegal. 
Daniel can you take a look as well.

What exactly are you doing here?

Regards,
Christian.

>
> Signed-off-by: Danilo Krummrich <dakr@redhat.com>
> ---
>   drivers/gpu/drm/nouveau/dispnv04/crtc.c |  9 ++++-
>   drivers/gpu/drm/nouveau/nouveau_bo.c    | 52 +++++++++++++++----------
>   drivers/gpu/drm/nouveau/nouveau_chan.c  |  6 ++-
>   drivers/gpu/drm/nouveau/nouveau_dmem.c  |  9 +++--
>   drivers/gpu/drm/nouveau/nouveau_fence.c | 16 +++-----
>   drivers/gpu/drm/nouveau/nouveau_fence.h |  3 +-
>   drivers/gpu/drm/nouveau/nouveau_gem.c   |  5 ++-
>   7 files changed, 59 insertions(+), 41 deletions(-)
>
> diff --git a/drivers/gpu/drm/nouveau/dispnv04/crtc.c b/drivers/gpu/drm/nouveau/dispnv04/crtc.c
> index a6f2e681bde9..a34924523133 100644
> --- a/drivers/gpu/drm/nouveau/dispnv04/crtc.c
> +++ b/drivers/gpu/drm/nouveau/dispnv04/crtc.c
> @@ -1122,11 +1122,18 @@ nv04_page_flip_emit(struct nouveau_channel *chan,
>   	PUSH_NVSQ(push, NV_SW, NV_SW_PAGE_FLIP, 0x00000000);
>   	PUSH_KICK(push);
>   
> -	ret = nouveau_fence_new(chan, false, pfence);
> +	ret = nouveau_fence_new(pfence);
>   	if (ret)
>   		goto fail;
>   
> +	ret = nouveau_fence_emit(*pfence, chan);
> +	if (ret)
> +		goto fail_fence_unref;
> +
>   	return 0;
> +
> +fail_fence_unref:
> +	nouveau_fence_unref(pfence);
>   fail:
>   	spin_lock_irqsave(&dev->event_lock, flags);
>   	list_del(&s->head);
> diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
> index 057bc995f19b..e9cbbf594e6f 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_bo.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
> @@ -820,29 +820,39 @@ nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict,
>   		mutex_lock(&cli->mutex);
>   	else
>   		mutex_lock_nested(&cli->mutex, SINGLE_DEPTH_NESTING);
> +
>   	ret = nouveau_fence_sync(nouveau_bo(bo), chan, true, ctx->interruptible);
> -	if (ret == 0) {
> -		ret = drm->ttm.move(chan, bo, bo->resource, new_reg);
> -		if (ret == 0) {
> -			ret = nouveau_fence_new(chan, false, &fence);
> -			if (ret == 0) {
> -				/* TODO: figure out a better solution here
> -				 *
> -				 * wait on the fence here explicitly as going through
> -				 * ttm_bo_move_accel_cleanup somehow doesn't seem to do it.
> -				 *
> -				 * Without this the operation can timeout and we'll fallback to a
> -				 * software copy, which might take several minutes to finish.
> -				 */
> -				nouveau_fence_wait(fence, false, false);
> -				ret = ttm_bo_move_accel_cleanup(bo,
> -								&fence->base,
> -								evict, false,
> -								new_reg);
> -				nouveau_fence_unref(&fence);
> -			}
> -		}
> +	if (ret)
> +		goto out_unlock;
> +
> +	ret = drm->ttm.move(chan, bo, bo->resource, new_reg);
> +	if (ret)
> +		goto out_unlock;
> +
> +	ret = nouveau_fence_new(&fence);
> +	if (ret)
> +		goto out_unlock;
> +
> +	ret = nouveau_fence_emit(fence, chan);
> +	if (ret) {
> +		nouveau_fence_unref(&fence);
> +		goto out_unlock;
>   	}
> +
> +	/* TODO: figure out a better solution here
> +	 *
> +	 * wait on the fence here explicitly as going through
> +	 * ttm_bo_move_accel_cleanup somehow doesn't seem to do it.
> +	 *
> +	 * Without this the operation can timeout and we'll fallback to a
> +	 * software copy, which might take several minutes to finish.
> +	 */
> +	nouveau_fence_wait(fence, false, false);
> +	ret = ttm_bo_move_accel_cleanup(bo, &fence->base, evict, false,
> +					new_reg);
> +	nouveau_fence_unref(&fence);
> +
> +out_unlock:
>   	mutex_unlock(&cli->mutex);
>   	return ret;
>   }
> diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.c b/drivers/gpu/drm/nouveau/nouveau_chan.c
> index 6d639314250a..f69be4c8f9f2 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_chan.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_chan.c
> @@ -62,9 +62,11 @@ nouveau_channel_idle(struct nouveau_channel *chan)
>   		struct nouveau_fence *fence = NULL;
>   		int ret;
>   
> -		ret = nouveau_fence_new(chan, false, &fence);
> +		ret = nouveau_fence_new(&fence);
>   		if (!ret) {
> -			ret = nouveau_fence_wait(fence, false, false);
> +			ret = nouveau_fence_emit(fence, chan);
> +			if (!ret)
> +				ret = nouveau_fence_wait(fence, false, false);
>   			nouveau_fence_unref(&fence);
>   		}
>   
> diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c
> index 789857faa048..4ad40e42cae1 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_dmem.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c
> @@ -209,7 +209,8 @@ static vm_fault_t nouveau_dmem_migrate_to_ram(struct vm_fault *vmf)
>   		goto done;
>   	}
>   
> -	nouveau_fence_new(dmem->migrate.chan, false, &fence);
> +	if (!nouveau_fence_new(&fence))
> +		nouveau_fence_emit(fence, dmem->migrate.chan);
>   	migrate_vma_pages(&args);
>   	nouveau_dmem_fence_done(&fence);
>   	dma_unmap_page(drm->dev->dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
> @@ -402,7 +403,8 @@ nouveau_dmem_evict_chunk(struct nouveau_dmem_chunk *chunk)
>   		}
>   	}
>   
> -	nouveau_fence_new(chunk->drm->dmem->migrate.chan, false, &fence);
> +	if (!nouveau_fence_new(&fence))
> +		nouveau_fence_emit(fence, chunk->drm->dmem->migrate.chan);
>   	migrate_device_pages(src_pfns, dst_pfns, npages);
>   	nouveau_dmem_fence_done(&fence);
>   	migrate_device_finalize(src_pfns, dst_pfns, npages);
> @@ -675,7 +677,8 @@ static void nouveau_dmem_migrate_chunk(struct nouveau_drm *drm,
>   		addr += PAGE_SIZE;
>   	}
>   
> -	nouveau_fence_new(drm->dmem->migrate.chan, false, &fence);
> +	if (!nouveau_fence_new(&fence))
> +		nouveau_fence_emit(fence, chunk->drm->dmem->migrate.chan);
>   	migrate_vma_pages(args);
>   	nouveau_dmem_fence_done(&fence);
>   	nouveau_pfns_map(svmm, args->vma->vm_mm, args->start, pfns, i);
> diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c
> index ee5e9d40c166..e946408f945b 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_fence.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c
> @@ -210,6 +210,9 @@ nouveau_fence_emit(struct nouveau_fence *fence, struct nouveau_channel *chan)
>   	struct nouveau_fence_priv *priv = (void*)chan->drm->fence;
>   	int ret;
>   
> +	if (unlikely(!chan->fence))
> +		return -ENODEV;
> +
>   	fence->channel  = chan;
>   	fence->timeout  = jiffies + (15 * HZ);
>   
> @@ -396,25 +399,16 @@ nouveau_fence_unref(struct nouveau_fence **pfence)
>   }
>   
>   int
> -nouveau_fence_new(struct nouveau_channel *chan, bool sysmem,
> -		  struct nouveau_fence **pfence)
> +nouveau_fence_new(struct nouveau_fence **pfence)
>   {
>   	struct nouveau_fence *fence;
> -	int ret = 0;
> -
> -	if (unlikely(!chan->fence))
> -		return -ENODEV;
>   
>   	fence = kzalloc(sizeof(*fence), GFP_KERNEL);
>   	if (!fence)
>   		return -ENOMEM;
>   
> -	ret = nouveau_fence_emit(fence, chan);
> -	if (ret)
> -		nouveau_fence_unref(&fence);
> -
>   	*pfence = fence;
> -	return ret;
> +	return 0;
>   }
>   
>   static const char *nouveau_fence_get_get_driver_name(struct dma_fence *fence)
> diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h b/drivers/gpu/drm/nouveau/nouveau_fence.h
> index 0ca2bc85adf6..7c73c7c9834a 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_fence.h
> +++ b/drivers/gpu/drm/nouveau/nouveau_fence.h
> @@ -17,8 +17,7 @@ struct nouveau_fence {
>   	unsigned long timeout;
>   };
>   
> -int  nouveau_fence_new(struct nouveau_channel *, bool sysmem,
> -		       struct nouveau_fence **);
> +int  nouveau_fence_new(struct nouveau_fence **);
>   void nouveau_fence_unref(struct nouveau_fence **);
>   
>   int  nouveau_fence_emit(struct nouveau_fence *, struct nouveau_channel *);
> diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c
> index a48f42aaeab9..9c8d1b911a01 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_gem.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
> @@ -873,8 +873,11 @@ nouveau_gem_ioctl_pushbuf(struct drm_device *dev, void *data,
>   		}
>   	}
>   
> -	ret = nouveau_fence_new(chan, false, &fence);
> +	ret = nouveau_fence_new(&fence);
> +	if (!ret)
> +		ret = nouveau_fence_emit(fence, chan);
>   	if (ret) {
> +		nouveau_fence_unref(&fence);
>   		NV_PRINTK(err, cli, "error fencing pushbuf: %d\n", ret);
>   		WIND_RING(chan);
>   		goto out;
Danilo Krummrich Aug. 7, 2023, 6:54 p.m. UTC | #2
Hi Christian,

On 8/7/23 20:07, Christian König wrote:
> Am 03.08.23 um 18:52 schrieb Danilo Krummrich:
>> The new (VM_BIND) UAPI exports DMA fences through DRM syncobjs. Hence,
>> in order to emit fences within DMA fence signalling critical sections
>> (e.g. as typically done in the DRM GPU schedulers run_job() callback) we
>> need to separate fence allocation and fence emitting.
> 
> At least from the description that sounds like it might be illegal. 
> Daniel can you take a look as well.
> 
> What exactly are you doing here?

I'm basically doing exactly the same as amdgpu_fence_emit() does in 
amdgpu_ib_schedule() called by amdgpu_job_run().

The difference - and this is what this patch is for - is that I separate 
the fence allocation from emitting the fence, such that the fence 
structure is allocated before the job is submitted to the GPU scheduler. 
amdgpu solves this with GFP_ATOMIC within amdgpu_fence_emit() to 
allocate the fence structure in this case.

- Danilo

> 
> Regards,
> Christian.
> 
>>
>> Signed-off-by: Danilo Krummrich <dakr@redhat.com>
>> ---
>>   drivers/gpu/drm/nouveau/dispnv04/crtc.c |  9 ++++-
>>   drivers/gpu/drm/nouveau/nouveau_bo.c    | 52 +++++++++++++++----------
>>   drivers/gpu/drm/nouveau/nouveau_chan.c  |  6 ++-
>>   drivers/gpu/drm/nouveau/nouveau_dmem.c  |  9 +++--
>>   drivers/gpu/drm/nouveau/nouveau_fence.c | 16 +++-----
>>   drivers/gpu/drm/nouveau/nouveau_fence.h |  3 +-
>>   drivers/gpu/drm/nouveau/nouveau_gem.c   |  5 ++-
>>   7 files changed, 59 insertions(+), 41 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/nouveau/dispnv04/crtc.c 
>> b/drivers/gpu/drm/nouveau/dispnv04/crtc.c
>> index a6f2e681bde9..a34924523133 100644
>> --- a/drivers/gpu/drm/nouveau/dispnv04/crtc.c
>> +++ b/drivers/gpu/drm/nouveau/dispnv04/crtc.c
>> @@ -1122,11 +1122,18 @@ nv04_page_flip_emit(struct nouveau_channel *chan,
>>       PUSH_NVSQ(push, NV_SW, NV_SW_PAGE_FLIP, 0x00000000);
>>       PUSH_KICK(push);
>> -    ret = nouveau_fence_new(chan, false, pfence);
>> +    ret = nouveau_fence_new(pfence);
>>       if (ret)
>>           goto fail;
>> +    ret = nouveau_fence_emit(*pfence, chan);
>> +    if (ret)
>> +        goto fail_fence_unref;
>> +
>>       return 0;
>> +
>> +fail_fence_unref:
>> +    nouveau_fence_unref(pfence);
>>   fail:
>>       spin_lock_irqsave(&dev->event_lock, flags);
>>       list_del(&s->head);
>> diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c 
>> b/drivers/gpu/drm/nouveau/nouveau_bo.c
>> index 057bc995f19b..e9cbbf594e6f 100644
>> --- a/drivers/gpu/drm/nouveau/nouveau_bo.c
>> +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
>> @@ -820,29 +820,39 @@ nouveau_bo_move_m2mf(struct ttm_buffer_object 
>> *bo, int evict,
>>           mutex_lock(&cli->mutex);
>>       else
>>           mutex_lock_nested(&cli->mutex, SINGLE_DEPTH_NESTING);
>> +
>>       ret = nouveau_fence_sync(nouveau_bo(bo), chan, true, 
>> ctx->interruptible);
>> -    if (ret == 0) {
>> -        ret = drm->ttm.move(chan, bo, bo->resource, new_reg);
>> -        if (ret == 0) {
>> -            ret = nouveau_fence_new(chan, false, &fence);
>> -            if (ret == 0) {
>> -                /* TODO: figure out a better solution here
>> -                 *
>> -                 * wait on the fence here explicitly as going through
>> -                 * ttm_bo_move_accel_cleanup somehow doesn't seem to 
>> do it.
>> -                 *
>> -                 * Without this the operation can timeout and we'll 
>> fallback to a
>> -                 * software copy, which might take several minutes to 
>> finish.
>> -                 */
>> -                nouveau_fence_wait(fence, false, false);
>> -                ret = ttm_bo_move_accel_cleanup(bo,
>> -                                &fence->base,
>> -                                evict, false,
>> -                                new_reg);
>> -                nouveau_fence_unref(&fence);
>> -            }
>> -        }
>> +    if (ret)
>> +        goto out_unlock;
>> +
>> +    ret = drm->ttm.move(chan, bo, bo->resource, new_reg);
>> +    if (ret)
>> +        goto out_unlock;
>> +
>> +    ret = nouveau_fence_new(&fence);
>> +    if (ret)
>> +        goto out_unlock;
>> +
>> +    ret = nouveau_fence_emit(fence, chan);
>> +    if (ret) {
>> +        nouveau_fence_unref(&fence);
>> +        goto out_unlock;
>>       }
>> +
>> +    /* TODO: figure out a better solution here
>> +     *
>> +     * wait on the fence here explicitly as going through
>> +     * ttm_bo_move_accel_cleanup somehow doesn't seem to do it.
>> +     *
>> +     * Without this the operation can timeout and we'll fallback to a
>> +     * software copy, which might take several minutes to finish.
>> +     */
>> +    nouveau_fence_wait(fence, false, false);
>> +    ret = ttm_bo_move_accel_cleanup(bo, &fence->base, evict, false,
>> +                    new_reg);
>> +    nouveau_fence_unref(&fence);
>> +
>> +out_unlock:
>>       mutex_unlock(&cli->mutex);
>>       return ret;
>>   }
>> diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.c 
>> b/drivers/gpu/drm/nouveau/nouveau_chan.c
>> index 6d639314250a..f69be4c8f9f2 100644
>> --- a/drivers/gpu/drm/nouveau/nouveau_chan.c
>> +++ b/drivers/gpu/drm/nouveau/nouveau_chan.c
>> @@ -62,9 +62,11 @@ nouveau_channel_idle(struct nouveau_channel *chan)
>>           struct nouveau_fence *fence = NULL;
>>           int ret;
>> -        ret = nouveau_fence_new(chan, false, &fence);
>> +        ret = nouveau_fence_new(&fence);
>>           if (!ret) {
>> -            ret = nouveau_fence_wait(fence, false, false);
>> +            ret = nouveau_fence_emit(fence, chan);
>> +            if (!ret)
>> +                ret = nouveau_fence_wait(fence, false, false);
>>               nouveau_fence_unref(&fence);
>>           }
>> diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c 
>> b/drivers/gpu/drm/nouveau/nouveau_dmem.c
>> index 789857faa048..4ad40e42cae1 100644
>> --- a/drivers/gpu/drm/nouveau/nouveau_dmem.c
>> +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c
>> @@ -209,7 +209,8 @@ static vm_fault_t 
>> nouveau_dmem_migrate_to_ram(struct vm_fault *vmf)
>>           goto done;
>>       }
>> -    nouveau_fence_new(dmem->migrate.chan, false, &fence);
>> +    if (!nouveau_fence_new(&fence))
>> +        nouveau_fence_emit(fence, dmem->migrate.chan);
>>       migrate_vma_pages(&args);
>>       nouveau_dmem_fence_done(&fence);
>>       dma_unmap_page(drm->dev->dev, dma_addr, PAGE_SIZE, 
>> DMA_BIDIRECTIONAL);
>> @@ -402,7 +403,8 @@ nouveau_dmem_evict_chunk(struct nouveau_dmem_chunk 
>> *chunk)
>>           }
>>       }
>> -    nouveau_fence_new(chunk->drm->dmem->migrate.chan, false, &fence);
>> +    if (!nouveau_fence_new(&fence))
>> +        nouveau_fence_emit(fence, chunk->drm->dmem->migrate.chan);
>>       migrate_device_pages(src_pfns, dst_pfns, npages);
>>       nouveau_dmem_fence_done(&fence);
>>       migrate_device_finalize(src_pfns, dst_pfns, npages);
>> @@ -675,7 +677,8 @@ static void nouveau_dmem_migrate_chunk(struct 
>> nouveau_drm *drm,
>>           addr += PAGE_SIZE;
>>       }
>> -    nouveau_fence_new(drm->dmem->migrate.chan, false, &fence);
>> +    if (!nouveau_fence_new(&fence))
>> +        nouveau_fence_emit(fence, chunk->drm->dmem->migrate.chan);
>>       migrate_vma_pages(args);
>>       nouveau_dmem_fence_done(&fence);
>>       nouveau_pfns_map(svmm, args->vma->vm_mm, args->start, pfns, i);
>> diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c 
>> b/drivers/gpu/drm/nouveau/nouveau_fence.c
>> index ee5e9d40c166..e946408f945b 100644
>> --- a/drivers/gpu/drm/nouveau/nouveau_fence.c
>> +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c
>> @@ -210,6 +210,9 @@ nouveau_fence_emit(struct nouveau_fence *fence, 
>> struct nouveau_channel *chan)
>>       struct nouveau_fence_priv *priv = (void*)chan->drm->fence;
>>       int ret;
>> +    if (unlikely(!chan->fence))
>> +        return -ENODEV;
>> +
>>       fence->channel  = chan;
>>       fence->timeout  = jiffies + (15 * HZ);
>> @@ -396,25 +399,16 @@ nouveau_fence_unref(struct nouveau_fence **pfence)
>>   }
>>   int
>> -nouveau_fence_new(struct nouveau_channel *chan, bool sysmem,
>> -          struct nouveau_fence **pfence)
>> +nouveau_fence_new(struct nouveau_fence **pfence)
>>   {
>>       struct nouveau_fence *fence;
>> -    int ret = 0;
>> -
>> -    if (unlikely(!chan->fence))
>> -        return -ENODEV;
>>       fence = kzalloc(sizeof(*fence), GFP_KERNEL);
>>       if (!fence)
>>           return -ENOMEM;
>> -    ret = nouveau_fence_emit(fence, chan);
>> -    if (ret)
>> -        nouveau_fence_unref(&fence);
>> -
>>       *pfence = fence;
>> -    return ret;
>> +    return 0;
>>   }
>>   static const char *nouveau_fence_get_get_driver_name(struct 
>> dma_fence *fence)
>> diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h 
>> b/drivers/gpu/drm/nouveau/nouveau_fence.h
>> index 0ca2bc85adf6..7c73c7c9834a 100644
>> --- a/drivers/gpu/drm/nouveau/nouveau_fence.h
>> +++ b/drivers/gpu/drm/nouveau/nouveau_fence.h
>> @@ -17,8 +17,7 @@ struct nouveau_fence {
>>       unsigned long timeout;
>>   };
>> -int  nouveau_fence_new(struct nouveau_channel *, bool sysmem,
>> -               struct nouveau_fence **);
>> +int  nouveau_fence_new(struct nouveau_fence **);
>>   void nouveau_fence_unref(struct nouveau_fence **);
>>   int  nouveau_fence_emit(struct nouveau_fence *, struct 
>> nouveau_channel *);
>> diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c 
>> b/drivers/gpu/drm/nouveau/nouveau_gem.c
>> index a48f42aaeab9..9c8d1b911a01 100644
>> --- a/drivers/gpu/drm/nouveau/nouveau_gem.c
>> +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
>> @@ -873,8 +873,11 @@ nouveau_gem_ioctl_pushbuf(struct drm_device *dev, 
>> void *data,
>>           }
>>       }
>> -    ret = nouveau_fence_new(chan, false, &fence);
>> +    ret = nouveau_fence_new(&fence);
>> +    if (!ret)
>> +        ret = nouveau_fence_emit(fence, chan);
>>       if (ret) {
>> +        nouveau_fence_unref(&fence);
>>           NV_PRINTK(err, cli, "error fencing pushbuf: %d\n", ret);
>>           WIND_RING(chan);
>>           goto out;
>
Christian König Aug. 8, 2023, 6:06 a.m. UTC | #3
Am 07.08.23 um 20:54 schrieb Danilo Krummrich:
> Hi Christian,
>
> On 8/7/23 20:07, Christian König wrote:
>> Am 03.08.23 um 18:52 schrieb Danilo Krummrich:
>>> The new (VM_BIND) UAPI exports DMA fences through DRM syncobjs. Hence,
>>> in order to emit fences within DMA fence signalling critical sections
>>> (e.g. as typically done in the DRM GPU schedulers run_job() 
>>> callback) we
>>> need to separate fence allocation and fence emitting.
>>
>> At least from the description that sounds like it might be illegal. 
>> Daniel can you take a look as well.
>>
>> What exactly are you doing here?
>
> I'm basically doing exactly the same as amdgpu_fence_emit() does in 
> amdgpu_ib_schedule() called by amdgpu_job_run().
>
> The difference - and this is what this patch is for - is that I 
> separate the fence allocation from emitting the fence, such that the 
> fence structure is allocated before the job is submitted to the GPU 
> scheduler. amdgpu solves this with GFP_ATOMIC within 
> amdgpu_fence_emit() to allocate the fence structure in this case.

Yeah, that use case is perfectly valid. Maybe update the commit message 
a bit to better describe that.

Something like "Separate fence allocation and emitting to avoid 
allocation within DMA fence signalling critical sections inside the DRM 
scheduler. This helps implement the new UAPI....".

Regards,
Christian.

>
> - Danilo
>
>>
>> Regards,
>> Christian.
>>
>>>
>>> Signed-off-by: Danilo Krummrich <dakr@redhat.com>
>>> ---
>>>   drivers/gpu/drm/nouveau/dispnv04/crtc.c |  9 ++++-
>>>   drivers/gpu/drm/nouveau/nouveau_bo.c    | 52 
>>> +++++++++++++++----------
>>>   drivers/gpu/drm/nouveau/nouveau_chan.c  |  6 ++-
>>>   drivers/gpu/drm/nouveau/nouveau_dmem.c  |  9 +++--
>>>   drivers/gpu/drm/nouveau/nouveau_fence.c | 16 +++-----
>>>   drivers/gpu/drm/nouveau/nouveau_fence.h |  3 +-
>>>   drivers/gpu/drm/nouveau/nouveau_gem.c   |  5 ++-
>>>   7 files changed, 59 insertions(+), 41 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/nouveau/dispnv04/crtc.c 
>>> b/drivers/gpu/drm/nouveau/dispnv04/crtc.c
>>> index a6f2e681bde9..a34924523133 100644
>>> --- a/drivers/gpu/drm/nouveau/dispnv04/crtc.c
>>> +++ b/drivers/gpu/drm/nouveau/dispnv04/crtc.c
>>> @@ -1122,11 +1122,18 @@ nv04_page_flip_emit(struct nouveau_channel 
>>> *chan,
>>>       PUSH_NVSQ(push, NV_SW, NV_SW_PAGE_FLIP, 0x00000000);
>>>       PUSH_KICK(push);
>>> -    ret = nouveau_fence_new(chan, false, pfence);
>>> +    ret = nouveau_fence_new(pfence);
>>>       if (ret)
>>>           goto fail;
>>> +    ret = nouveau_fence_emit(*pfence, chan);
>>> +    if (ret)
>>> +        goto fail_fence_unref;
>>> +
>>>       return 0;
>>> +
>>> +fail_fence_unref:
>>> +    nouveau_fence_unref(pfence);
>>>   fail:
>>>       spin_lock_irqsave(&dev->event_lock, flags);
>>>       list_del(&s->head);
>>> diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c 
>>> b/drivers/gpu/drm/nouveau/nouveau_bo.c
>>> index 057bc995f19b..e9cbbf594e6f 100644
>>> --- a/drivers/gpu/drm/nouveau/nouveau_bo.c
>>> +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
>>> @@ -820,29 +820,39 @@ nouveau_bo_move_m2mf(struct ttm_buffer_object 
>>> *bo, int evict,
>>>           mutex_lock(&cli->mutex);
>>>       else
>>>           mutex_lock_nested(&cli->mutex, SINGLE_DEPTH_NESTING);
>>> +
>>>       ret = nouveau_fence_sync(nouveau_bo(bo), chan, true, 
>>> ctx->interruptible);
>>> -    if (ret == 0) {
>>> -        ret = drm->ttm.move(chan, bo, bo->resource, new_reg);
>>> -        if (ret == 0) {
>>> -            ret = nouveau_fence_new(chan, false, &fence);
>>> -            if (ret == 0) {
>>> -                /* TODO: figure out a better solution here
>>> -                 *
>>> -                 * wait on the fence here explicitly as going through
>>> -                 * ttm_bo_move_accel_cleanup somehow doesn't seem 
>>> to do it.
>>> -                 *
>>> -                 * Without this the operation can timeout and we'll 
>>> fallback to a
>>> -                 * software copy, which might take several minutes 
>>> to finish.
>>> -                 */
>>> -                nouveau_fence_wait(fence, false, false);
>>> -                ret = ttm_bo_move_accel_cleanup(bo,
>>> -                                &fence->base,
>>> -                                evict, false,
>>> -                                new_reg);
>>> -                nouveau_fence_unref(&fence);
>>> -            }
>>> -        }
>>> +    if (ret)
>>> +        goto out_unlock;
>>> +
>>> +    ret = drm->ttm.move(chan, bo, bo->resource, new_reg);
>>> +    if (ret)
>>> +        goto out_unlock;
>>> +
>>> +    ret = nouveau_fence_new(&fence);
>>> +    if (ret)
>>> +        goto out_unlock;
>>> +
>>> +    ret = nouveau_fence_emit(fence, chan);
>>> +    if (ret) {
>>> +        nouveau_fence_unref(&fence);
>>> +        goto out_unlock;
>>>       }
>>> +
>>> +    /* TODO: figure out a better solution here
>>> +     *
>>> +     * wait on the fence here explicitly as going through
>>> +     * ttm_bo_move_accel_cleanup somehow doesn't seem to do it.
>>> +     *
>>> +     * Without this the operation can timeout and we'll fallback to a
>>> +     * software copy, which might take several minutes to finish.
>>> +     */
>>> +    nouveau_fence_wait(fence, false, false);
>>> +    ret = ttm_bo_move_accel_cleanup(bo, &fence->base, evict, false,
>>> +                    new_reg);
>>> +    nouveau_fence_unref(&fence);
>>> +
>>> +out_unlock:
>>>       mutex_unlock(&cli->mutex);
>>>       return ret;
>>>   }
>>> diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.c 
>>> b/drivers/gpu/drm/nouveau/nouveau_chan.c
>>> index 6d639314250a..f69be4c8f9f2 100644
>>> --- a/drivers/gpu/drm/nouveau/nouveau_chan.c
>>> +++ b/drivers/gpu/drm/nouveau/nouveau_chan.c
>>> @@ -62,9 +62,11 @@ nouveau_channel_idle(struct nouveau_channel *chan)
>>>           struct nouveau_fence *fence = NULL;
>>>           int ret;
>>> -        ret = nouveau_fence_new(chan, false, &fence);
>>> +        ret = nouveau_fence_new(&fence);
>>>           if (!ret) {
>>> -            ret = nouveau_fence_wait(fence, false, false);
>>> +            ret = nouveau_fence_emit(fence, chan);
>>> +            if (!ret)
>>> +                ret = nouveau_fence_wait(fence, false, false);
>>>               nouveau_fence_unref(&fence);
>>>           }
>>> diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c 
>>> b/drivers/gpu/drm/nouveau/nouveau_dmem.c
>>> index 789857faa048..4ad40e42cae1 100644
>>> --- a/drivers/gpu/drm/nouveau/nouveau_dmem.c
>>> +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c
>>> @@ -209,7 +209,8 @@ static vm_fault_t 
>>> nouveau_dmem_migrate_to_ram(struct vm_fault *vmf)
>>>           goto done;
>>>       }
>>> -    nouveau_fence_new(dmem->migrate.chan, false, &fence);
>>> +    if (!nouveau_fence_new(&fence))
>>> +        nouveau_fence_emit(fence, dmem->migrate.chan);
>>>       migrate_vma_pages(&args);
>>>       nouveau_dmem_fence_done(&fence);
>>>       dma_unmap_page(drm->dev->dev, dma_addr, PAGE_SIZE, 
>>> DMA_BIDIRECTIONAL);
>>> @@ -402,7 +403,8 @@ nouveau_dmem_evict_chunk(struct 
>>> nouveau_dmem_chunk *chunk)
>>>           }
>>>       }
>>> - nouveau_fence_new(chunk->drm->dmem->migrate.chan, false, &fence);
>>> +    if (!nouveau_fence_new(&fence))
>>> +        nouveau_fence_emit(fence, chunk->drm->dmem->migrate.chan);
>>>       migrate_device_pages(src_pfns, dst_pfns, npages);
>>>       nouveau_dmem_fence_done(&fence);
>>>       migrate_device_finalize(src_pfns, dst_pfns, npages);
>>> @@ -675,7 +677,8 @@ static void nouveau_dmem_migrate_chunk(struct 
>>> nouveau_drm *drm,
>>>           addr += PAGE_SIZE;
>>>       }
>>> -    nouveau_fence_new(drm->dmem->migrate.chan, false, &fence);
>>> +    if (!nouveau_fence_new(&fence))
>>> +        nouveau_fence_emit(fence, chunk->drm->dmem->migrate.chan);
>>>       migrate_vma_pages(args);
>>>       nouveau_dmem_fence_done(&fence);
>>>       nouveau_pfns_map(svmm, args->vma->vm_mm, args->start, pfns, i);
>>> diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c 
>>> b/drivers/gpu/drm/nouveau/nouveau_fence.c
>>> index ee5e9d40c166..e946408f945b 100644
>>> --- a/drivers/gpu/drm/nouveau/nouveau_fence.c
>>> +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c
>>> @@ -210,6 +210,9 @@ nouveau_fence_emit(struct nouveau_fence *fence, 
>>> struct nouveau_channel *chan)
>>>       struct nouveau_fence_priv *priv = (void*)chan->drm->fence;
>>>       int ret;
>>> +    if (unlikely(!chan->fence))
>>> +        return -ENODEV;
>>> +
>>>       fence->channel  = chan;
>>>       fence->timeout  = jiffies + (15 * HZ);
>>> @@ -396,25 +399,16 @@ nouveau_fence_unref(struct nouveau_fence 
>>> **pfence)
>>>   }
>>>   int
>>> -nouveau_fence_new(struct nouveau_channel *chan, bool sysmem,
>>> -          struct nouveau_fence **pfence)
>>> +nouveau_fence_new(struct nouveau_fence **pfence)
>>>   {
>>>       struct nouveau_fence *fence;
>>> -    int ret = 0;
>>> -
>>> -    if (unlikely(!chan->fence))
>>> -        return -ENODEV;
>>>       fence = kzalloc(sizeof(*fence), GFP_KERNEL);
>>>       if (!fence)
>>>           return -ENOMEM;
>>> -    ret = nouveau_fence_emit(fence, chan);
>>> -    if (ret)
>>> -        nouveau_fence_unref(&fence);
>>> -
>>>       *pfence = fence;
>>> -    return ret;
>>> +    return 0;
>>>   }
>>>   static const char *nouveau_fence_get_get_driver_name(struct 
>>> dma_fence *fence)
>>> diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h 
>>> b/drivers/gpu/drm/nouveau/nouveau_fence.h
>>> index 0ca2bc85adf6..7c73c7c9834a 100644
>>> --- a/drivers/gpu/drm/nouveau/nouveau_fence.h
>>> +++ b/drivers/gpu/drm/nouveau/nouveau_fence.h
>>> @@ -17,8 +17,7 @@ struct nouveau_fence {
>>>       unsigned long timeout;
>>>   };
>>> -int  nouveau_fence_new(struct nouveau_channel *, bool sysmem,
>>> -               struct nouveau_fence **);
>>> +int  nouveau_fence_new(struct nouveau_fence **);
>>>   void nouveau_fence_unref(struct nouveau_fence **);
>>>   int  nouveau_fence_emit(struct nouveau_fence *, struct 
>>> nouveau_channel *);
>>> diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c 
>>> b/drivers/gpu/drm/nouveau/nouveau_gem.c
>>> index a48f42aaeab9..9c8d1b911a01 100644
>>> --- a/drivers/gpu/drm/nouveau/nouveau_gem.c
>>> +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
>>> @@ -873,8 +873,11 @@ nouveau_gem_ioctl_pushbuf(struct drm_device 
>>> *dev, void *data,
>>>           }
>>>       }
>>> -    ret = nouveau_fence_new(chan, false, &fence);
>>> +    ret = nouveau_fence_new(&fence);
>>> +    if (!ret)
>>> +        ret = nouveau_fence_emit(fence, chan);
>>>       if (ret) {
>>> +        nouveau_fence_unref(&fence);
>>>           NV_PRINTK(err, cli, "error fencing pushbuf: %d\n", ret);
>>>           WIND_RING(chan);
>>>           goto out;
>>
>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/nouveau/dispnv04/crtc.c b/drivers/gpu/drm/nouveau/dispnv04/crtc.c
index a6f2e681bde9..a34924523133 100644
--- a/drivers/gpu/drm/nouveau/dispnv04/crtc.c
+++ b/drivers/gpu/drm/nouveau/dispnv04/crtc.c
@@ -1122,11 +1122,18 @@  nv04_page_flip_emit(struct nouveau_channel *chan,
 	PUSH_NVSQ(push, NV_SW, NV_SW_PAGE_FLIP, 0x00000000);
 	PUSH_KICK(push);
 
-	ret = nouveau_fence_new(chan, false, pfence);
+	ret = nouveau_fence_new(pfence);
 	if (ret)
 		goto fail;
 
+	ret = nouveau_fence_emit(*pfence, chan);
+	if (ret)
+		goto fail_fence_unref;
+
 	return 0;
+
+fail_fence_unref:
+	nouveau_fence_unref(pfence);
 fail:
 	spin_lock_irqsave(&dev->event_lock, flags);
 	list_del(&s->head);
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 057bc995f19b..e9cbbf594e6f 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -820,29 +820,39 @@  nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict,
 		mutex_lock(&cli->mutex);
 	else
 		mutex_lock_nested(&cli->mutex, SINGLE_DEPTH_NESTING);
+
 	ret = nouveau_fence_sync(nouveau_bo(bo), chan, true, ctx->interruptible);
-	if (ret == 0) {
-		ret = drm->ttm.move(chan, bo, bo->resource, new_reg);
-		if (ret == 0) {
-			ret = nouveau_fence_new(chan, false, &fence);
-			if (ret == 0) {
-				/* TODO: figure out a better solution here
-				 *
-				 * wait on the fence here explicitly as going through
-				 * ttm_bo_move_accel_cleanup somehow doesn't seem to do it.
-				 *
-				 * Without this the operation can timeout and we'll fallback to a
-				 * software copy, which might take several minutes to finish.
-				 */
-				nouveau_fence_wait(fence, false, false);
-				ret = ttm_bo_move_accel_cleanup(bo,
-								&fence->base,
-								evict, false,
-								new_reg);
-				nouveau_fence_unref(&fence);
-			}
-		}
+	if (ret)
+		goto out_unlock;
+
+	ret = drm->ttm.move(chan, bo, bo->resource, new_reg);
+	if (ret)
+		goto out_unlock;
+
+	ret = nouveau_fence_new(&fence);
+	if (ret)
+		goto out_unlock;
+
+	ret = nouveau_fence_emit(fence, chan);
+	if (ret) {
+		nouveau_fence_unref(&fence);
+		goto out_unlock;
 	}
+
+	/* TODO: figure out a better solution here
+	 *
+	 * wait on the fence here explicitly as going through
+	 * ttm_bo_move_accel_cleanup somehow doesn't seem to do it.
+	 *
+	 * Without this the operation can timeout and we'll fallback to a
+	 * software copy, which might take several minutes to finish.
+	 */
+	nouveau_fence_wait(fence, false, false);
+	ret = ttm_bo_move_accel_cleanup(bo, &fence->base, evict, false,
+					new_reg);
+	nouveau_fence_unref(&fence);
+
+out_unlock:
 	mutex_unlock(&cli->mutex);
 	return ret;
 }
diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.c b/drivers/gpu/drm/nouveau/nouveau_chan.c
index 6d639314250a..f69be4c8f9f2 100644
--- a/drivers/gpu/drm/nouveau/nouveau_chan.c
+++ b/drivers/gpu/drm/nouveau/nouveau_chan.c
@@ -62,9 +62,11 @@  nouveau_channel_idle(struct nouveau_channel *chan)
 		struct nouveau_fence *fence = NULL;
 		int ret;
 
-		ret = nouveau_fence_new(chan, false, &fence);
+		ret = nouveau_fence_new(&fence);
 		if (!ret) {
-			ret = nouveau_fence_wait(fence, false, false);
+			ret = nouveau_fence_emit(fence, chan);
+			if (!ret)
+				ret = nouveau_fence_wait(fence, false, false);
 			nouveau_fence_unref(&fence);
 		}
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c
index 789857faa048..4ad40e42cae1 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dmem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c
@@ -209,7 +209,8 @@  static vm_fault_t nouveau_dmem_migrate_to_ram(struct vm_fault *vmf)
 		goto done;
 	}
 
-	nouveau_fence_new(dmem->migrate.chan, false, &fence);
+	if (!nouveau_fence_new(&fence))
+		nouveau_fence_emit(fence, dmem->migrate.chan);
 	migrate_vma_pages(&args);
 	nouveau_dmem_fence_done(&fence);
 	dma_unmap_page(drm->dev->dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
@@ -402,7 +403,8 @@  nouveau_dmem_evict_chunk(struct nouveau_dmem_chunk *chunk)
 		}
 	}
 
-	nouveau_fence_new(chunk->drm->dmem->migrate.chan, false, &fence);
+	if (!nouveau_fence_new(&fence))
+		nouveau_fence_emit(fence, chunk->drm->dmem->migrate.chan);
 	migrate_device_pages(src_pfns, dst_pfns, npages);
 	nouveau_dmem_fence_done(&fence);
 	migrate_device_finalize(src_pfns, dst_pfns, npages);
@@ -675,7 +677,8 @@  static void nouveau_dmem_migrate_chunk(struct nouveau_drm *drm,
 		addr += PAGE_SIZE;
 	}
 
-	nouveau_fence_new(drm->dmem->migrate.chan, false, &fence);
+	if (!nouveau_fence_new(&fence))
+		nouveau_fence_emit(fence, drm->dmem->migrate.chan);
 	migrate_vma_pages(args);
 	nouveau_dmem_fence_done(&fence);
 	nouveau_pfns_map(svmm, args->vma->vm_mm, args->start, pfns, i);
diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c
index ee5e9d40c166..e946408f945b 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.c
@@ -210,6 +210,9 @@  nouveau_fence_emit(struct nouveau_fence *fence, struct nouveau_channel *chan)
 	struct nouveau_fence_priv *priv = (void*)chan->drm->fence;
 	int ret;
 
+	if (unlikely(!chan->fence))
+		return -ENODEV;
+
 	fence->channel  = chan;
 	fence->timeout  = jiffies + (15 * HZ);
 
@@ -396,25 +399,16 @@  nouveau_fence_unref(struct nouveau_fence **pfence)
 }
 
 int
-nouveau_fence_new(struct nouveau_channel *chan, bool sysmem,
-		  struct nouveau_fence **pfence)
+nouveau_fence_new(struct nouveau_fence **pfence)
 {
 	struct nouveau_fence *fence;
-	int ret = 0;
-
-	if (unlikely(!chan->fence))
-		return -ENODEV;
 
 	fence = kzalloc(sizeof(*fence), GFP_KERNEL);
 	if (!fence)
 		return -ENOMEM;
 
-	ret = nouveau_fence_emit(fence, chan);
-	if (ret)
-		nouveau_fence_unref(&fence);
-
 	*pfence = fence;
-	return ret;
+	return 0;
 }
 
 static const char *nouveau_fence_get_get_driver_name(struct dma_fence *fence)
diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h b/drivers/gpu/drm/nouveau/nouveau_fence.h
index 0ca2bc85adf6..7c73c7c9834a 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.h
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.h
@@ -17,8 +17,7 @@  struct nouveau_fence {
 	unsigned long timeout;
 };
 
-int  nouveau_fence_new(struct nouveau_channel *, bool sysmem,
-		       struct nouveau_fence **);
+int  nouveau_fence_new(struct nouveau_fence **);
 void nouveau_fence_unref(struct nouveau_fence **);
 
 int  nouveau_fence_emit(struct nouveau_fence *, struct nouveau_channel *);
diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c
index a48f42aaeab9..9c8d1b911a01 100644
--- a/drivers/gpu/drm/nouveau/nouveau_gem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
@@ -873,8 +873,11 @@  nouveau_gem_ioctl_pushbuf(struct drm_device *dev, void *data,
 		}
 	}
 
-	ret = nouveau_fence_new(chan, false, &fence);
+	ret = nouveau_fence_new(&fence);
+	if (!ret)
+		ret = nouveau_fence_emit(fence, chan);
 	if (ret) {
+		nouveau_fence_unref(&fence);
 		NV_PRINTK(err, cli, "error fencing pushbuf: %d\n", ret);
 		WIND_RING(chan);
 		goto out;