@@ -116,6 +116,9 @@ struct dma_fence *sync_file_get_fence(int fd)
}
EXPORT_SYMBOL(sync_file_get_fence);
+const char *sync_fence_signaled_obj_name = "signaled-timeline";
+const char *sync_fence_signaled_driver_name = "signaled-driver";
+
/**
* sync_file_get_name - get the name of the sync_file
* @sync_file: sync_file to get the fence from
@@ -136,11 +139,18 @@ char *sync_file_get_name(struct sync_file *sync_file, char *buf, int len)
} else {
struct dma_fence *fence = sync_file->fence;
- snprintf(buf, len, "%s-%s%llu-%lld",
- fence->ops->get_driver_name(fence),
- fence->ops->get_timeline_name(fence),
- fence->context,
- fence->seqno);
+ if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
+ snprintf(buf, len, "%s-%s%llu-%lld",
+ sync_fence_signaled_driver_name,
+ sync_fence_signaled_obj_name,
+ fence->context,
+ fence->seqno);
+ else
+ snprintf(buf, len, "%s-%s%llu-%lld",
+ fence->ops->get_driver_name(fence),
+ fence->ops->get_timeline_name(fence),
+ fence->context,
+ fence->seqno);
}
return buf;
@@ -262,6 +272,15 @@ static long sync_file_ioctl_merge(struct sync_file *sync_file,
static int sync_fill_fence_info(struct dma_fence *fence,
struct sync_fence_info *info)
{
+ if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) {
+ info->status = fence->error ?: 1;
+ info->timestamp_ns = ktime_to_ns(dma_fence_timestamp(fence));
+ strscpy(info->obj_name, sync_fence_signaled_obj_name);
+ strscpy(info->driver_name, sync_fence_signaled_driver_name);
+
+ return info->status;
+ }
+
strscpy(info->obj_name, fence->ops->get_timeline_name(fence),
sizeof(info->obj_name));
strscpy(info->driver_name, fence->ops->get_driver_name(fence),
Xe and probably some other drivers can tear down the internal state referenced by exported sync file fence which then causes a null pointer derefences on accessing said fence. This is somewhat related to DRM scheduler design where sched fence is supposed to be allowed to outlive the scheduler instance itself, in which case either the fence->ops, or just the timeline name may go away and the fact driver has no real visibility if someone had converted the syncobj into sync file in the meantime. Bug can be triggered easily from IGT: [IGT] xe_sync_file: starting subtest sync_file_race ================================================================== BUG: KASAN: slab-use-after-free in drm_sched_fence_get_timeline_name+0xa1/0xb0 [gpu_sched] Read of size 8 at addr ffff888126726020 by task xe_sync_file/2931 ... Call Trace: <TASK> kasan_report+0xeb/0x130 drm_sched_fence_get_timeline_name+0xa1/0xb0 [gpu_sched] sync_file_ioctl+0x3cb/0xb00 ... Allocated by task 2931: __kmalloc_cache_noprof+0x1c2/0x410 guc_exec_queue_init+0x1a8/0x1240 [xe] xe_exec_queue_create+0xe72/0x13b0 [xe] xe_exec_queue_create_ioctl+0x10d9/0x1770 [xe] drm_ioctl_kernel+0x179/0x300 drm_ioctl+0x58f/0xcf0 xe_drm_ioctl+0xe8/0x140 [xe] ... Freed by task 1689: kfree+0x106/0x3e0 __guc_exec_queue_fini_async+0x144/0x2d0 [xe] process_one_work+0x610/0xdf0 worker_thread+0x7c8/0x14b0 This patch papers over it weakly by guarding one entry points with the signaled check. Race is still there just the window is smaller. As an alternative we could remove the timeline description from sched fence altogether, but the name is unfortunately not the only route to disaster. There is also the dma fence deadline setting ioctl. Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com> Cc: Christian König <christian.koenig@amd.com> Cc: Danilo Krummrich <dakr@kernel.org> Cc: Lucas De Marchi <lucas.demarchi@intel.com> Cc: Matthew Brost <matthew.brost@intel.com> Cc: Philipp Stanner <phasta@kernel.org> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com> --- drivers/dma-buf/sync_file.c | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-)