| Message ID | 20181023075754.13826-1-david1.zhou@amd.com (mailing list archive) |
|---|---|
| State | New, archived |
| Series | drm: fix deadlock of syncobj v5 |
Quoting Chunming Zhou (2018-10-23 08:57:54)
> v2:
> add a mutex between sync_cb execution and free.
> v3:
> clearly separating the roles for pt_lock and cb_mutex (Chris)
> v4:
> the cb_mutex should be taken outside of the pt_lock around this if() block. (Chris)
> v5:
> fix a corner case
>
> Tested by syncobj_basic and syncobj_wait of igt.
>
> Signed-off-by: Chunming Zhou <david1.zhou@amd.com>
> Cc: Daniel Vetter <daniel@ffwll.ch>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Christian König <christian.koenig@amd.com>
> Cc: intel-gfx@lists.freedesktop.org
> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/drm_syncobj.c | 55 +++++++++++++++++++----------------
>  include/drm/drm_syncobj.h     |  8 +++--
>  2 files changed, 36 insertions(+), 27 deletions(-)
>
> diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
> index 57bf6006394d..679a56791e34 100644
> --- a/drivers/gpu/drm/drm_syncobj.c
> +++ b/drivers/gpu/drm/drm_syncobj.c
> @@ -125,23 +125,26 @@ static int drm_syncobj_fence_get_or_add_callback(struct drm_syncobj *syncobj,
>  	if (!ret)
>  		return 1;
>
> -	spin_lock(&syncobj->lock);
> +	mutex_lock(&syncobj->cb_mutex);
>  	/* We've already tried once to get a fence and failed. Now that we
>  	 * have the lock, try one more time just to be sure we don't add a
>  	 * callback when a fence has already been set.
>  	 */
> +	spin_lock(&syncobj->pt_lock);
>  	if (!list_empty(&syncobj->signal_pt_list)) {
> -		spin_unlock(&syncobj->lock);
> +		spin_unlock(&syncobj->pt_lock);
>  		drm_syncobj_search_fence(syncobj, 0, 0, fence);

Hmm, just thinking of other ways of tidying this up

	mutex_lock(cb_lock);
	spin_lock(pt_lock);
	*fence = drm_syncobj_find_signal_pt_for_point();
	spin_unlock(pt_lock);
	if (!*fence)
		drm_syncobj_add_callback_locked(syncobj, cb, func);
	mutex_unlock(cb_lock);

i.e. get rid of the early return and we can even drop the int return here
as it is unimportant and unused.
-Chris
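Filled in against the helpers visible in the patch, the tidied flow Chris sketches above would look roughly like the function below. This is only an illustration, not the committed v6: the void return, the exact parameter list and the point value passed to drm_syncobj_find_signal_pt_for_point() are assumptions.

	/* Sketch of the suggestion above, made concrete; the signature and
	 * the argument choices are guesses, not the actual v6 code. */
	static void drm_syncobj_fence_get_or_add_callback(struct drm_syncobj *syncobj,
							   struct dma_fence **fence,
							   struct drm_syncobj_cb *cb,
							   drm_syncobj_func_t func)
	{
		mutex_lock(&syncobj->cb_mutex);

		/* Look up the current fence with only the pt spinlock held. */
		spin_lock(&syncobj->pt_lock);
		*fence = drm_syncobj_find_signal_pt_for_point(syncobj, 0);
		spin_unlock(&syncobj->pt_lock);

		/* No fence yet: register the callback while still holding
		 * cb_mutex, so a concurrent signal cannot race in between. */
		if (!*fence)
			drm_syncobj_add_callback_locked(syncobj, cb, func);

		mutex_unlock(&syncobj->cb_mutex);
	}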
Quoting zhoucm1 (2018-10-23 10:09:01)
>
> On 2018-10-23 17:01, Chris Wilson wrote:
> > Quoting Chunming Zhou (2018-10-23 08:57:54)
> > [changelog and quoted patch hunk snipped]
> > Hmm, just thinking of other ways of tidying this up
> >
> > 	mutex_lock(cb_lock);
> > 	spin_lock(pt_lock);
> > 	*fence = drm_syncobj_find_signal_pt_for_point();
> > 	spin_unlock(pt_lock);
> > 	if (!*fence)
> > 		drm_syncobj_add_callback_locked(syncobj, cb, func);
> > 	mutex_unlock(cb_lock);
> >
> > i.e. get rid of the early return and we can even drop the int return here
> > as it is unimportant and unused.
> Yes, do you need me to send a v6? Or will you make a separate patch as an improvement?

Send it in reply, we still have some time before the shards catch up
with the ml ;)
-Chris
On 23.10.18 at 11:11, Chris Wilson wrote:
> Quoting zhoucm1 (2018-10-23 10:09:01)
>> On 2018-10-23 17:01, Chris Wilson wrote:
>>> Quoting Chunming Zhou (2018-10-23 08:57:54)
>>> [changelog, quoted patch hunk and locking suggestion snipped]
>>> i.e. get rid of the early return and we can even drop the int return here
>>> as it is unimportant and unused.
>> Yes, do you need me to send a v6? Or will you make a separate patch as an improvement?
> Send it in reply, we still have some time before the shards catch up
> with the ml ;)

I'm idle anyway because I've locked myself out of the AMD VPN accidentally.

So just send me a ping when the v6 is ready to be committed and I can push
it to drm-misc-next.

Christian.

> -Chris
diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index 57bf6006394d..679a56791e34 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -125,23 +125,26 @@ static int drm_syncobj_fence_get_or_add_callback(struct drm_syncobj *syncobj,
 	if (!ret)
 		return 1;
 
-	spin_lock(&syncobj->lock);
+	mutex_lock(&syncobj->cb_mutex);
 	/* We've already tried once to get a fence and failed. Now that we
 	 * have the lock, try one more time just to be sure we don't add a
 	 * callback when a fence has already been set.
 	 */
+	spin_lock(&syncobj->pt_lock);
 	if (!list_empty(&syncobj->signal_pt_list)) {
-		spin_unlock(&syncobj->lock);
+		spin_unlock(&syncobj->pt_lock);
 		drm_syncobj_search_fence(syncobj, 0, 0, fence);
-		if (*fence)
+		if (*fence) {
+			mutex_unlock(&syncobj->cb_mutex);
 			return 1;
-		spin_lock(&syncobj->lock);
+		}
 	} else {
+		spin_unlock(&syncobj->pt_lock);
 		*fence = NULL;
 		drm_syncobj_add_callback_locked(syncobj, cb, func);
 		ret = 0;
 	}
-	spin_unlock(&syncobj->lock);
+	mutex_unlock(&syncobj->cb_mutex);
 
 	return ret;
 }
@@ -150,43 +153,43 @@ void drm_syncobj_add_callback(struct drm_syncobj *syncobj,
 			      struct drm_syncobj_cb *cb,
 			      drm_syncobj_func_t func)
 {
-	spin_lock(&syncobj->lock);
+	mutex_lock(&syncobj->cb_mutex);
 	drm_syncobj_add_callback_locked(syncobj, cb, func);
-	spin_unlock(&syncobj->lock);
+	mutex_unlock(&syncobj->cb_mutex);
 }
 
 void drm_syncobj_remove_callback(struct drm_syncobj *syncobj,
 				 struct drm_syncobj_cb *cb)
 {
-	spin_lock(&syncobj->lock);
+	mutex_lock(&syncobj->cb_mutex);
 	list_del_init(&cb->node);
-	spin_unlock(&syncobj->lock);
+	mutex_unlock(&syncobj->cb_mutex);
 }
 
 static void drm_syncobj_init(struct drm_syncobj *syncobj)
 {
-	spin_lock(&syncobj->lock);
+	spin_lock(&syncobj->pt_lock);
 	syncobj->timeline_context = dma_fence_context_alloc(1);
 	syncobj->timeline = 0;
 	syncobj->signal_point = 0;
 	init_waitqueue_head(&syncobj->wq);
 
 	INIT_LIST_HEAD(&syncobj->signal_pt_list);
-	spin_unlock(&syncobj->lock);
+	spin_unlock(&syncobj->pt_lock);
 }
 
 static void drm_syncobj_fini(struct drm_syncobj *syncobj)
 {
 	struct drm_syncobj_signal_pt *signal_pt = NULL, *tmp;
 
-	spin_lock(&syncobj->lock);
+	spin_lock(&syncobj->pt_lock);
 	list_for_each_entry_safe(signal_pt, tmp,
 				 &syncobj->signal_pt_list, list) {
 		list_del(&signal_pt->list);
 		dma_fence_put(&signal_pt->fence_array->base);
 		kfree(signal_pt);
 	}
-	spin_unlock(&syncobj->lock);
+	spin_unlock(&syncobj->pt_lock);
 }
 
 static struct dma_fence
@@ -249,14 +252,14 @@ static int drm_syncobj_create_signal_pt(struct drm_syncobj *syncobj,
 	fences[num_fences++] = dma_fence_get(fence);
 	/* timeline syncobj must take this dependency */
 	if (syncobj->type == DRM_SYNCOBJ_TYPE_TIMELINE) {
-		spin_lock(&syncobj->lock);
+		spin_lock(&syncobj->pt_lock);
 		if (!list_empty(&syncobj->signal_pt_list)) {
 			tail_pt = list_last_entry(&syncobj->signal_pt_list,
 						  struct drm_syncobj_signal_pt, list);
 			fences[num_fences++] =
 				dma_fence_get(&tail_pt->fence_array->base);
 		}
-		spin_unlock(&syncobj->lock);
+		spin_unlock(&syncobj->pt_lock);
 	}
 	signal_pt->fence_array = dma_fence_array_create(num_fences, fences,
 							syncobj->timeline_context,
@@ -266,16 +269,16 @@ static int drm_syncobj_create_signal_pt(struct drm_syncobj *syncobj,
 		goto fail;
 	}
 
-	spin_lock(&syncobj->lock);
+	spin_lock(&syncobj->pt_lock);
 	if (syncobj->signal_point >= point) {
 		DRM_WARN("A later signal is ready!");
-		spin_unlock(&syncobj->lock);
+		spin_unlock(&syncobj->pt_lock);
 		goto exist;
 	}
 	signal_pt->value = point;
 	list_add_tail(&signal_pt->list, &syncobj->signal_pt_list);
 	syncobj->signal_point = point;
-	spin_unlock(&syncobj->lock);
+	spin_unlock(&syncobj->pt_lock);
 	wake_up_all(&syncobj->wq);
 
 	return 0;
@@ -294,7 +297,7 @@ static void drm_syncobj_garbage_collection(struct drm_syncobj *syncobj)
 {
 	struct drm_syncobj_signal_pt *signal_pt, *tmp, *tail_pt;
 
-	spin_lock(&syncobj->lock);
+	spin_lock(&syncobj->pt_lock);
 	tail_pt = list_last_entry(&syncobj->signal_pt_list,
 				  struct drm_syncobj_signal_pt, list);
 
@@ -315,7 +318,7 @@ static void drm_syncobj_garbage_collection(struct drm_syncobj *syncobj)
 		}
 	}
 
-	spin_unlock(&syncobj->lock);
+	spin_unlock(&syncobj->pt_lock);
 }
 /**
  * drm_syncobj_replace_fence - replace fence in a sync object.
@@ -344,13 +347,14 @@ void drm_syncobj_replace_fence(struct drm_syncobj *syncobj,
 	drm_syncobj_create_signal_pt(syncobj, fence, pt_value);
 	if (fence) {
 		struct drm_syncobj_cb *cur, *tmp;
+		LIST_HEAD(cb_list);
 
-		spin_lock(&syncobj->lock);
+		mutex_lock(&syncobj->cb_mutex);
 		list_for_each_entry_safe(cur, tmp, &syncobj->cb_list, node) {
 			list_del_init(&cur->node);
 			cur->func(syncobj, cur);
 		}
-		spin_unlock(&syncobj->lock);
+		mutex_unlock(&syncobj->cb_mutex);
 	}
 }
 EXPORT_SYMBOL(drm_syncobj_replace_fence);
@@ -386,11 +390,11 @@ drm_syncobj_point_get(struct drm_syncobj *syncobj, u64 point, u64 flags,
 		if (ret < 0)
 			return ret;
 	}
-	spin_lock(&syncobj->lock);
+	spin_lock(&syncobj->pt_lock);
 	*fence = drm_syncobj_find_signal_pt_for_point(syncobj, point);
 	if (!*fence)
 		ret = -EINVAL;
-	spin_unlock(&syncobj->lock);
+	spin_unlock(&syncobj->pt_lock);
 
 	return ret;
 }
@@ -500,7 +504,8 @@ int drm_syncobj_create(struct drm_syncobj **out_syncobj, uint32_t flags,
 
 	kref_init(&syncobj->refcount);
 	INIT_LIST_HEAD(&syncobj->cb_list);
-	spin_lock_init(&syncobj->lock);
+	spin_lock_init(&syncobj->pt_lock);
+	mutex_init(&syncobj->cb_mutex);
 	if (flags & DRM_SYNCOBJ_CREATE_TYPE_TIMELINE)
 		syncobj->type = DRM_SYNCOBJ_TYPE_TIMELINE;
 	else
diff --git a/include/drm/drm_syncobj.h b/include/drm/drm_syncobj.h
index 5e8c5c027e09..29244cbcd05e 100644
--- a/include/drm/drm_syncobj.h
+++ b/include/drm/drm_syncobj.h
@@ -75,9 +75,13 @@ struct drm_syncobj {
 	 */
 	struct list_head cb_list;
 	/**
-	 * @lock: Protects syncobj list and write-locks &fence.
+	 * @pt_lock: Protects pt list.
 	 */
-	spinlock_t lock;
+	spinlock_t pt_lock;
+	/**
+	 * @cb_mutex: Protects syncobj cb list.
+	 */
+	struct mutex cb_mutex;
 	/**
 	 * @file: A file backing for this syncobj.
 	 */
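Read as a whole, the diff splits the old single syncobj->lock into two locks: a pt_lock spinlock held only around signal_pt_list/signal_point updates, and a cb_mutex that is the only lock held while the callbacks on cb_list are invoked. The toy userspace program below is not DRM code; toy_syncobj, toy_signal() and toy_cb() are invented stand-ins (compile with cc -pthread), but they mirror that rule: the callback runs with only the mutex held, so it is free to take the spinlock again to look up the current point, the sort of nesting the patch is arranging for.

	/*
	 * Toy userspace model of the split-lock scheme in this patch (not
	 * kernel code): pt_lock is a spinlock guarding the signal point,
	 * cb_mutex a sleepable mutex guarding the callback state, and the
	 * spinlock is never held while a callback runs.
	 */
	#include <pthread.h>
	#include <stdio.h>

	struct toy_syncobj {
		pthread_spinlock_t pt_lock;   /* protects signal_point */
		pthread_mutex_t cb_mutex;     /* protects cb */
		unsigned long long signal_point;
		void (*cb)(struct toy_syncobj *sobj);
	};

	/* Callback: takes pt_lock itself to re-read the current point,
	 * which is safe because the caller only holds cb_mutex. */
	static void toy_cb(struct toy_syncobj *sobj)
	{
		unsigned long long pt;

		pthread_spin_lock(&sobj->pt_lock);
		pt = sobj->signal_point;
		pthread_spin_unlock(&sobj->pt_lock);
		printf("callback saw signal point %llu\n", pt);
	}

	/* Signal side: publish the new point under pt_lock, drop it, then
	 * fire the callback with only cb_mutex held. */
	static void toy_signal(struct toy_syncobj *sobj, unsigned long long point)
	{
		void (*cb)(struct toy_syncobj *);

		pthread_spin_lock(&sobj->pt_lock);
		sobj->signal_point = point;
		pthread_spin_unlock(&sobj->pt_lock);

		pthread_mutex_lock(&sobj->cb_mutex);
		cb = sobj->cb;
		sobj->cb = NULL;
		if (cb)
			cb(sobj);              /* pt_lock NOT held here */
		pthread_mutex_unlock(&sobj->cb_mutex);
	}

	int main(void)
	{
		struct toy_syncobj sobj = { .cb = toy_cb };

		pthread_spin_init(&sobj.pt_lock, PTHREAD_PROCESS_PRIVATE);
		pthread_mutex_init(&sobj.cb_mutex, NULL);
		toy_signal(&sobj, 1);
		return 0;
	}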