Message ID | 20190401222635.25013-8-eric@anholt.net (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | DRM fence list helpers, V3D CSD support. | expand |
On Tue, Apr 2, 2019 at 6:26 AM Eric Anholt <eric@anholt.net> wrote: > > I haven't tested this, but it's a pretty direct port of what I did for > v3d. > > Signed-off-by: Eric Anholt <eric@anholt.net> > --- > drivers/gpu/drm/lima/lima_gem.c | 37 +---------------- > drivers/gpu/drm/lima/lima_sched.c | 66 ++++++------------------------- > drivers/gpu/drm/lima/lima_sched.h | 6 +-- > 3 files changed, 16 insertions(+), 93 deletions(-) > > diff --git a/drivers/gpu/drm/lima/lima_gem.c b/drivers/gpu/drm/lima/lima_gem.c > index 2d3cf96f6c58..8f80286c80b4 100644 > --- a/drivers/gpu/drm/lima/lima_gem.c > +++ b/drivers/gpu/drm/lima/lima_gem.c > @@ -144,40 +144,7 @@ static int lima_gem_sync_bo(struct lima_sched_task *task, struct lima_bo *bo, > if (explicit) > return 0; > > - /* implicit sync use bo fence in resv obj */ > - if (write) { > - unsigned nr_fences; > - struct dma_fence **fences; > - int i; > - > - err = reservation_object_get_fences_rcu( > - bo->gem.resv, NULL, &nr_fences, &fences); > - if (err || !nr_fences) > - return err; > - > - for (i = 0; i < nr_fences; i++) { > - err = lima_sched_task_add_dep(task, fences[i]); > - if (err) > - break; > - } > - > - /* for error case free remaining fences */ > - for ( ; i < nr_fences; i++) > - dma_fence_put(fences[i]); > - > - kfree(fences); > - } else { > - struct dma_fence *fence; > - > - fence = reservation_object_get_excl_rcu(bo->gem.resv); > - if (fence) { > - err = lima_sched_task_add_dep(task, fence); > - if (err) > - dma_fence_put(fence); > - } > - } > - > - return err; > + return drm_gem_fence_array_add_implicit(&task->deps, &bo->gem, write); > } > > static int lima_gem_lock_bos(struct lima_bo **bos, u32 nr_bos, > @@ -250,7 +217,7 @@ static int lima_gem_add_deps(struct drm_file *file, struct lima_submit *submit) > if (err) > return err; > > - err = lima_sched_task_add_dep(submit->task, fence); > + err = drm_gem_fence_array_add(&submit->task->deps, fence); > if (err) { > dma_fence_put(fence); > return err; > diff --git a/drivers/gpu/drm/lima/lima_sched.c b/drivers/gpu/drm/lima/lima_sched.c > index 97bd9c1deb87..e253d031fb3d 100644 > --- a/drivers/gpu/drm/lima/lima_sched.c > +++ b/drivers/gpu/drm/lima/lima_sched.c > @@ -3,6 +3,7 @@ > > #include <linux/kthread.h> > #include <linux/slab.h> > +#include <linux/xarray.h> > > #include "lima_drv.h" > #include "lima_sched.h" > @@ -126,19 +127,24 @@ int lima_sched_task_init(struct lima_sched_task *task, > > task->num_bos = num_bos; > task->vm = lima_vm_get(vm); > + > + xa_init_flags(&task->deps, XA_FLAGS_ALLOC); > + > return 0; > } > > void lima_sched_task_fini(struct lima_sched_task *task) > { > + struct dma_fence *fence; > + unsigned long index; > int i; > > drm_sched_job_cleanup(&task->base); > > - for (i = 0; i < task->num_dep; i++) > - dma_fence_put(task->dep[i]); > - > - kfree(task->dep); > + xa_for_each(&task->deps, index, fence) { > + dma_fence_put(fence); > + } > + xa_destroy(&task->deps); > > if (task->bos) { > for (i = 0; i < task->num_bos; i++) > @@ -149,42 +155,6 @@ void lima_sched_task_fini(struct lima_sched_task *task) > lima_vm_put(task->vm); > } > > -int lima_sched_task_add_dep(struct lima_sched_task *task, struct dma_fence *fence) > -{ > - int i, new_dep = 4; > - > - /* same context's fence is definitly earlier then this task */ > - if (fence->context == task->base.s_fence->finished.context) { > - dma_fence_put(fence); > - return 0; > - } Seems you dropped this check in the drm_gem_fence_array_add, no bug if we don't have this, but redundant fence will be added in the deps array. Maybe we can add a context parameter to drm_gem_fence_array_add and drm_gem_fence_array_add_implicit to filter out fences from same drm_sched_entity. Regards, Qiang > - > - if (task->dep && task->num_dep == task->max_dep) > - new_dep = task->max_dep * 2; > - > - if (task->max_dep < new_dep) { > - void *dep = krealloc(task->dep, sizeof(*task->dep) * new_dep, GFP_KERNEL); > - > - if (!dep) > - return -ENOMEM; > - > - task->max_dep = new_dep; > - task->dep = dep; > - } > - > - for (i = 0; i < task->num_dep; i++) { > - if (task->dep[i]->context == fence->context && > - dma_fence_is_later(fence, task->dep[i])) { > - dma_fence_put(task->dep[i]); > - task->dep[i] = fence; > - return 0; > - } > - } > - > - task->dep[task->num_dep++] = fence; > - return 0; > -} > - > int lima_sched_context_init(struct lima_sched_pipe *pipe, > struct lima_sched_context *context, > atomic_t *guilty) > @@ -213,21 +183,9 @@ static struct dma_fence *lima_sched_dependency(struct drm_sched_job *job, > struct drm_sched_entity *entity) > { > struct lima_sched_task *task = to_lima_task(job); > - int i; > - > - for (i = 0; i < task->num_dep; i++) { > - struct dma_fence *fence = task->dep[i]; > - > - if (!task->dep[i]) > - continue; > - > - task->dep[i] = NULL; > > - if (!dma_fence_is_signaled(fence)) > - return fence; > - > - dma_fence_put(fence); > - } > + if (!xa_empty(&task->deps)) > + return xa_erase(&task->deps, task->last_dep++); > > return NULL; > } > diff --git a/drivers/gpu/drm/lima/lima_sched.h b/drivers/gpu/drm/lima/lima_sched.h > index b017cfa7e327..928af91c1118 100644 > --- a/drivers/gpu/drm/lima/lima_sched.h > +++ b/drivers/gpu/drm/lima/lima_sched.h > @@ -14,9 +14,8 @@ struct lima_sched_task { > struct lima_vm *vm; > void *frame; > > - struct dma_fence **dep; > - int num_dep; > - int max_dep; > + struct xarray deps; > + unsigned long last_dep; > > struct lima_bo **bos; > int num_bos; > @@ -78,7 +77,6 @@ int lima_sched_task_init(struct lima_sched_task *task, > struct lima_bo **bos, int num_bos, > struct lima_vm *vm); > void lima_sched_task_fini(struct lima_sched_task *task); > -int lima_sched_task_add_dep(struct lima_sched_task *task, struct dma_fence *fence); > > int lima_sched_context_init(struct lima_sched_pipe *pipe, > struct lima_sched_context *context, > -- > 2.20.1 >
Qiang Yu <yuq825@gmail.com> writes: > On Tue, Apr 2, 2019 at 6:26 AM Eric Anholt <eric@anholt.net> wrote: >> >> I haven't tested this, but it's a pretty direct port of what I did for >> v3d. >> >> Signed-off-by: Eric Anholt <eric@anholt.net> >> --- >> drivers/gpu/drm/lima/lima_gem.c | 37 +---------------- >> drivers/gpu/drm/lima/lima_sched.c | 66 ++++++------------------------- >> drivers/gpu/drm/lima/lima_sched.h | 6 +-- >> 3 files changed, 16 insertions(+), 93 deletions(-) >> >> diff --git a/drivers/gpu/drm/lima/lima_gem.c b/drivers/gpu/drm/lima/lima_gem.c >> index 2d3cf96f6c58..8f80286c80b4 100644 >> --- a/drivers/gpu/drm/lima/lima_gem.c >> +++ b/drivers/gpu/drm/lima/lima_gem.c >> @@ -144,40 +144,7 @@ static int lima_gem_sync_bo(struct lima_sched_task *task, struct lima_bo *bo, >> if (explicit) >> return 0; >> >> - /* implicit sync use bo fence in resv obj */ >> - if (write) { >> - unsigned nr_fences; >> - struct dma_fence **fences; >> - int i; >> - >> - err = reservation_object_get_fences_rcu( >> - bo->gem.resv, NULL, &nr_fences, &fences); >> - if (err || !nr_fences) >> - return err; >> - >> - for (i = 0; i < nr_fences; i++) { >> - err = lima_sched_task_add_dep(task, fences[i]); >> - if (err) >> - break; >> - } >> - >> - /* for error case free remaining fences */ >> - for ( ; i < nr_fences; i++) >> - dma_fence_put(fences[i]); >> - >> - kfree(fences); >> - } else { >> - struct dma_fence *fence; >> - >> - fence = reservation_object_get_excl_rcu(bo->gem.resv); >> - if (fence) { >> - err = lima_sched_task_add_dep(task, fence); >> - if (err) >> - dma_fence_put(fence); >> - } >> - } >> - >> - return err; >> + return drm_gem_fence_array_add_implicit(&task->deps, &bo->gem, write); >> } >> >> static int lima_gem_lock_bos(struct lima_bo **bos, u32 nr_bos, >> @@ -250,7 +217,7 @@ static int lima_gem_add_deps(struct drm_file *file, struct lima_submit *submit) >> if (err) >> return err; >> >> - err = lima_sched_task_add_dep(submit->task, fence); >> + err = drm_gem_fence_array_add(&submit->task->deps, fence); >> if (err) { >> dma_fence_put(fence); >> return err; >> diff --git a/drivers/gpu/drm/lima/lima_sched.c b/drivers/gpu/drm/lima/lima_sched.c >> index 97bd9c1deb87..e253d031fb3d 100644 >> --- a/drivers/gpu/drm/lima/lima_sched.c >> +++ b/drivers/gpu/drm/lima/lima_sched.c >> @@ -3,6 +3,7 @@ >> >> #include <linux/kthread.h> >> #include <linux/slab.h> >> +#include <linux/xarray.h> >> >> #include "lima_drv.h" >> #include "lima_sched.h" >> @@ -126,19 +127,24 @@ int lima_sched_task_init(struct lima_sched_task *task, >> >> task->num_bos = num_bos; >> task->vm = lima_vm_get(vm); >> + >> + xa_init_flags(&task->deps, XA_FLAGS_ALLOC); >> + >> return 0; >> } >> >> void lima_sched_task_fini(struct lima_sched_task *task) >> { >> + struct dma_fence *fence; >> + unsigned long index; >> int i; >> >> drm_sched_job_cleanup(&task->base); >> >> - for (i = 0; i < task->num_dep; i++) >> - dma_fence_put(task->dep[i]); >> - >> - kfree(task->dep); >> + xa_for_each(&task->deps, index, fence) { >> + dma_fence_put(fence); >> + } >> + xa_destroy(&task->deps); >> >> if (task->bos) { >> for (i = 0; i < task->num_bos; i++) >> @@ -149,42 +155,6 @@ void lima_sched_task_fini(struct lima_sched_task *task) >> lima_vm_put(task->vm); >> } >> >> -int lima_sched_task_add_dep(struct lima_sched_task *task, struct dma_fence *fence) >> -{ >> - int i, new_dep = 4; >> - >> - /* same context's fence is definitly earlier then this task */ >> - if (fence->context == task->base.s_fence->finished.context) { >> - dma_fence_put(fence); >> - return 0; >> - } > > Seems you dropped this check in the drm_gem_fence_array_add, no bug if we > don't have this, but redundant fence will be added in the deps array. Maybe we > can add a context parameter to drm_gem_fence_array_add and > drm_gem_fence_array_add_implicit to filter out fences from same > drm_sched_entity. Does this feel important to you? To me, one extra slot in the array and a trip through the top of drm_sched_entity_add_dependency_cb() doesn't like it's feel worth making the API clumsier.
Indeed not that important, so patch 5&7 is: Reviewed-and-tested-by: Qiang Yu <yuq825@gmail.com> Regards, Qiang On Wed, Apr 3, 2019 at 12:57 AM Eric Anholt <eric@anholt.net> wrote: > > Qiang Yu <yuq825@gmail.com> writes: > > > On Tue, Apr 2, 2019 at 6:26 AM Eric Anholt <eric@anholt.net> wrote: > >> > >> I haven't tested this, but it's a pretty direct port of what I did for > >> v3d. > >> > >> Signed-off-by: Eric Anholt <eric@anholt.net> > >> --- > >> drivers/gpu/drm/lima/lima_gem.c | 37 +---------------- > >> drivers/gpu/drm/lima/lima_sched.c | 66 ++++++------------------------- > >> drivers/gpu/drm/lima/lima_sched.h | 6 +-- > >> 3 files changed, 16 insertions(+), 93 deletions(-) > >> > >> diff --git a/drivers/gpu/drm/lima/lima_gem.c b/drivers/gpu/drm/lima/lima_gem.c > >> index 2d3cf96f6c58..8f80286c80b4 100644 > >> --- a/drivers/gpu/drm/lima/lima_gem.c > >> +++ b/drivers/gpu/drm/lima/lima_gem.c > >> @@ -144,40 +144,7 @@ static int lima_gem_sync_bo(struct lima_sched_task *task, struct lima_bo *bo, > >> if (explicit) > >> return 0; > >> > >> - /* implicit sync use bo fence in resv obj */ > >> - if (write) { > >> - unsigned nr_fences; > >> - struct dma_fence **fences; > >> - int i; > >> - > >> - err = reservation_object_get_fences_rcu( > >> - bo->gem.resv, NULL, &nr_fences, &fences); > >> - if (err || !nr_fences) > >> - return err; > >> - > >> - for (i = 0; i < nr_fences; i++) { > >> - err = lima_sched_task_add_dep(task, fences[i]); > >> - if (err) > >> - break; > >> - } > >> - > >> - /* for error case free remaining fences */ > >> - for ( ; i < nr_fences; i++) > >> - dma_fence_put(fences[i]); > >> - > >> - kfree(fences); > >> - } else { > >> - struct dma_fence *fence; > >> - > >> - fence = reservation_object_get_excl_rcu(bo->gem.resv); > >> - if (fence) { > >> - err = lima_sched_task_add_dep(task, fence); > >> - if (err) > >> - dma_fence_put(fence); > >> - } > >> - } > >> - > >> - return err; > >> + return drm_gem_fence_array_add_implicit(&task->deps, &bo->gem, write); > >> } > >> > >> static int lima_gem_lock_bos(struct lima_bo **bos, u32 nr_bos, > >> @@ -250,7 +217,7 @@ static int lima_gem_add_deps(struct drm_file *file, struct lima_submit *submit) > >> if (err) > >> return err; > >> > >> - err = lima_sched_task_add_dep(submit->task, fence); > >> + err = drm_gem_fence_array_add(&submit->task->deps, fence); > >> if (err) { > >> dma_fence_put(fence); > >> return err; > >> diff --git a/drivers/gpu/drm/lima/lima_sched.c b/drivers/gpu/drm/lima/lima_sched.c > >> index 97bd9c1deb87..e253d031fb3d 100644 > >> --- a/drivers/gpu/drm/lima/lima_sched.c > >> +++ b/drivers/gpu/drm/lima/lima_sched.c > >> @@ -3,6 +3,7 @@ > >> > >> #include <linux/kthread.h> > >> #include <linux/slab.h> > >> +#include <linux/xarray.h> > >> > >> #include "lima_drv.h" > >> #include "lima_sched.h" > >> @@ -126,19 +127,24 @@ int lima_sched_task_init(struct lima_sched_task *task, > >> > >> task->num_bos = num_bos; > >> task->vm = lima_vm_get(vm); > >> + > >> + xa_init_flags(&task->deps, XA_FLAGS_ALLOC); > >> + > >> return 0; > >> } > >> > >> void lima_sched_task_fini(struct lima_sched_task *task) > >> { > >> + struct dma_fence *fence; > >> + unsigned long index; > >> int i; > >> > >> drm_sched_job_cleanup(&task->base); > >> > >> - for (i = 0; i < task->num_dep; i++) > >> - dma_fence_put(task->dep[i]); > >> - > >> - kfree(task->dep); > >> + xa_for_each(&task->deps, index, fence) { > >> + dma_fence_put(fence); > >> + } > >> + xa_destroy(&task->deps); > >> > >> if (task->bos) { > >> for (i = 0; i < task->num_bos; i++) > >> @@ -149,42 +155,6 @@ void lima_sched_task_fini(struct lima_sched_task *task) > >> lima_vm_put(task->vm); > >> } > >> > >> -int lima_sched_task_add_dep(struct lima_sched_task *task, struct dma_fence *fence) > >> -{ > >> - int i, new_dep = 4; > >> - > >> - /* same context's fence is definitly earlier then this task */ > >> - if (fence->context == task->base.s_fence->finished.context) { > >> - dma_fence_put(fence); > >> - return 0; > >> - } > > > > Seems you dropped this check in the drm_gem_fence_array_add, no bug if we > > don't have this, but redundant fence will be added in the deps array. Maybe we > > can add a context parameter to drm_gem_fence_array_add and > > drm_gem_fence_array_add_implicit to filter out fences from same > > drm_sched_entity. > > Does this feel important to you? To me, one extra slot in the array and > a trip through the top of drm_sched_entity_add_dependency_cb() doesn't > like it's feel worth making the API clumsier.
Qiang Yu <yuq825@gmail.com> writes: > Indeed not that important, so patch 5&7 is: > Reviewed-and-tested-by: Qiang Yu <yuq825@gmail.com> Merged these two. Thanks for trying it out!
diff --git a/drivers/gpu/drm/lima/lima_gem.c b/drivers/gpu/drm/lima/lima_gem.c index 2d3cf96f6c58..8f80286c80b4 100644 --- a/drivers/gpu/drm/lima/lima_gem.c +++ b/drivers/gpu/drm/lima/lima_gem.c @@ -144,40 +144,7 @@ static int lima_gem_sync_bo(struct lima_sched_task *task, struct lima_bo *bo, if (explicit) return 0; - /* implicit sync use bo fence in resv obj */ - if (write) { - unsigned nr_fences; - struct dma_fence **fences; - int i; - - err = reservation_object_get_fences_rcu( - bo->gem.resv, NULL, &nr_fences, &fences); - if (err || !nr_fences) - return err; - - for (i = 0; i < nr_fences; i++) { - err = lima_sched_task_add_dep(task, fences[i]); - if (err) - break; - } - - /* for error case free remaining fences */ - for ( ; i < nr_fences; i++) - dma_fence_put(fences[i]); - - kfree(fences); - } else { - struct dma_fence *fence; - - fence = reservation_object_get_excl_rcu(bo->gem.resv); - if (fence) { - err = lima_sched_task_add_dep(task, fence); - if (err) - dma_fence_put(fence); - } - } - - return err; + return drm_gem_fence_array_add_implicit(&task->deps, &bo->gem, write); } static int lima_gem_lock_bos(struct lima_bo **bos, u32 nr_bos, @@ -250,7 +217,7 @@ static int lima_gem_add_deps(struct drm_file *file, struct lima_submit *submit) if (err) return err; - err = lima_sched_task_add_dep(submit->task, fence); + err = drm_gem_fence_array_add(&submit->task->deps, fence); if (err) { dma_fence_put(fence); return err; diff --git a/drivers/gpu/drm/lima/lima_sched.c b/drivers/gpu/drm/lima/lima_sched.c index 97bd9c1deb87..e253d031fb3d 100644 --- a/drivers/gpu/drm/lima/lima_sched.c +++ b/drivers/gpu/drm/lima/lima_sched.c @@ -3,6 +3,7 @@ #include <linux/kthread.h> #include <linux/slab.h> +#include <linux/xarray.h> #include "lima_drv.h" #include "lima_sched.h" @@ -126,19 +127,24 @@ int lima_sched_task_init(struct lima_sched_task *task, task->num_bos = num_bos; task->vm = lima_vm_get(vm); + + xa_init_flags(&task->deps, XA_FLAGS_ALLOC); + return 0; } void lima_sched_task_fini(struct lima_sched_task *task) { + struct dma_fence *fence; + unsigned long index; int i; drm_sched_job_cleanup(&task->base); - for (i = 0; i < task->num_dep; i++) - dma_fence_put(task->dep[i]); - - kfree(task->dep); + xa_for_each(&task->deps, index, fence) { + dma_fence_put(fence); + } + xa_destroy(&task->deps); if (task->bos) { for (i = 0; i < task->num_bos; i++) @@ -149,42 +155,6 @@ void lima_sched_task_fini(struct lima_sched_task *task) lima_vm_put(task->vm); } -int lima_sched_task_add_dep(struct lima_sched_task *task, struct dma_fence *fence) -{ - int i, new_dep = 4; - - /* same context's fence is definitly earlier then this task */ - if (fence->context == task->base.s_fence->finished.context) { - dma_fence_put(fence); - return 0; - } - - if (task->dep && task->num_dep == task->max_dep) - new_dep = task->max_dep * 2; - - if (task->max_dep < new_dep) { - void *dep = krealloc(task->dep, sizeof(*task->dep) * new_dep, GFP_KERNEL); - - if (!dep) - return -ENOMEM; - - task->max_dep = new_dep; - task->dep = dep; - } - - for (i = 0; i < task->num_dep; i++) { - if (task->dep[i]->context == fence->context && - dma_fence_is_later(fence, task->dep[i])) { - dma_fence_put(task->dep[i]); - task->dep[i] = fence; - return 0; - } - } - - task->dep[task->num_dep++] = fence; - return 0; -} - int lima_sched_context_init(struct lima_sched_pipe *pipe, struct lima_sched_context *context, atomic_t *guilty) @@ -213,21 +183,9 @@ static struct dma_fence *lima_sched_dependency(struct drm_sched_job *job, struct drm_sched_entity *entity) { struct lima_sched_task *task = to_lima_task(job); - int i; - - for (i = 0; i < task->num_dep; i++) { - struct dma_fence *fence = task->dep[i]; - - if (!task->dep[i]) - continue; - - task->dep[i] = NULL; - if (!dma_fence_is_signaled(fence)) - return fence; - - dma_fence_put(fence); - } + if (!xa_empty(&task->deps)) + return xa_erase(&task->deps, task->last_dep++); return NULL; } diff --git a/drivers/gpu/drm/lima/lima_sched.h b/drivers/gpu/drm/lima/lima_sched.h index b017cfa7e327..928af91c1118 100644 --- a/drivers/gpu/drm/lima/lima_sched.h +++ b/drivers/gpu/drm/lima/lima_sched.h @@ -14,9 +14,8 @@ struct lima_sched_task { struct lima_vm *vm; void *frame; - struct dma_fence **dep; - int num_dep; - int max_dep; + struct xarray deps; + unsigned long last_dep; struct lima_bo **bos; int num_bos; @@ -78,7 +77,6 @@ int lima_sched_task_init(struct lima_sched_task *task, struct lima_bo **bos, int num_bos, struct lima_vm *vm); void lima_sched_task_fini(struct lima_sched_task *task); -int lima_sched_task_add_dep(struct lima_sched_task *task, struct dma_fence *fence); int lima_sched_context_init(struct lima_sched_pipe *pipe, struct lima_sched_context *context,
I haven't tested this, but it's a pretty direct port of what I did for v3d. Signed-off-by: Eric Anholt <eric@anholt.net> --- drivers/gpu/drm/lima/lima_gem.c | 37 +---------------- drivers/gpu/drm/lima/lima_sched.c | 66 ++++++------------------------- drivers/gpu/drm/lima/lima_sched.h | 6 +-- 3 files changed, 16 insertions(+), 93 deletions(-)