Message ID | 20241031205140.541907-1-christian.gmeiner@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [v2] drm/v3d: Add DRM_IOCTL_V3D_PERFMON_SET_GLOBAL | expand |
Hi Christian, Thanks for your patch! Just some small nits. On 31/10/24 17:51, Christian Gmeiner wrote: > From: Christian Gmeiner <cgmeiner@igalia.com> > > Add a new ioctl, DRM_IOCTL_V3D_PERFMON_SET_GLOBAL, to allow > configuration of a global performance monitor (perfmon). > Use the global perfmon for all jobs to ensure consistent > performance tracking across submissions. I'd mention how this feature can be useful for user-space (for example, the implementation of a Perfetto datasource). > > Signed-off-by: Christian Gmeiner <cgmeiner@igalia.com> > > --- > Changes in v2: > - Reworked commit message. > - Removed num_perfmon counter for tracking perfmon allocations. > - Allowing allocation of perfmons when the global perfmon is active. > - Return -EAGAIN for submissions with a per job perfmon if the global perfmon is active. > --- > drivers/gpu/drm/v3d/v3d_drv.c | 1 + > drivers/gpu/drm/v3d/v3d_drv.h | 8 ++++++++ > drivers/gpu/drm/v3d/v3d_perfmon.c | 34 +++++++++++++++++++++++++++++++ > drivers/gpu/drm/v3d/v3d_sched.c | 14 ++++++++++--- > drivers/gpu/drm/v3d/v3d_submit.c | 10 +++++++++ > include/uapi/drm/v3d_drm.h | 15 ++++++++++++++ > 6 files changed, 79 insertions(+), 3 deletions(-) > > diff --git a/drivers/gpu/drm/v3d/v3d_drv.c b/drivers/gpu/drm/v3d/v3d_drv.c > index d7ff1f5fa481..3c89f0daa5b8 100644 > --- a/drivers/gpu/drm/v3d/v3d_drv.c > +++ b/drivers/gpu/drm/v3d/v3d_drv.c > @@ -214,6 +214,7 @@ static const struct drm_ioctl_desc v3d_drm_ioctls[] = { > DRM_IOCTL_DEF_DRV(V3D_PERFMON_GET_VALUES, v3d_perfmon_get_values_ioctl, DRM_RENDER_ALLOW), > DRM_IOCTL_DEF_DRV(V3D_SUBMIT_CPU, v3d_submit_cpu_ioctl, DRM_RENDER_ALLOW | DRM_AUTH), > DRM_IOCTL_DEF_DRV(V3D_PERFMON_GET_COUNTER, v3d_perfmon_get_counter_ioctl, DRM_RENDER_ALLOW), > + DRM_IOCTL_DEF_DRV(V3D_PERFMON_SET_GLOBAL, v3d_perfmon_set_global_ioctl, DRM_RENDER_ALLOW), > }; > > static const struct drm_driver v3d_drm_driver = { > diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h > index 
cf4b23369dc4..a0d920ec2b1d 100644 > --- a/drivers/gpu/drm/v3d/v3d_drv.h > +++ b/drivers/gpu/drm/v3d/v3d_drv.h > @@ -179,6 +179,12 @@ struct v3d_dev { > u32 num_allocated; > u32 pages_allocated; > } bo_stats; > + > + /* To support a performance analysis tool in user space, we require > + * a single, globally configured performance monitor (perfmon) for > + * all jobs. > + */ > + struct v3d_perfmon *global_perfmon; > }; > > static inline struct v3d_dev * > @@ -584,6 +590,8 @@ int v3d_perfmon_get_values_ioctl(struct drm_device *dev, void *data, > struct drm_file *file_priv); > int v3d_perfmon_get_counter_ioctl(struct drm_device *dev, void *data, > struct drm_file *file_priv); > +int v3d_perfmon_set_global_ioctl(struct drm_device *dev, void *data, > + struct drm_file *file_priv); > > /* v3d_sysfs.c */ > int v3d_sysfs_init(struct device *dev); > diff --git a/drivers/gpu/drm/v3d/v3d_perfmon.c b/drivers/gpu/drm/v3d/v3d_perfmon.c > index 156be13ab2ef..bf42303c292b 100644 > --- a/drivers/gpu/drm/v3d/v3d_perfmon.c > +++ b/drivers/gpu/drm/v3d/v3d_perfmon.c > @@ -312,6 +312,9 @@ static int v3d_perfmon_idr_del(int id, void *elem, void *data) > if (perfmon == v3d->active_perfmon) > v3d_perfmon_stop(v3d, perfmon, false); > > + /* If the global perfmon is being destroyed, set it to NULL */ > + cmpxchg(&v3d->global_perfmon, perfmon, NULL); > + > v3d_perfmon_put(perfmon); > > return 0; > @@ -451,3 +454,34 @@ int v3d_perfmon_get_counter_ioctl(struct drm_device *dev, void *data, > > return 0; > } [...] 
> index 87fc5bb0a61e..709724fe28e6 100644 > --- a/include/uapi/drm/v3d_drm.h > +++ b/include/uapi/drm/v3d_drm.h > @@ -43,6 +43,7 @@ extern "C" { > #define DRM_V3D_PERFMON_GET_VALUES 0x0a > #define DRM_V3D_SUBMIT_CPU 0x0b > #define DRM_V3D_PERFMON_GET_COUNTER 0x0c > +#define DRM_V3D_PERFMON_SET_GLOBAL 0x0d > > #define DRM_IOCTL_V3D_SUBMIT_CL DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CL, struct drm_v3d_submit_cl) > #define DRM_IOCTL_V3D_WAIT_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_WAIT_BO, struct drm_v3d_wait_bo) > @@ -61,6 +62,8 @@ extern "C" { > #define DRM_IOCTL_V3D_SUBMIT_CPU DRM_IOW(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CPU, struct drm_v3d_submit_cpu) > #define DRM_IOCTL_V3D_PERFMON_GET_COUNTER DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_PERFMON_GET_COUNTER, \ > struct drm_v3d_perfmon_get_counter) > +#define DRM_IOCTL_V3D_PERFMON_SET_GLOBAL DRM_IOW(DRM_COMMAND_BASE + DRM_V3D_PERFMON_SET_GLOBAL, \ > + struct drm_v3d_perfmon_set_global) > > #define DRM_V3D_SUBMIT_CL_FLUSH_CACHE 0x01 > #define DRM_V3D_SUBMIT_EXTENSION 0x02 > @@ -765,6 +768,18 @@ struct drm_v3d_perfmon_get_counter { > __u8 reserved[7]; > }; > > +#define DRM_V3D_PERFMON_CLEAR_GLOBAL 0x0001 > + > +/* Please use kernel-doc [1]. So, this comment should start with /** [1] https://docs.kernel.org/doc-guide/kernel-doc.html > + * struct drm_v3d_perfmon_set_global - ioctl to define a The column width is 80. > + * global performance counter that is used if a job has s/counter/monitor > + * not assigned one on its own. This description isn't really precise. When the global performance monitor is enabled, the job perfmon doesn't matter. Currently, we don't even allow a job perfmon to be submitted, but if we did, the global perfmon would be used even if the job had a perfmon. Best Regards, - Maíra > + */ > +struct drm_v3d_perfmon_set_global { > + __u32 flags; > + __u32 id; > +}; > + > #if defined(__cplusplus) > } > #endif
Hi Christian, On 31/10/24 17:51, Christian Gmeiner wrote: > From: Christian Gmeiner <cgmeiner@igalia.com> > > Add a new ioctl, DRM_IOCTL_V3D_PERFMON_SET_GLOBAL, to allow > configuration of a global performance monitor (perfmon). > Use the global perfmon for all jobs to ensure consistent > performance tracking across submissions. > > Signed-off-by: Christian Gmeiner <cgmeiner@igalia.com> > > --- > Changes in v2: > - Reworked commit message. > - Removed num_perfmon counter for tracking perfmon allocations. > - Allowing allocation of perfmons when the global perfmon is active. > - Return -EAGAIN for submissions with a per job perfmon if the global perfmon is active. > --- > drivers/gpu/drm/v3d/v3d_drv.c | 1 + > drivers/gpu/drm/v3d/v3d_drv.h | 8 ++++++++ > drivers/gpu/drm/v3d/v3d_perfmon.c | 34 +++++++++++++++++++++++++++++++ > drivers/gpu/drm/v3d/v3d_sched.c | 14 ++++++++++--- > drivers/gpu/drm/v3d/v3d_submit.c | 10 +++++++++ > include/uapi/drm/v3d_drm.h | 15 ++++++++++++++ > 6 files changed, 79 insertions(+), 3 deletions(-) > [...] 
> diff --git a/drivers/gpu/drm/v3d/v3d_perfmon.c b/drivers/gpu/drm/v3d/v3d_perfmon.c > index 156be13ab2ef..bf42303c292b 100644 > --- a/drivers/gpu/drm/v3d/v3d_perfmon.c > +++ b/drivers/gpu/drm/v3d/v3d_perfmon.c > @@ -312,6 +312,9 @@ static int v3d_perfmon_idr_del(int id, void *elem, void *data) > if (perfmon == v3d->active_perfmon) > v3d_perfmon_stop(v3d, perfmon, false); > > + /* If the global perfmon is being destroyed, set it to NULL */ > + cmpxchg(&v3d->global_perfmon, perfmon, NULL); > + > v3d_perfmon_put(perfmon); > > return 0; > @@ -451,3 +454,34 @@ int v3d_perfmon_get_counter_ioctl(struct drm_device *dev, void *data, > > return 0; > } > + > +int v3d_perfmon_set_global_ioctl(struct drm_device *dev, void *data, > + struct drm_file *file_priv) > +{ > + struct v3d_file_priv *v3d_priv = file_priv->driver_priv; > + struct drm_v3d_perfmon_set_global *req = data; > + struct v3d_dev *v3d = to_v3d_dev(dev); > + struct v3d_perfmon *perfmon; > + > + if (req->flags & ~DRM_V3D_PERFMON_CLEAR_GLOBAL) > + return -EINVAL; > + > + perfmon = v3d_perfmon_find(v3d_priv, req->id); > + if (!perfmon) > + return -EINVAL; > + > + /* If the request is to clear the global performance monitor */ > + if (req->flags & DRM_V3D_PERFMON_CLEAR_GLOBAL) { > + if (!v3d->global_perfmon) > + return -EINVAL; > + > + xchg(&v3d->global_perfmon, NULL); I'm reading the userspace code now and I think you need to call `v3d_perfmon_stop` here to make sure that the active perfmon is no longer the global perfmon. 
Best Regards, - Maíra > + > + return 0; > + } > + > + if (cmpxchg(&v3d->global_perfmon, NULL, perfmon)) > + return -EBUSY; > + > + return 0; > +} > diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c > index 08d2a2739582..38690740f593 100644 > --- a/drivers/gpu/drm/v3d/v3d_sched.c > +++ b/drivers/gpu/drm/v3d/v3d_sched.c > @@ -120,11 +120,19 @@ v3d_cpu_job_free(struct drm_sched_job *sched_job) > static void > v3d_switch_perfmon(struct v3d_dev *v3d, struct v3d_job *job) > { > - if (job->perfmon != v3d->active_perfmon) > + struct v3d_perfmon *perfmon = v3d->global_perfmon; > + > + if (!perfmon) > + perfmon = job->perfmon; > + > + if (perfmon == v3d->active_perfmon) > + return; > + > + if (perfmon != v3d->active_perfmon) > v3d_perfmon_stop(v3d, v3d->active_perfmon, true); > > - if (job->perfmon && v3d->active_perfmon != job->perfmon) > - v3d_perfmon_start(v3d, job->perfmon); > + if (perfmon && v3d->active_perfmon != perfmon) > + v3d_perfmon_start(v3d, perfmon); > } > > static void > diff --git a/drivers/gpu/drm/v3d/v3d_submit.c b/drivers/gpu/drm/v3d/v3d_submit.c > index d607aa9c4ec2..9e439c9f0a93 100644 > --- a/drivers/gpu/drm/v3d/v3d_submit.c > +++ b/drivers/gpu/drm/v3d/v3d_submit.c > @@ -981,6 +981,11 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data, > goto fail; > > if (args->perfmon_id) { > + if (v3d->global_perfmon) { > + ret = -EAGAIN; > + goto fail_perfmon; > + } > + > render->base.perfmon = v3d_perfmon_find(v3d_priv, > args->perfmon_id); > > @@ -1196,6 +1201,11 @@ v3d_submit_csd_ioctl(struct drm_device *dev, void *data, > goto fail; > > if (args->perfmon_id) { > + if (v3d->global_perfmon) { > + ret = -EAGAIN; > + goto fail_perfmon; > + } > + > job->base.perfmon = v3d_perfmon_find(v3d_priv, > args->perfmon_id); > if (!job->base.perfmon) { > diff --git a/include/uapi/drm/v3d_drm.h b/include/uapi/drm/v3d_drm.h > index 87fc5bb0a61e..709724fe28e6 100644 > --- a/include/uapi/drm/v3d_drm.h > +++ b/include/uapi/drm/v3d_drm.h > @@ 
-43,6 +43,7 @@ extern "C" { > #define DRM_V3D_PERFMON_GET_VALUES 0x0a > #define DRM_V3D_SUBMIT_CPU 0x0b > #define DRM_V3D_PERFMON_GET_COUNTER 0x0c > +#define DRM_V3D_PERFMON_SET_GLOBAL 0x0d > > #define DRM_IOCTL_V3D_SUBMIT_CL DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CL, struct drm_v3d_submit_cl) > #define DRM_IOCTL_V3D_WAIT_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_WAIT_BO, struct drm_v3d_wait_bo) > @@ -61,6 +62,8 @@ extern "C" { > #define DRM_IOCTL_V3D_SUBMIT_CPU DRM_IOW(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CPU, struct drm_v3d_submit_cpu) > #define DRM_IOCTL_V3D_PERFMON_GET_COUNTER DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_PERFMON_GET_COUNTER, \ > struct drm_v3d_perfmon_get_counter) > +#define DRM_IOCTL_V3D_PERFMON_SET_GLOBAL DRM_IOW(DRM_COMMAND_BASE + DRM_V3D_PERFMON_SET_GLOBAL, \ > + struct drm_v3d_perfmon_set_global) > > #define DRM_V3D_SUBMIT_CL_FLUSH_CACHE 0x01 > #define DRM_V3D_SUBMIT_EXTENSION 0x02 > @@ -765,6 +768,18 @@ struct drm_v3d_perfmon_get_counter { > __u8 reserved[7]; > }; > > +#define DRM_V3D_PERFMON_CLEAR_GLOBAL 0x0001 > + > +/* > + * struct drm_v3d_perfmon_set_global - ioctl to define a > + * global performance counter that is used if a job has > + * not assigned one on its own. > + */ > +struct drm_v3d_perfmon_set_global { > + __u32 flags; > + __u32 id; > +}; > + > #if defined(__cplusplus) > } > #endif
Hi Maíra, > > Thanks for your patch! Just some small nits. > Thanks for your review. > On 31/10/24 17:51, Christian Gmeiner wrote: > > From: Christian Gmeiner <cgmeiner@igalia.com> > > > > Add a new ioctl, DRM_IOCTL_V3D_PERFMON_SET_GLOBAL, to allow > > configuration of a global performance monitor (perfmon). > > Use the global perfmon for all jobs to ensure consistent > > performance tracking across submissions. > > I'd mention how this feature can be useful for user-space (for example, > the implementation of a Perfetto datasource). > That's a great idea; it will be done in v3. > > > > Signed-off-by: Christian Gmeiner <cgmeiner@igalia.com> > > > > --- > > Changes in v2: > > - Reworked commit message. > > - Removed num_perfmon counter for tracking perfmon allocations. > > - Allowing allocation of perfmons when the global perfmon is active. > > - Return -EAGAIN for submissions with a per job perfmon if the global perfmon is active. > > --- > > drivers/gpu/drm/v3d/v3d_drv.c | 1 + > > drivers/gpu/drm/v3d/v3d_drv.h | 8 ++++++++ > > drivers/gpu/drm/v3d/v3d_perfmon.c | 34 +++++++++++++++++++++++++++++++ > > drivers/gpu/drm/v3d/v3d_sched.c | 14 ++++++++++--- > > drivers/gpu/drm/v3d/v3d_submit.c | 10 +++++++++ > > include/uapi/drm/v3d_drm.h | 15 ++++++++++++++ > > 6 files changed, 79 insertions(+), 3 deletions(-) > > > > diff --git a/drivers/gpu/drm/v3d/v3d_drv.c b/drivers/gpu/drm/v3d/v3d_drv.c > > index d7ff1f5fa481..3c89f0daa5b8 100644 > > --- a/drivers/gpu/drm/v3d/v3d_drv.c > > +++ b/drivers/gpu/drm/v3d/v3d_drv.c > > @@ -214,6 +214,7 @@ static const struct drm_ioctl_desc v3d_drm_ioctls[] = { > > DRM_IOCTL_DEF_DRV(V3D_PERFMON_GET_VALUES, v3d_perfmon_get_values_ioctl, DRM_RENDER_ALLOW), > > DRM_IOCTL_DEF_DRV(V3D_SUBMIT_CPU, v3d_submit_cpu_ioctl, DRM_RENDER_ALLOW | DRM_AUTH), > > DRM_IOCTL_DEF_DRV(V3D_PERFMON_GET_COUNTER, v3d_perfmon_get_counter_ioctl, DRM_RENDER_ALLOW), > > + DRM_IOCTL_DEF_DRV(V3D_PERFMON_SET_GLOBAL, v3d_perfmon_set_global_ioctl, DRM_RENDER_ALLOW), > > }; 
> > > > static const struct drm_driver v3d_drm_driver = { > > diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h > > index cf4b23369dc4..a0d920ec2b1d 100644 > > --- a/drivers/gpu/drm/v3d/v3d_drv.h > > +++ b/drivers/gpu/drm/v3d/v3d_drv.h > > @@ -179,6 +179,12 @@ struct v3d_dev { > > u32 num_allocated; > > u32 pages_allocated; > > } bo_stats; > > + > > + /* To support a performance analysis tool in user space, we require > > + * a single, globally configured performance monitor (perfmon) for > > + * all jobs. > > + */ > > + struct v3d_perfmon *global_perfmon; > > }; > > > > static inline struct v3d_dev * > > @@ -584,6 +590,8 @@ int v3d_perfmon_get_values_ioctl(struct drm_device *dev, void *data, > > struct drm_file *file_priv); > > int v3d_perfmon_get_counter_ioctl(struct drm_device *dev, void *data, > > struct drm_file *file_priv); > > +int v3d_perfmon_set_global_ioctl(struct drm_device *dev, void *data, > > + struct drm_file *file_priv); > > > > /* v3d_sysfs.c */ > > int v3d_sysfs_init(struct device *dev); > > diff --git a/drivers/gpu/drm/v3d/v3d_perfmon.c b/drivers/gpu/drm/v3d/v3d_perfmon.c > > index 156be13ab2ef..bf42303c292b 100644 > > --- a/drivers/gpu/drm/v3d/v3d_perfmon.c > > +++ b/drivers/gpu/drm/v3d/v3d_perfmon.c > > @@ -312,6 +312,9 @@ static int v3d_perfmon_idr_del(int id, void *elem, void *data) > > if (perfmon == v3d->active_perfmon) > > v3d_perfmon_stop(v3d, perfmon, false); > > > > + /* If the global perfmon is being destroyed, set it to NULL */ > > + cmpxchg(&v3d->global_perfmon, perfmon, NULL); > > + > > v3d_perfmon_put(perfmon); > > > > return 0; > > @@ -451,3 +454,34 @@ int v3d_perfmon_get_counter_ioctl(struct drm_device *dev, void *data, > > > > return 0; > > } > > [...] 
> > > index 87fc5bb0a61e..709724fe28e6 100644 > > --- a/include/uapi/drm/v3d_drm.h > > +++ b/include/uapi/drm/v3d_drm.h > > @@ -43,6 +43,7 @@ extern "C" { > > #define DRM_V3D_PERFMON_GET_VALUES 0x0a > > #define DRM_V3D_SUBMIT_CPU 0x0b > > #define DRM_V3D_PERFMON_GET_COUNTER 0x0c > > +#define DRM_V3D_PERFMON_SET_GLOBAL 0x0d > > > > #define DRM_IOCTL_V3D_SUBMIT_CL DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CL, struct drm_v3d_submit_cl) > > #define DRM_IOCTL_V3D_WAIT_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_WAIT_BO, struct drm_v3d_wait_bo) > > @@ -61,6 +62,8 @@ extern "C" { > > #define DRM_IOCTL_V3D_SUBMIT_CPU DRM_IOW(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CPU, struct drm_v3d_submit_cpu) > > #define DRM_IOCTL_V3D_PERFMON_GET_COUNTER DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_PERFMON_GET_COUNTER, \ > > struct drm_v3d_perfmon_get_counter) > > +#define DRM_IOCTL_V3D_PERFMON_SET_GLOBAL DRM_IOW(DRM_COMMAND_BASE + DRM_V3D_PERFMON_SET_GLOBAL, \ > > + struct drm_v3d_perfmon_set_global) > > > > #define DRM_V3D_SUBMIT_CL_FLUSH_CACHE 0x01 > > #define DRM_V3D_SUBMIT_EXTENSION 0x02 > > @@ -765,6 +768,18 @@ struct drm_v3d_perfmon_get_counter { > > __u8 reserved[7]; > > }; > > > > +#define DRM_V3D_PERFMON_CLEAR_GLOBAL 0x0001 > > + > > +/* > > Please, use kernel-doc [1]. So, here is /** > > [1] https://docs.kernel.org/doc-guide/kernel-doc.html > > > + * struct drm_v3d_perfmon_set_global - ioctl to define a > > The column width is 80. > > > + * global performance counter that is used if a job has > > s/counter/monitor > > > + * not assigned one on its own. > > This description isn't really precise. When the global performance > monitor is enabled, the job perfmon doesn't matter. Currently, we don't > even allow a job perfmon to be submitted, but if we did, the global > perfmon would be used even if the job had a perfmon. > I have rephrased this comment; it should be more precise now.
Hi Maíra, > On 31/10/24 17:51, Christian Gmeiner wrote: > > From: Christian Gmeiner <cgmeiner@igalia.com> > > > > Add a new ioctl, DRM_IOCTL_V3D_PERFMON_SET_GLOBAL, to allow > > configuration of a global performance monitor (perfmon). > > Use the global perfmon for all jobs to ensure consistent > > performance tracking across submissions. > > > > Signed-off-by: Christian Gmeiner <cgmeiner@igalia.com> > > > > --- > > Changes in v2: > > - Reworked commit message. > > - Removed num_perfmon counter for tracking perfmon allocations. > > - Allowing allocation of perfmons when the global perfmon is active. > > - Return -EAGAIN for submissions with a per job perfmon if the global perfmon is active. > > --- > > drivers/gpu/drm/v3d/v3d_drv.c | 1 + > > drivers/gpu/drm/v3d/v3d_drv.h | 8 ++++++++ > > drivers/gpu/drm/v3d/v3d_perfmon.c | 34 +++++++++++++++++++++++++++++++ > > drivers/gpu/drm/v3d/v3d_sched.c | 14 ++++++++++--- > > drivers/gpu/drm/v3d/v3d_submit.c | 10 +++++++++ > > include/uapi/drm/v3d_drm.h | 15 ++++++++++++++ > > 6 files changed, 79 insertions(+), 3 deletions(-) > > > > [...] 
> > > diff --git a/drivers/gpu/drm/v3d/v3d_perfmon.c b/drivers/gpu/drm/v3d/v3d_perfmon.c > > index 156be13ab2ef..bf42303c292b 100644 > > --- a/drivers/gpu/drm/v3d/v3d_perfmon.c > > +++ b/drivers/gpu/drm/v3d/v3d_perfmon.c > > @@ -312,6 +312,9 @@ static int v3d_perfmon_idr_del(int id, void *elem, void *data) > > if (perfmon == v3d->active_perfmon) > > v3d_perfmon_stop(v3d, perfmon, false); > > > > + /* If the global perfmon is being destroyed, set it to NULL */ > > + cmpxchg(&v3d->global_perfmon, perfmon, NULL); > > + > > v3d_perfmon_put(perfmon); > > > > return 0; > > @@ -451,3 +454,34 @@ int v3d_perfmon_get_counter_ioctl(struct drm_device *dev, void *data, > > > > return 0; > > } > > + > > +int v3d_perfmon_set_global_ioctl(struct drm_device *dev, void *data, > > + struct drm_file *file_priv) > > +{ > > + struct v3d_file_priv *v3d_priv = file_priv->driver_priv; > > + struct drm_v3d_perfmon_set_global *req = data; > > + struct v3d_dev *v3d = to_v3d_dev(dev); > > + struct v3d_perfmon *perfmon; > > + > > + if (req->flags & ~DRM_V3D_PERFMON_CLEAR_GLOBAL) > > + return -EINVAL; > > + > > + perfmon = v3d_perfmon_find(v3d_priv, req->id); > > + if (!perfmon) > > + return -EINVAL; > > + > > + /* If the request is to clear the global performance monitor */ > > + if (req->flags & DRM_V3D_PERFMON_CLEAR_GLOBAL) { > > + if (!v3d->global_perfmon) > > + return -EINVAL; > > + > > + xchg(&v3d->global_perfmon, NULL); > > I'm reading the userspace code now and I think you need to call > `v3d_perfmon_stop` here to make sure that the active perfmon is no > longer the global perfmon. > I will add a `v3d_perfmon_stop` call to `v3d_perfmon_destroy_ioctl` instead, as that seems like a much better place for it.
diff --git a/drivers/gpu/drm/v3d/v3d_drv.c b/drivers/gpu/drm/v3d/v3d_drv.c index d7ff1f5fa481..3c89f0daa5b8 100644 --- a/drivers/gpu/drm/v3d/v3d_drv.c +++ b/drivers/gpu/drm/v3d/v3d_drv.c @@ -214,6 +214,7 @@ static const struct drm_ioctl_desc v3d_drm_ioctls[] = { DRM_IOCTL_DEF_DRV(V3D_PERFMON_GET_VALUES, v3d_perfmon_get_values_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(V3D_SUBMIT_CPU, v3d_submit_cpu_ioctl, DRM_RENDER_ALLOW | DRM_AUTH), DRM_IOCTL_DEF_DRV(V3D_PERFMON_GET_COUNTER, v3d_perfmon_get_counter_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(V3D_PERFMON_SET_GLOBAL, v3d_perfmon_set_global_ioctl, DRM_RENDER_ALLOW), }; static const struct drm_driver v3d_drm_driver = { diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h index cf4b23369dc4..a0d920ec2b1d 100644 --- a/drivers/gpu/drm/v3d/v3d_drv.h +++ b/drivers/gpu/drm/v3d/v3d_drv.h @@ -179,6 +179,12 @@ struct v3d_dev { u32 num_allocated; u32 pages_allocated; } bo_stats; + + /* To support a performance analysis tool in user space, we require + * a single, globally configured performance monitor (perfmon) for + * all jobs. 
+ */ + struct v3d_perfmon *global_perfmon; }; static inline struct v3d_dev * @@ -584,6 +590,8 @@ int v3d_perfmon_get_values_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int v3d_perfmon_get_counter_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); +int v3d_perfmon_set_global_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); /* v3d_sysfs.c */ int v3d_sysfs_init(struct device *dev); diff --git a/drivers/gpu/drm/v3d/v3d_perfmon.c b/drivers/gpu/drm/v3d/v3d_perfmon.c index 156be13ab2ef..bf42303c292b 100644 --- a/drivers/gpu/drm/v3d/v3d_perfmon.c +++ b/drivers/gpu/drm/v3d/v3d_perfmon.c @@ -312,6 +312,9 @@ static int v3d_perfmon_idr_del(int id, void *elem, void *data) if (perfmon == v3d->active_perfmon) v3d_perfmon_stop(v3d, perfmon, false); + /* If the global perfmon is being destroyed, set it to NULL */ + cmpxchg(&v3d->global_perfmon, perfmon, NULL); + v3d_perfmon_put(perfmon); return 0; @@ -451,3 +454,34 @@ int v3d_perfmon_get_counter_ioctl(struct drm_device *dev, void *data, return 0; } + +int v3d_perfmon_set_global_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct v3d_file_priv *v3d_priv = file_priv->driver_priv; + struct drm_v3d_perfmon_set_global *req = data; + struct v3d_dev *v3d = to_v3d_dev(dev); + struct v3d_perfmon *perfmon; + + if (req->flags & ~DRM_V3D_PERFMON_CLEAR_GLOBAL) + return -EINVAL; + + perfmon = v3d_perfmon_find(v3d_priv, req->id); + if (!perfmon) + return -EINVAL; + + /* If the request is to clear the global performance monitor */ + if (req->flags & DRM_V3D_PERFMON_CLEAR_GLOBAL) { + if (!v3d->global_perfmon) + return -EINVAL; + + xchg(&v3d->global_perfmon, NULL); + + return 0; + } + + if (cmpxchg(&v3d->global_perfmon, NULL, perfmon)) + return -EBUSY; + + return 0; +} diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c index 08d2a2739582..38690740f593 100644 --- a/drivers/gpu/drm/v3d/v3d_sched.c +++ 
b/drivers/gpu/drm/v3d/v3d_sched.c @@ -120,11 +120,19 @@ v3d_cpu_job_free(struct drm_sched_job *sched_job) static void v3d_switch_perfmon(struct v3d_dev *v3d, struct v3d_job *job) { - if (job->perfmon != v3d->active_perfmon) + struct v3d_perfmon *perfmon = v3d->global_perfmon; + + if (!perfmon) + perfmon = job->perfmon; + + if (perfmon == v3d->active_perfmon) + return; + + if (perfmon != v3d->active_perfmon) v3d_perfmon_stop(v3d, v3d->active_perfmon, true); - if (job->perfmon && v3d->active_perfmon != job->perfmon) - v3d_perfmon_start(v3d, job->perfmon); + if (perfmon && v3d->active_perfmon != perfmon) + v3d_perfmon_start(v3d, perfmon); } static void diff --git a/drivers/gpu/drm/v3d/v3d_submit.c b/drivers/gpu/drm/v3d/v3d_submit.c index d607aa9c4ec2..9e439c9f0a93 100644 --- a/drivers/gpu/drm/v3d/v3d_submit.c +++ b/drivers/gpu/drm/v3d/v3d_submit.c @@ -981,6 +981,11 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data, goto fail; if (args->perfmon_id) { + if (v3d->global_perfmon) { + ret = -EAGAIN; + goto fail_perfmon; + } + render->base.perfmon = v3d_perfmon_find(v3d_priv, args->perfmon_id); @@ -1196,6 +1201,11 @@ v3d_submit_csd_ioctl(struct drm_device *dev, void *data, goto fail; if (args->perfmon_id) { + if (v3d->global_perfmon) { + ret = -EAGAIN; + goto fail_perfmon; + } + job->base.perfmon = v3d_perfmon_find(v3d_priv, args->perfmon_id); if (!job->base.perfmon) { diff --git a/include/uapi/drm/v3d_drm.h b/include/uapi/drm/v3d_drm.h index 87fc5bb0a61e..709724fe28e6 100644 --- a/include/uapi/drm/v3d_drm.h +++ b/include/uapi/drm/v3d_drm.h @@ -43,6 +43,7 @@ extern "C" { #define DRM_V3D_PERFMON_GET_VALUES 0x0a #define DRM_V3D_SUBMIT_CPU 0x0b #define DRM_V3D_PERFMON_GET_COUNTER 0x0c +#define DRM_V3D_PERFMON_SET_GLOBAL 0x0d #define DRM_IOCTL_V3D_SUBMIT_CL DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CL, struct drm_v3d_submit_cl) #define DRM_IOCTL_V3D_WAIT_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_WAIT_BO, struct drm_v3d_wait_bo) @@ -61,6 +62,8 @@ extern "C" { #define 
DRM_IOCTL_V3D_SUBMIT_CPU DRM_IOW(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CPU, struct drm_v3d_submit_cpu) #define DRM_IOCTL_V3D_PERFMON_GET_COUNTER DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_PERFMON_GET_COUNTER, \ struct drm_v3d_perfmon_get_counter) +#define DRM_IOCTL_V3D_PERFMON_SET_GLOBAL DRM_IOW(DRM_COMMAND_BASE + DRM_V3D_PERFMON_SET_GLOBAL, \ + struct drm_v3d_perfmon_set_global) #define DRM_V3D_SUBMIT_CL_FLUSH_CACHE 0x01 #define DRM_V3D_SUBMIT_EXTENSION 0x02 @@ -765,6 +768,18 @@ struct drm_v3d_perfmon_get_counter { __u8 reserved[7]; }; +#define DRM_V3D_PERFMON_CLEAR_GLOBAL 0x0001 + +/* + * struct drm_v3d_perfmon_set_global - ioctl to define a + * global performance counter that is used if a job has + * not assigned one on its own. + */ +struct drm_v3d_perfmon_set_global { + __u32 flags; + __u32 id; +}; + #if defined(__cplusplus) } #endif