Message ID | 20241020204156.113853-1-christian.gmeiner@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | drm/v3d: Add DRM_IOCTL_V3D_PERFMON_SET_GLOBAL | expand |
Hi Christian! Em 20/10/2024 17:41, Christian Gmeiner escreveu: > From: Christian Gmeiner <cgmeiner@igalia.com> > > This patch adds a new ioctl, DRM_IOCTL_V3D_PERFMON_SET_GLOBAL, which > allows the configuration of a global performance monitor (perfmon). > The global perfmon is used for all jobs, ensuring consistent performance > tracking across submissions. Usually we write in the imperative form: Add a new ioctl, ... > > Signed-off-by: Christian Gmeiner <cgmeiner@igalia.com> > --- > drivers/gpu/drm/v3d/v3d_drv.c | 3 ++ > drivers/gpu/drm/v3d/v3d_drv.h | 10 ++++ > drivers/gpu/drm/v3d/v3d_perfmon.c | 49 +++++++++++++++++++ > .../gpu/drm/v3d/v3d_performance_counters.h | 6 +++ > drivers/gpu/drm/v3d/v3d_sched.c | 10 +++- > include/uapi/drm/v3d_drm.h | 15 ++++++ > 6 files changed, 91 insertions(+), 2 deletions(-) > > diff --git a/drivers/gpu/drm/v3d/v3d_drv.c b/drivers/gpu/drm/v3d/v3d_drv.c > index d7ff1f5fa481..f1753ee2af25 100644 > --- a/drivers/gpu/drm/v3d/v3d_drv.c > +++ b/drivers/gpu/drm/v3d/v3d_drv.c > @@ -214,6 +214,7 @@ static const struct drm_ioctl_desc v3d_drm_ioctls[] = { > DRM_IOCTL_DEF_DRV(V3D_PERFMON_GET_VALUES, v3d_perfmon_get_values_ioctl, DRM_RENDER_ALLOW), > DRM_IOCTL_DEF_DRV(V3D_SUBMIT_CPU, v3d_submit_cpu_ioctl, DRM_RENDER_ALLOW | DRM_AUTH), > DRM_IOCTL_DEF_DRV(V3D_PERFMON_GET_COUNTER, v3d_perfmon_get_counter_ioctl, DRM_RENDER_ALLOW), > + DRM_IOCTL_DEF_DRV(V3D_PERFMON_SET_GLOBAL, v3d_perfmon_set_global_ioctl, DRM_RENDER_ALLOW), > }; > > static const struct drm_driver v3d_drm_driver = { > @@ -350,6 +351,8 @@ static int v3d_platform_drm_probe(struct platform_device *pdev) > if (ret) > goto drm_unregister; > > + atomic_set(&v3d->num_perfmon, 0); > + > return 0; > > drm_unregister: > diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h > index cf4b23369dc4..9491d730d99f 100644 > --- a/drivers/gpu/drm/v3d/v3d_drv.h > +++ b/drivers/gpu/drm/v3d/v3d_drv.h > @@ -61,6 +61,8 @@ struct v3d_queue_state { > struct v3d_stats stats; > }; > > +struct v3d_dev; > + Forward declarations go in the beginning of the file, along with the other ones: struct clk; struct platform_device; struct reset_control; +struct v3d_dev; > /* Performance monitor object. The perform lifetime is controlled by userspace > * using perfmon related ioctls. A perfmon can be attached to a submit_cl > * request, and when this is the case, HW perf counters will be activated just > @@ -68,6 +70,9 @@ struct v3d_queue_state { > * done. This way, only events related to a specific job will be counted. > */ > struct v3d_perfmon { > + /* Pointer back to v3d instance this perfmon belongs. */ > + struct v3d_dev *v3d; > + > /* Tracks the number of users of the perfmon, when this counter reaches > * zero the perfmon is destroyed. > */ > @@ -179,6 +184,9 @@ struct v3d_dev { > u32 num_allocated; > u32 pages_allocated; > } bo_stats; > + > + /* Keep track of current number of allocated perfmons. */ > + atomic_t num_perfmon; > }; > > static inline struct v3d_dev * > @@ -584,6 +592,8 @@ int v3d_perfmon_get_values_ioctl(struct drm_device *dev, void *data, > struct drm_file *file_priv); > int v3d_perfmon_get_counter_ioctl(struct drm_device *dev, void *data, > struct drm_file *file_priv); > +int v3d_perfmon_set_global_ioctl(struct drm_device *dev, void *data, > + struct drm_file *file_priv); > > /* v3d_sysfs.c */ > int v3d_sysfs_init(struct device *dev); [...] > diff --git a/include/uapi/drm/v3d_drm.h b/include/uapi/drm/v3d_drm.h > index 87fc5bb0a61e..960d392d75a3 100644 > --- a/include/uapi/drm/v3d_drm.h > +++ b/include/uapi/drm/v3d_drm.h > @@ -43,6 +43,7 @@ extern "C" { > #define DRM_V3D_PERFMON_GET_VALUES 0x0a > #define DRM_V3D_SUBMIT_CPU 0x0b > #define DRM_V3D_PERFMON_GET_COUNTER 0x0c > +#define DRM_V3D_PERFMON_SET_GLOBAL 0x0d > > #define DRM_IOCTL_V3D_SUBMIT_CL DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CL, struct drm_v3d_submit_cl) > #define DRM_IOCTL_V3D_WAIT_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_WAIT_BO, struct drm_v3d_wait_bo) > @@ -61,6 +62,8 @@ extern "C" { > #define DRM_IOCTL_V3D_SUBMIT_CPU DRM_IOW(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CPU, struct drm_v3d_submit_cpu) > #define DRM_IOCTL_V3D_PERFMON_GET_COUNTER DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_PERFMON_GET_COUNTER, \ > struct drm_v3d_perfmon_get_counter) > +#define DRM_IOCTL_V3D_PERFMON_SET_GLOBAL DRM_IOW(DRM_COMMAND_BASE + DRM_V3D_PERFMON_SET_GLOBAL, \ > + struct drm_v3d_perfmon_set_global) > > #define DRM_V3D_SUBMIT_CL_FLUSH_CACHE 0x01 > #define DRM_V3D_SUBMIT_EXTENSION 0x02 > @@ -765,6 +768,18 @@ struct drm_v3d_perfmon_get_counter { > __u8 reserved[7]; > }; > > +/** Using /** means that you are writting a kernel-doc comment [1], so make sure to describe each struct member, otherwise it's going to generate build warnings with W=1. > + * struct drm_v3d_perfmon_set_global - ioctl to define a > + * global performance counter that is used if a job has > + * not assigned one on its own. > + */ > + > +#define DRM_V3D_PERFMON_CLEAR_GLOBAL 0x0001 I would keep this define above the struct comment. > +struct drm_v3d_perfmon_set_global { > + __u32 flags; > + __u32 id; > +}; > + > #if defined(__cplusplus) > } > #endif [1] https://www.kernel.org/doc/html/latest/doc-guide/kernel-doc.html
On 10/20/24 17:41, Christian Gmeiner wrote: > From: Christian Gmeiner <cgmeiner@igalia.com> > > This patch adds a new ioctl, DRM_IOCTL_V3D_PERFMON_SET_GLOBAL, which > allows the configuration of a global performance monitor (perfmon). > The global perfmon is used for all jobs, ensuring consistent performance > tracking across submissions. > > Signed-off-by: Christian Gmeiner <cgmeiner@igalia.com> > --- > drivers/gpu/drm/v3d/v3d_drv.c | 3 ++ > drivers/gpu/drm/v3d/v3d_drv.h | 10 ++++ > drivers/gpu/drm/v3d/v3d_perfmon.c | 49 +++++++++++++++++++ > .../gpu/drm/v3d/v3d_performance_counters.h | 6 +++ > drivers/gpu/drm/v3d/v3d_sched.c | 10 +++- > include/uapi/drm/v3d_drm.h | 15 ++++++ > 6 files changed, 91 insertions(+), 2 deletions(-) > Hi Christian, I have one major issue with this approach: I don't think we should introduce a `global_perfmon` in `struct v3d_perfmon_info`. `struct v3d_perfmon_info` was created to store information about the counters, such as total number of perfcnts supported and the description of the counters. I believe you should use `active_perfmon` in your implementation and don't create `global_perfmon`. This is going to make the code less tricky to understand and it's going to make sure that the hardware inner working is transparent in software. Only one perfmon can be active in a given moment of time, therefore, let's use `active_perfmon` to represent it. I couple more things came to my attention. First, I don't think we need to limit the creation of other perfmons. We can create perfmons and don't use it for a while. We only need to make sure that the user can't attach perfmons to jobs, when the global perfmon is enabled. For sure, if we go through this strategy, there is no need to have a count of all the perfmons that V3D has. I would prefer to treat the global perfmon as a state. Ideally, we would enable and disable this state through the IOCTL. One last thing is: don't forget to stop the perfmons when you don't use it anymore :) Best Regards, - Maíra
Hi Maíra, > > I have one major issue with this approach: I don't think we should > introduce a `global_perfmon` in `struct v3d_perfmon_info`. `struct > v3d_perfmon_info` was created to store information about the counters, > such as total number of perfcnts supported and the description of the > counters. > Ah okay.. got the idea of global_perfmon. > > I believe you should use `active_perfmon` in your implementation and > don't create `global_perfmon`. This is going to make the code less > tricky to understand and it's going to make sure that the hardware inner > working is transparent in software. > > > Only one perfmon can be active in a given moment of time, therefore, > let's use `active_perfmon` to represent it. > Relying solely on active_perfmon makes the code hard to follow. I need at least a flag to indicate whether we are in global perfmon mode. > I couple more things came to my attention. First, I don't think we need > to limit the creation of other perfmons. We can create perfmons and > don't use it for a while. We only need to make sure that the user can't > attach perfmons to jobs, when the global perfmon is enabled. > > For sure, if we go through this strategy, there is no need to have a > count of all the perfmons that V3D has. > That is a fantastic idea. > I would prefer to treat the global perfmon as a state. Ideally, we would > enable and disable this state through the IOCTL. > > One last thing is: don't forget to stop the perfmons when you don't use > it anymore :) > I've sent v2 of this patch and hope I've addressed all your comments.
Hi André > > Em 20/10/2024 17:41, Christian Gmeiner escreveu: > > From: Christian Gmeiner <cgmeiner@igalia.com> > > > > This patch adds a new ioctl, DRM_IOCTL_V3D_PERFMON_SET_GLOBAL, which > > allows the configuration of a global performance monitor (perfmon). > > The global perfmon is used for all jobs, ensuring consistent performance > > tracking across submissions. > > Usually we write in the imperative form: > > Add a new ioctl, ... > I switched to imperative from v2. > > > > Signed-off-by: Christian Gmeiner <cgmeiner@igalia.com> > > --- > > drivers/gpu/drm/v3d/v3d_drv.c | 3 ++ > > drivers/gpu/drm/v3d/v3d_drv.h | 10 ++++ > > drivers/gpu/drm/v3d/v3d_perfmon.c | 49 +++++++++++++++++++ > > .../gpu/drm/v3d/v3d_performance_counters.h | 6 +++ > > drivers/gpu/drm/v3d/v3d_sched.c | 10 +++- > > include/uapi/drm/v3d_drm.h | 15 ++++++ > > 6 files changed, 91 insertions(+), 2 deletions(-) > > > > diff --git a/drivers/gpu/drm/v3d/v3d_drv.c b/drivers/gpu/drm/v3d/v3d_drv.c > > index d7ff1f5fa481..f1753ee2af25 100644 > > --- a/drivers/gpu/drm/v3d/v3d_drv.c > > +++ b/drivers/gpu/drm/v3d/v3d_drv.c > > @@ -214,6 +214,7 @@ static const struct drm_ioctl_desc v3d_drm_ioctls[] = { > > DRM_IOCTL_DEF_DRV(V3D_PERFMON_GET_VALUES, v3d_perfmon_get_values_ioctl, DRM_RENDER_ALLOW), > > DRM_IOCTL_DEF_DRV(V3D_SUBMIT_CPU, v3d_submit_cpu_ioctl, DRM_RENDER_ALLOW | DRM_AUTH), > > DRM_IOCTL_DEF_DRV(V3D_PERFMON_GET_COUNTER, v3d_perfmon_get_counter_ioctl, DRM_RENDER_ALLOW), > > + DRM_IOCTL_DEF_DRV(V3D_PERFMON_SET_GLOBAL, v3d_perfmon_set_global_ioctl, DRM_RENDER_ALLOW), > > }; > > > > static const struct drm_driver v3d_drm_driver = { > > @@ -350,6 +351,8 @@ static int v3d_platform_drm_probe(struct platform_device *pdev) > > if (ret) > > goto drm_unregister; > > > > + atomic_set(&v3d->num_perfmon, 0); > > + > > return 0; > > > > drm_unregister: > > diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h > > index cf4b23369dc4..9491d730d99f 100644 > > --- a/drivers/gpu/drm/v3d/v3d_drv.h > > +++ b/drivers/gpu/drm/v3d/v3d_drv.h > > @@ -61,6 +61,8 @@ struct v3d_queue_state { > > struct v3d_stats stats; > > }; > > > > +struct v3d_dev; > > + > > Forward declarations go in the beginning of the file, along with the > other ones: > > struct clk; > struct platform_device; > struct reset_control; > +struct v3d_dev; > I am happy that I do not need this in v2 anymore. > > /* Performance monitor object. The perform lifetime is controlled by userspace > > * using perfmon related ioctls. A perfmon can be attached to a submit_cl > > * request, and when this is the case, HW perf counters will be activated just > > @@ -68,6 +70,9 @@ struct v3d_queue_state { > > * done. This way, only events related to a specific job will be counted. > > */ > > struct v3d_perfmon { > > + /* Pointer back to v3d instance this perfmon belongs. */ > > + struct v3d_dev *v3d; > > + > > /* Tracks the number of users of the perfmon, when this counter reaches > > * zero the perfmon is destroyed. > > */ > > @@ -179,6 +184,9 @@ struct v3d_dev { > > u32 num_allocated; > > u32 pages_allocated; > > } bo_stats; > > + > > + /* Keep track of current number of allocated perfmons. */ > > + atomic_t num_perfmon; > > }; > > > > static inline struct v3d_dev * > > @@ -584,6 +592,8 @@ int v3d_perfmon_get_values_ioctl(struct drm_device *dev, void *data, > > struct drm_file *file_priv); > > int v3d_perfmon_get_counter_ioctl(struct drm_device *dev, void *data, > > struct drm_file *file_priv); > > +int v3d_perfmon_set_global_ioctl(struct drm_device *dev, void *data, > > + struct drm_file *file_priv); > > > > /* v3d_sysfs.c */ > > int v3d_sysfs_init(struct device *dev); > > [...] > > > diff --git a/include/uapi/drm/v3d_drm.h b/include/uapi/drm/v3d_drm.h > > index 87fc5bb0a61e..960d392d75a3 100644 > > --- a/include/uapi/drm/v3d_drm.h > > +++ b/include/uapi/drm/v3d_drm.h > > @@ -43,6 +43,7 @@ extern "C" { > > #define DRM_V3D_PERFMON_GET_VALUES 0x0a > > #define DRM_V3D_SUBMIT_CPU 0x0b > > #define DRM_V3D_PERFMON_GET_COUNTER 0x0c > > +#define DRM_V3D_PERFMON_SET_GLOBAL 0x0d > > > > #define DRM_IOCTL_V3D_SUBMIT_CL DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CL, struct drm_v3d_submit_cl) > > #define DRM_IOCTL_V3D_WAIT_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_WAIT_BO, struct drm_v3d_wait_bo) > > @@ -61,6 +62,8 @@ extern "C" { > > #define DRM_IOCTL_V3D_SUBMIT_CPU DRM_IOW(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CPU, struct drm_v3d_submit_cpu) > > #define DRM_IOCTL_V3D_PERFMON_GET_COUNTER DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_PERFMON_GET_COUNTER, \ > > struct drm_v3d_perfmon_get_counter) > > +#define DRM_IOCTL_V3D_PERFMON_SET_GLOBAL DRM_IOW(DRM_COMMAND_BASE + DRM_V3D_PERFMON_SET_GLOBAL, \ > > + struct drm_v3d_perfmon_set_global) > > > > #define DRM_V3D_SUBMIT_CL_FLUSH_CACHE 0x01 > > #define DRM_V3D_SUBMIT_EXTENSION 0x02 > > @@ -765,6 +768,18 @@ struct drm_v3d_perfmon_get_counter { > > __u8 reserved[7]; > > }; > > > > +/** > > Using /** means that you are writting a kernel-doc comment [1], so make > sure to describe each struct member, otherwise it's going to generate > build warnings with W=1. > Learned something new - thanks for sharing. > > + * struct drm_v3d_perfmon_set_global - ioctl to define a > > + * global performance counter that is used if a job has > > + * not assigned one on its own. > > + */ > > + > > +#define DRM_V3D_PERFMON_CLEAR_GLOBAL 0x0001 > > I would keep this define above the struct comment. > Sure .. have done it in v2 of the patch. > > +struct drm_v3d_perfmon_set_global { > > + __u32 flags; > > + __u32 id; > > +}; > > + > > #if defined(__cplusplus) > > } > > #endif > > [1] https://www.kernel.org/doc/html/latest/doc-guide/kernel-doc.html >
diff --git a/drivers/gpu/drm/v3d/v3d_drv.c b/drivers/gpu/drm/v3d/v3d_drv.c index d7ff1f5fa481..f1753ee2af25 100644 --- a/drivers/gpu/drm/v3d/v3d_drv.c +++ b/drivers/gpu/drm/v3d/v3d_drv.c @@ -214,6 +214,7 @@ static const struct drm_ioctl_desc v3d_drm_ioctls[] = { DRM_IOCTL_DEF_DRV(V3D_PERFMON_GET_VALUES, v3d_perfmon_get_values_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(V3D_SUBMIT_CPU, v3d_submit_cpu_ioctl, DRM_RENDER_ALLOW | DRM_AUTH), DRM_IOCTL_DEF_DRV(V3D_PERFMON_GET_COUNTER, v3d_perfmon_get_counter_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(V3D_PERFMON_SET_GLOBAL, v3d_perfmon_set_global_ioctl, DRM_RENDER_ALLOW), }; static const struct drm_driver v3d_drm_driver = { @@ -350,6 +351,8 @@ static int v3d_platform_drm_probe(struct platform_device *pdev) if (ret) goto drm_unregister; + atomic_set(&v3d->num_perfmon, 0); + return 0; drm_unregister: diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h index cf4b23369dc4..9491d730d99f 100644 --- a/drivers/gpu/drm/v3d/v3d_drv.h +++ b/drivers/gpu/drm/v3d/v3d_drv.h @@ -61,6 +61,8 @@ struct v3d_queue_state { struct v3d_stats stats; }; +struct v3d_dev; + /* Performance monitor object. The perform lifetime is controlled by userspace * using perfmon related ioctls. A perfmon can be attached to a submit_cl * request, and when this is the case, HW perf counters will be activated just @@ -68,6 +70,9 @@ struct v3d_queue_state { * done. This way, only events related to a specific job will be counted. */ struct v3d_perfmon { + /* Pointer back to v3d instance this perfmon belongs. */ + struct v3d_dev *v3d; + /* Tracks the number of users of the perfmon, when this counter reaches * zero the perfmon is destroyed. */ @@ -179,6 +184,9 @@ struct v3d_dev { u32 num_allocated; u32 pages_allocated; } bo_stats; + + /* Keep track of current number of allocated perfmons. */ + atomic_t num_perfmon; }; static inline struct v3d_dev * @@ -584,6 +592,8 @@ int v3d_perfmon_get_values_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int v3d_perfmon_get_counter_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); +int v3d_perfmon_set_global_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); /* v3d_sysfs.c */ int v3d_sysfs_init(struct device *dev); diff --git a/drivers/gpu/drm/v3d/v3d_perfmon.c b/drivers/gpu/drm/v3d/v3d_perfmon.c index 156be13ab2ef..15ccdbf2c4d6 100644 --- a/drivers/gpu/drm/v3d/v3d_perfmon.c +++ b/drivers/gpu/drm/v3d/v3d_perfmon.c @@ -221,6 +221,7 @@ void v3d_perfmon_get(struct v3d_perfmon *perfmon) void v3d_perfmon_put(struct v3d_perfmon *perfmon) { if (perfmon && refcount_dec_and_test(&perfmon->refcnt)) { + atomic_dec(&perfmon->v3d->num_perfmon); mutex_destroy(&perfmon->lock); kfree(perfmon); } @@ -312,6 +313,9 @@ static int v3d_perfmon_idr_del(int id, void *elem, void *data) if (perfmon == v3d->active_perfmon) v3d_perfmon_stop(v3d, perfmon, false); + /* If this perfmon is global one, set global_perfmon to NULL */ + cmpxchg(&v3d->perfmon_info.global_perfmon, perfmon, NULL); + v3d_perfmon_put(perfmon); return 0; @@ -349,6 +353,12 @@ int v3d_perfmon_create_ioctl(struct drm_device *dev, void *data, return -EINVAL; } + /* If there is a global perfmon. block the creation of perfmons. */ + if (v3d->perfmon_info.global_perfmon) { + dev_info_ratelimited(dev->dev, "global perfmon is active\n"); + return -EAGAIN; + } + perfmon = kzalloc(struct_size(perfmon, values, req->ncounters), GFP_KERNEL); if (!perfmon) @@ -358,6 +368,7 @@ int v3d_perfmon_create_ioctl(struct drm_device *dev, void *data, perfmon->counters[i] = req->counters[i]; perfmon->ncounters = req->ncounters; + perfmon->v3d = v3d; refcount_set(&perfmon->refcnt, 1); mutex_init(&perfmon->lock); @@ -373,6 +384,7 @@ int v3d_perfmon_create_ioctl(struct drm_device *dev, void *data, return ret; } + atomic_inc(&perfmon->v3d->num_perfmon); req->id = ret; return 0; @@ -451,3 +463,40 @@ int v3d_perfmon_get_counter_ioctl(struct drm_device *dev, void *data, return 0; } + +int v3d_perfmon_set_global_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct v3d_file_priv *v3d_priv = file_priv->driver_priv; + struct drm_v3d_perfmon_set_global *req = data; + struct v3d_dev *v3d = to_v3d_dev(dev); + struct v3d_perfmon *perfmon; + + if (req->flags & ~DRM_V3D_PERFMON_CLEAR_GLOBAL) + return -EINVAL; + + /* If the request is to clear the global performance monitor. */ + if (req->flags & DRM_V3D_PERFMON_CLEAR_GLOBAL) { + if (cmpxchg(&v3d->perfmon_info.global_perfmon, v3d->perfmon_info.global_perfmon, NULL)) + return 0; + else + return -EINVAL; + } + + guard(mutex)(&v3d_priv->perfmon.lock); + + perfmon = idr_find(&v3d_priv->perfmon.idr, req->id); + if (!perfmon) + return -EINVAL; + + /* Only permit this ioctl(..) if there is exactly one perfmon - the one + * we want to make global. + */ + if (atomic_read(&v3d->num_perfmon) != 1) + return -EPERM; + + if (cmpxchg(&v3d->perfmon_info.global_perfmon, NULL, perfmon)) + return -EBUSY; + + return 0; +} diff --git a/drivers/gpu/drm/v3d/v3d_performance_counters.h b/drivers/gpu/drm/v3d/v3d_performance_counters.h index d919a2fc9449..3d1c64eeb2da 100644 --- a/drivers/gpu/drm/v3d/v3d_performance_counters.h +++ b/drivers/gpu/drm/v3d/v3d_performance_counters.h @@ -30,6 +30,12 @@ struct v3d_perfmon_info { * Array of counters valid for the platform. */ const struct v3d_perf_counter_desc *counters; + + /* + * For some performance analysis tool in user space, we need + * to use one global configured perfmon for all jobs. + */ + struct v3d_perfmon *global_perfmon; }; #endif diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c index 08d2a2739582..686248d15329 100644 --- a/drivers/gpu/drm/v3d/v3d_sched.c +++ b/drivers/gpu/drm/v3d/v3d_sched.c @@ -120,11 +120,17 @@ v3d_cpu_job_free(struct drm_sched_job *sched_job) static void v3d_switch_perfmon(struct v3d_dev *v3d, struct v3d_job *job) { + struct v3d_perfmon *perfmon = v3d->perfmon_info.global_perfmon; + + if (!perfmon) + perfmon = job->perfmon; + if (job->perfmon != v3d->active_perfmon) v3d_perfmon_stop(v3d, v3d->active_perfmon, true); - if (job->perfmon && v3d->active_perfmon != job->perfmon) - v3d_perfmon_start(v3d, job->perfmon); + if (perfmon && v3d->active_perfmon != perfmon) + v3d_perfmon_start(v3d, perfmon); + } static void diff --git a/include/uapi/drm/v3d_drm.h b/include/uapi/drm/v3d_drm.h index 87fc5bb0a61e..960d392d75a3 100644 --- a/include/uapi/drm/v3d_drm.h +++ b/include/uapi/drm/v3d_drm.h @@ -43,6 +43,7 @@ extern "C" { #define DRM_V3D_PERFMON_GET_VALUES 0x0a #define DRM_V3D_SUBMIT_CPU 0x0b #define DRM_V3D_PERFMON_GET_COUNTER 0x0c +#define DRM_V3D_PERFMON_SET_GLOBAL 0x0d #define DRM_IOCTL_V3D_SUBMIT_CL DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CL, struct drm_v3d_submit_cl) #define DRM_IOCTL_V3D_WAIT_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_WAIT_BO, struct drm_v3d_wait_bo) @@ -61,6 +62,8 @@ extern "C" { #define DRM_IOCTL_V3D_SUBMIT_CPU DRM_IOW(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CPU, struct drm_v3d_submit_cpu) #define DRM_IOCTL_V3D_PERFMON_GET_COUNTER DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_PERFMON_GET_COUNTER, \ struct drm_v3d_perfmon_get_counter) +#define DRM_IOCTL_V3D_PERFMON_SET_GLOBAL DRM_IOW(DRM_COMMAND_BASE + DRM_V3D_PERFMON_SET_GLOBAL, \ + struct drm_v3d_perfmon_set_global) #define DRM_V3D_SUBMIT_CL_FLUSH_CACHE 0x01 #define DRM_V3D_SUBMIT_EXTENSION 0x02 @@ -765,6 +768,18 @@ struct drm_v3d_perfmon_get_counter { __u8 reserved[7]; }; +/** + * struct drm_v3d_perfmon_set_global - ioctl to define a + * global performance counter that is used if a job has + * not assigned one on its own. + */ + +#define DRM_V3D_PERFMON_CLEAR_GLOBAL 0x0001 +struct drm_v3d_perfmon_set_global { + __u32 flags; + __u32 id; +}; + #if defined(__cplusplus) } #endif