Message ID | 20170609102123.2417-3-christian.gmeiner@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Am Freitag, den 09.06.2017, 12:21 +0200 schrieb Christian Gmeiner: > Sadly we can not read any registers via command stream so we need > to extend the drm_etnaviv_gem_submit struct with performance monitor > requests. Those requests gets process before and/or after the actual > submitted command stream. > > The Vivante kernel driver has a special ioctl to read all perfmon > registers at once and return it. There is no connection between > a specifc command stream and the read perf values. > > Signed-off-by: Christian Gmeiner <christian.gmeiner@gmail.com> > --- > include/uapi/drm/etnaviv_drm.h | 16 ++++++++++++++++ > 1 file changed, 16 insertions(+) > > diff --git a/include/uapi/drm/etnaviv_drm.h > b/include/uapi/drm/etnaviv_drm.h > index 9e488a0..b67a7dd 100644 > --- a/include/uapi/drm/etnaviv_drm.h > +++ b/include/uapi/drm/etnaviv_drm.h > @@ -150,6 +150,19 @@ struct drm_etnaviv_gem_submit_bo { > __u64 presumed; /* in/out, presumed buffer address */ > }; > > +/* performance monitor request (pmr) */ > +#define ETNA_PM_PROCESS_PRE 0x0001 > +#define ETNA_PM_PROCESS_POST 0x0002 > +struct drm_etnaviv_gem_submit_pmr { > + __u32 flags; /* in, when to process request > (ETNA_PM_PROCESS_x) */ > + __u8 domain; /* in, pm domain */ > + __u8 signal; /* in, pm signal */ Are we sure that no domain will ever have more than 256 signals? > + __u16 pad[3]; Unnecessary large pad. A single u16 is enough to naturally align the next member. > + __u32 sequence; /* in, sequence number */ > + __u32 read_offset; /* in, offset from read_bo */ > + __u32 read_idx; /* in, index of read_bo buffer */ > +}; > + > /* Each cmdstream submit consists of a table of buffers involved, > and > * one or more cmdstream buffers. This allows for conditional > execution > * (context-restore), and IB buffers needed for per tile/bin draw > cmds. > @@ -175,6 +188,9 @@ struct drm_etnaviv_gem_submit { > __u64 stream; /* in, ptr to cmdstream */ > __u32 flags; /* in, mask of ETNA_SUBMIT_x */ > __s32 fence_fd; /* in/out, fence fd (see > ETNA_SUBMIT_FENCE_FD_x) */ > + __u64 pmrs; /* in, ptr to array of submit_pmr's */ > + __u32 nr_pmrs; /* in, number of submit_pmr's */ > + __u32 pad; This pad isn't needed. > }; > > /* The normal way to synchronize with the GPU is just to CPU_PREP on
2017-06-26 14:56 GMT+02:00 Lucas Stach <l.stach@pengutronix.de>: > Am Freitag, den 09.06.2017, 12:21 +0200 schrieb Christian Gmeiner: >> Sadly we can not read any registers via command stream so we need >> to extend the drm_etnaviv_gem_submit struct with performance monitor >> requests. Those requests gets process before and/or after the actual >> submitted command stream. >> >> The Vivante kernel driver has a special ioctl to read all perfmon >> registers at once and return it. There is no connection between >> a specifc command stream and the read perf values. >> >> Signed-off-by: Christian Gmeiner <christian.gmeiner@gmail.com> >> --- >> include/uapi/drm/etnaviv_drm.h | 16 ++++++++++++++++ >> 1 file changed, 16 insertions(+) >> >> diff --git a/include/uapi/drm/etnaviv_drm.h >> b/include/uapi/drm/etnaviv_drm.h >> index 9e488a0..b67a7dd 100644 >> --- a/include/uapi/drm/etnaviv_drm.h >> +++ b/include/uapi/drm/etnaviv_drm.h >> @@ -150,6 +150,19 @@ struct drm_etnaviv_gem_submit_bo { >> __u64 presumed; /* in/out, presumed buffer address */ >> }; >> >> +/* performance monitor request (pmr) */ >> +#define ETNA_PM_PROCESS_PRE 0x0001 >> +#define ETNA_PM_PROCESS_POST 0x0002 >> +struct drm_etnaviv_gem_submit_pmr { >> + __u32 flags; /* in, when to process request >> (ETNA_PM_PROCESS_x) */ >> + __u8 domain; /* in, pm domain */ >> + __u8 signal; /* in, pm signal */ > > Are we sure that no domain will ever have more than 256 signals? > My crystal ball is not working as expected so I don't know - but I will change it to a __u16. >> + __u16 pad[3]; > > Unnecessary large pad. A single u16 is enough to naturally align the > next member. > Correct. >> + __u32 sequence; /* in, sequence number */ >> + __u32 read_offset; /* in, offset from read_bo */ >> + __u32 read_idx; /* in, index of read_bo buffer */ >> +}; >> + >> /* Each cmdstream submit consists of a table of buffers involved, >> and >> * one or more cmdstream buffers. This allows for conditional >> execution >> * (context-restore), and IB buffers needed for per tile/bin draw >> cmds. >> @@ -175,6 +188,9 @@ struct drm_etnaviv_gem_submit { >> __u64 stream; /* in, ptr to cmdstream */ >> __u32 flags; /* in, mask of ETNA_SUBMIT_x */ >> __s32 fence_fd; /* in/out, fence fd (see >> ETNA_SUBMIT_FENCE_FD_x) */ >> + __u64 pmrs; /* in, ptr to array of submit_pmr's */ >> + __u32 nr_pmrs; /* in, number of submit_pmr's */ >> + __u32 pad; > > This pad isn't needed. > * Pad the entire struct to a multiple of 64-bits if the structure contains 64-bit types - the structure size will otherwise differ on 32-bit versus 64-bit. Having a different structure size hurts when passing arrays of structures to the kernel, or if the kernel checks the structure size, which e.g. the drm core does. $ pahole drivers/gpu/drm/etnaviv/etnaviv.o -C drm_etnaviv_gem_submit struct drm_etnaviv_gem_submit { __u32 fence; /* 0 4 */ __u32 pipe; /* 4 4 */ __u32 exec_state; /* 8 4 */ __u32 nr_bos; /* 12 4 */ __u32 nr_relocs; /* 16 4 */ __u32 stream_size; /* 20 4 */ __u64 bos; /* 24 8 */ __u64 relocs; /* 32 8 */ __u64 stream; /* 40 8 */ __u32 flags; /* 48 4 */ __s32 fence_fd; /* 52 4 */ __u64 pmrs; /* 56 8 */ /* --- cacheline 1 boundary (64 bytes) --- */ __u32 nr_pmrs; /* 64 4 */ __u32 pad; /* 68 4 */ /* size: 72, cachelines: 2, members: 14 */ /* last cacheline: 8 bytes */ }; >> }; >> >> /* The normal way to synchronize with the GPU is just to CPU_PREP on greets -- Christian Gmeiner, MSc https://www.youtube.com/user/AloryOFFICIAL https://soundcloud.com/christian-gmeiner
Am Sonntag, den 02.07.2017, 12:04 +0200 schrieb Christian Gmeiner: > 2017-06-26 14:56 GMT+02:00 Lucas Stach <l.stach@pengutronix.de>: > > Am Freitag, den 09.06.2017, 12:21 +0200 schrieb Christian Gmeiner: > >> Sadly we can not read any registers via command stream so we need > >> to extend the drm_etnaviv_gem_submit struct with performance monitor > >> requests. Those requests gets process before and/or after the actual > >> submitted command stream. > >> > >> The Vivante kernel driver has a special ioctl to read all perfmon > >> registers at once and return it. There is no connection between > >> a specifc command stream and the read perf values. > >> > >> Signed-off-by: Christian Gmeiner <christian.gmeiner@gmail.com> > >> --- > >> include/uapi/drm/etnaviv_drm.h | 16 ++++++++++++++++ > >> 1 file changed, 16 insertions(+) > >> > >> diff --git a/include/uapi/drm/etnaviv_drm.h > >> b/include/uapi/drm/etnaviv_drm.h > >> index 9e488a0..b67a7dd 100644 > >> --- a/include/uapi/drm/etnaviv_drm.h > >> +++ b/include/uapi/drm/etnaviv_drm.h > >> @@ -150,6 +150,19 @@ struct drm_etnaviv_gem_submit_bo { > >> __u64 presumed; /* in/out, presumed buffer address */ > >> }; > >> > >> +/* performance monitor request (pmr) */ > >> +#define ETNA_PM_PROCESS_PRE 0x0001 > >> +#define ETNA_PM_PROCESS_POST 0x0002 > >> +struct drm_etnaviv_gem_submit_pmr { > >> + __u32 flags; /* in, when to process request > >> (ETNA_PM_PROCESS_x) */ > >> + __u8 domain; /* in, pm domain */ > >> + __u8 signal; /* in, pm signal */ > > > > Are we sure that no domain will ever have more than 256 signals? > > > > My crystal ball is not working as expected so I don't know - but I will > change it to a __u16. > > >> + __u16 pad[3]; > > > > Unnecessary large pad. A single u16 is enough to naturally align the > > next member. > > > > Correct. > > >> + __u32 sequence; /* in, sequence number */ > >> + __u32 read_offset; /* in, offset from read_bo */ > >> + __u32 read_idx; /* in, index of read_bo buffer */ > >> +}; > >> + > >> /* Each cmdstream submit consists of a table of buffers involved, > >> and > >> * one or more cmdstream buffers. This allows for conditional > >> execution > >> * (context-restore), and IB buffers needed for per tile/bin draw > >> cmds. > >> @@ -175,6 +188,9 @@ struct drm_etnaviv_gem_submit { > >> __u64 stream; /* in, ptr to cmdstream */ > >> __u32 flags; /* in, mask of ETNA_SUBMIT_x */ > >> __s32 fence_fd; /* in/out, fence fd (see > >> ETNA_SUBMIT_FENCE_FD_x) */ > >> + __u64 pmrs; /* in, ptr to array of submit_pmr's */ > >> + __u32 nr_pmrs; /* in, number of submit_pmr's */ > >> + __u32 pad; > > > > This pad isn't needed. > > > > * Pad the entire struct to a multiple of 64-bits if the structure contains > 64-bit types - the structure size will otherwise differ on 32-bit versus > 64-bit. Having a different structure size hurts when passing arrays of > structures to the kernel, or if the kernel checks the structure size, which > e.g. the drm core does. Okay it is still not really necessary (we don't ever have pass an array of struct drm_etnaviv_gem_submit and the worst that could happen is that the DRM core needs to zero the added struct padding on 64bit), but it also doesn't hurt and skipping the zero fill on 64bit sounds good, so please keep as-is. Regards, Lucas
diff --git a/include/uapi/drm/etnaviv_drm.h b/include/uapi/drm/etnaviv_drm.h index 9e488a0..b67a7dd 100644 --- a/include/uapi/drm/etnaviv_drm.h +++ b/include/uapi/drm/etnaviv_drm.h @@ -150,6 +150,19 @@ struct drm_etnaviv_gem_submit_bo { __u64 presumed; /* in/out, presumed buffer address */ }; +/* performance monitor request (pmr) */ +#define ETNA_PM_PROCESS_PRE 0x0001 +#define ETNA_PM_PROCESS_POST 0x0002 +struct drm_etnaviv_gem_submit_pmr { + __u32 flags; /* in, when to process request (ETNA_PM_PROCESS_x) */ + __u8 domain; /* in, pm domain */ + __u8 signal; /* in, pm signal */ + __u16 pad[3]; + __u32 sequence; /* in, sequence number */ + __u32 read_offset; /* in, offset from read_bo */ + __u32 read_idx; /* in, index of read_bo buffer */ +}; + /* Each cmdstream submit consists of a table of buffers involved, and * one or more cmdstream buffers. This allows for conditional execution * (context-restore), and IB buffers needed for per tile/bin draw cmds. @@ -175,6 +188,9 @@ struct drm_etnaviv_gem_submit { __u64 stream; /* in, ptr to cmdstream */ __u32 flags; /* in, mask of ETNA_SUBMIT_x */ __s32 fence_fd; /* in/out, fence fd (see ETNA_SUBMIT_FENCE_FD_x) */ + __u64 pmrs; /* in, ptr to array of submit_pmr's */ + __u32 nr_pmrs; /* in, number of submit_pmr's */ + __u32 pad; }; /* The normal way to synchronize with the GPU is just to CPU_PREP on
Sadly we can not read any registers via command stream so we need to extend the drm_etnaviv_gem_submit struct with performance monitor requests. Those requests gets process before and/or after the actual submitted command stream. The Vivante kernel driver has a special ioctl to read all perfmon registers at once and return it. There is no connection between a specifc command stream and the read perf values. Signed-off-by: Christian Gmeiner <christian.gmeiner@gmail.com> --- include/uapi/drm/etnaviv_drm.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+)