Message ID | 20181120185814.13362-5-Kenny.Ho@amd.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | DRM cgroup controller | expand |
Kenny Ho <Kenny.Ho@amd.com> writes: > Account for the number of command submitted to amdgpu by type on a per > cgroup basis, for the purpose of profiling/monitoring applications. For profiling other drivers, I've used perf tracepoints, which let you get useful timelines of multiple events in the driver. Have you made use of this stat for productive profiling?
Am 20.11.18 um 19:58 schrieb Kenny Ho: > Account for the number of command submitted to amdgpu by type on a per > cgroup basis, for the purpose of profiling/monitoring applications. > > x prefix in the control file name x.cmd_submitted.amd.stat signify > experimental. > > Change-Id: Ibc22e5bda600f54fe820fe0af5400ca348691550 > Signed-off-by: Kenny Ho <Kenny.Ho@amd.com> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 5 ++ > drivers/gpu/drm/amd/amdgpu/amdgpu_drmcgrp.c | 54 +++++++++++++++++++++ > drivers/gpu/drm/amd/amdgpu/amdgpu_drmcgrp.h | 5 ++ > drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 15 ++++++ > drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 5 +- > 5 files changed, 83 insertions(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > index 663043c8f0f5..b448160aed89 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > @@ -33,6 +33,7 @@ > #include "amdgpu_trace.h" > #include "amdgpu_gmc.h" > #include "amdgpu_gem.h" > +#include "amdgpu_drmcgrp.h" > > static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p, > struct drm_amdgpu_cs_chunk_fence *data, > @@ -1275,6 +1276,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) > union drm_amdgpu_cs *cs = data; > struct amdgpu_cs_parser parser = {}; > bool reserved_buffers = false; > + struct amdgpu_ring *ring; > int i, r; > > if (!adev->accel_working) > @@ -1317,6 +1319,9 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) > if (r) > goto out; > > + ring = to_amdgpu_ring(parser.entity->rq->sched); > + amdgpu_drmcgrp_count_cs(current, dev, ring->funcs->type); > + > r = amdgpu_cs_submit(&parser, cs); > > out: > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drmcgrp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drmcgrp.c > index ed8aac17769c..853b77532428 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drmcgrp.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drmcgrp.c > @@ -1,11 +1,65 @@ > // SPDX-License-Identifier: MIT > // Copyright 2018 Advanced Micro Devices, Inc. > #include <linux/slab.h> > +#include <linux/mutex.h> > #include <linux/cgroup_drm.h> > #include <drm/drm_device.h> > +#include "amdgpu_ring.h" > #include "amdgpu_drmcgrp.h" > > +void amdgpu_drmcgrp_count_cs(struct task_struct *task, struct drm_device *dev, > + enum amdgpu_ring_type r_type) > +{ > + struct drmcgrp *drmcgrp = get_drmcgrp(task); > + struct drmcgrp_device_resource *ddr; > + struct drmcgrp *p; > + struct amd_drmcgrp_dev_resource *a_ddr; > + > + if (drmcgrp == NULL) > + return; > + > + ddr = drmcgrp->dev_resources[dev->primary->index]; > + > + mutex_lock(&ddr->ddev->mutex); > + for (p = drmcgrp; p != NULL; p = parent_drmcgrp(drmcgrp)) { > + a_ddr = ddr_amdddr(p->dev_resources[dev->primary->index]); > + > + a_ddr->cs_count[r_type]++; > + } > + mutex_unlock(&ddr->ddev->mutex); > +} > + > +int amd_drmcgrp_cmd_submit_accounting_read(struct seq_file *sf, void *v) > +{ > + struct drmcgrp *drmcgrp = css_drmcgrp(seq_css(sf)); > + struct drmcgrp_device_resource *ddr = NULL; > + struct amd_drmcgrp_dev_resource *a_ddr = NULL; > + int i, j; > + > + seq_puts(sf, "---\n"); > + for (i = 0; i < MAX_DRM_DEV; i++) { > + ddr = drmcgrp->dev_resources[i]; > + > + if (ddr == NULL || ddr->ddev->vid != amd_drmcgrp_vendor_id) > + continue; > + > + a_ddr = ddr_amdddr(ddr); > + > + seq_printf(sf, "card%d:\n", i); > + for (j = 0; j < __MAX_AMDGPU_RING_TYPE; j++) > + seq_printf(sf, " %s: %llu\n", amdgpu_ring_names[j], a_ddr->cs_count[j]); > + } > + > + return 0; > +} > + > + > struct cftype files[] = { > + { > + .name = "x.cmd_submitted.amd.stat", > + .seq_show = amd_drmcgrp_cmd_submit_accounting_read, > + .flags = CFTYPE_NOT_ON_ROOT, > + }, > { } /* terminate */ > }; > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drmcgrp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_drmcgrp.h > index e2934b7a49f5..f894a9a1059f 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drmcgrp.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drmcgrp.h > @@ -5,12 +5,17 @@ > #define _AMDGPU_DRMCGRP_H > > #include <linux/cgroup_drm.h> > +#include "amdgpu_ring.h" > > /* for AMD specific DRM resources */ > struct amd_drmcgrp_dev_resource { > struct drmcgrp_device_resource ddr; > + u64 cs_count[__MAX_AMDGPU_RING_TYPE]; > }; > > +void amdgpu_drmcgrp_count_cs(struct task_struct *task, struct drm_device *dev, > + enum amdgpu_ring_type r_type); > + > static inline struct amd_drmcgrp_dev_resource *ddr_amdddr(struct drmcgrp_device_resource *ddr) > { > return ddr ? container_of(ddr, struct amd_drmcgrp_dev_resource, ddr) : NULL; > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c > index b70e85ec147d..1606f84d2334 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c > @@ -34,6 +34,21 @@ > #include "amdgpu.h" > #include "atom.h" > > +char const *amdgpu_ring_names[] = { > + [AMDGPU_RING_TYPE_GFX] = "gfx", > + [AMDGPU_RING_TYPE_COMPUTE] = "compute", > + [AMDGPU_RING_TYPE_SDMA] = "sdma", > + [AMDGPU_RING_TYPE_UVD] = "uvd", > + [AMDGPU_RING_TYPE_VCE] = "vce", > + [AMDGPU_RING_TYPE_KIQ] = "kiq", > + [AMDGPU_RING_TYPE_UVD_ENC] = "uvd_enc", > + [AMDGPU_RING_TYPE_VCN_DEC] = "vcn_dec", > + [AMDGPU_RING_TYPE_VCN_ENC] = "vcn_enc", > + [AMDGPU_RING_TYPE_VCN_JPEG] = "vcn_jpeg", > + [__MAX_AMDGPU_RING_TYPE] = "_max" > + > +}; > + Each ring already has a dedicated name, so that looks like a duplication to me. What could be is that we need this for the ring type name, but then you need to rename it. And please don't use something like "__MAX_AMDGPU....", just name that AMDGPU_RING_TYPE_LAST. Christian. > /* > * Rings > * Most engines on the GPU are fed via ring buffers. Ring > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h > index 4caa301ce454..e292b7e89c6a 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h > @@ -56,9 +56,12 @@ enum amdgpu_ring_type { > AMDGPU_RING_TYPE_UVD_ENC, > AMDGPU_RING_TYPE_VCN_DEC, > AMDGPU_RING_TYPE_VCN_ENC, > - AMDGPU_RING_TYPE_VCN_JPEG > + AMDGPU_RING_TYPE_VCN_JPEG, > + __MAX_AMDGPU_RING_TYPE > }; > > +extern char const *amdgpu_ring_names[]; > + > struct amdgpu_device; > struct amdgpu_ring; > struct amdgpu_ib;
Am 20.11.18 um 21:57 schrieb Eric Anholt: > Kenny Ho <Kenny.Ho@amd.com> writes: > >> Account for the number of command submitted to amdgpu by type on a per >> cgroup basis, for the purpose of profiling/monitoring applications. > For profiling other drivers, I've used perf tracepoints, which let you > get useful timelines of multiple events in the driver. Have you made > use of this stat for productive profiling? Yes, but this is not related to profiling at all. What we want to do is to limit the resource usage of processes. Regards, Christian. > > _______________________________________________ > amd-gfx mailing list > amd-gfx@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/amd-gfx <html> <head> <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"> </head> <body text="#000000" bgcolor="#FFFFFF"> <div class="moz-cite-prefix">Am 20.11.18 um 21:57 schrieb Eric Anholt:<br> </div> <blockquote type="cite" cite="mid:87r2ff79he.fsf@anholt.net"> <pre class="moz-quote-pre" wrap="">Kenny Ho <a class="moz-txt-link-rfc2396E" href="mailto:Kenny.Ho@amd.com"><Kenny.Ho@amd.com></a> writes: </pre> <blockquote type="cite"> <pre class="moz-quote-pre" wrap="">Account for the number of command submitted to amdgpu by type on a per cgroup basis, for the purpose of profiling/monitoring applications. </pre> </blockquote> <pre class="moz-quote-pre" wrap=""> For profiling other drivers, I've used perf tracepoints, which let you get useful timelines of multiple events in the driver. Have you made use of this stat for productive profiling? </pre> </blockquote> <br> Yes, but this is not related to profiling at all.<br> <br> What we want to do is to limit the resource usage of processes.<br> <br> Regards,<br> Christian.<br> <br> <blockquote type="cite" cite="mid:87r2ff79he.fsf@anholt.net"><br> <fieldset class="mimeAttachmentHeader"></fieldset> <pre class="moz-quote-pre" wrap="">_______________________________________________ amd-gfx mailing list <a class="moz-txt-link-abbreviated" href="mailto:amd-gfx@lists.freedesktop.org">amd-gfx@lists.freedesktop.org</a> <a class="moz-txt-link-freetext" href="https://lists.freedesktop.org/mailman/listinfo/amd-gfx">https://lists.freedesktop.org/mailman/listinfo/amd-gfx</a> </pre> </blockquote> <br> </body> </html>
Christian König <ckoenig.leichtzumerken@gmail.com> writes: > Am 20.11.18 um 21:57 schrieb Eric Anholt: >> Kenny Ho <Kenny.Ho@amd.com> writes: >> >>> Account for the number of command submitted to amdgpu by type on a per >>> cgroup basis, for the purpose of profiling/monitoring applications. >> For profiling other drivers, I've used perf tracepoints, which let you >> get useful timelines of multiple events in the driver. Have you made >> use of this stat for productive profiling? > > Yes, but this is not related to profiling at all. > > What we want to do is to limit the resource usage of processes. That sounds great, and something I'd be interested in for vc4. However, as far as I saw explained here, this patch doesn't let you limit resource usage of a process and is only useful for "profiling/monitoring" so I'm wondering how it is useful for that purpose.
Am 23.11.18 um 18:36 schrieb Eric Anholt: > Christian König <ckoenig.leichtzumerken@gmail.com> writes: > >> Am 20.11.18 um 21:57 schrieb Eric Anholt: >>> Kenny Ho <Kenny.Ho@amd.com> writes: >>> >>>> Account for the number of command submitted to amdgpu by type on a per >>>> cgroup basis, for the purpose of profiling/monitoring applications. >>> For profiling other drivers, I've used perf tracepoints, which let you >>> get useful timelines of multiple events in the driver. Have you made >>> use of this stat for productive profiling? >> Yes, but this is not related to profiling at all. >> >> What we want to do is to limit the resource usage of processes. > That sounds great, and something I'd be interested in for vc4. However, > as far as I saw explained here, this patch doesn't let you limit > resource usage of a process and is only useful for > "profiling/monitoring" so I'm wondering how it is useful for that > purpose. Ok, good to know. I haven't looked at this in deep, but if this is just for accounting that would certainly be missing the goal. Christian.
On Fri, Nov 23, 2018 at 1:13 PM Koenig, Christian <Christian.Koenig@amd.com> wrote: > Am 23.11.18 um 18:36 schrieb Eric Anholt: > > Christian König <ckoenig.leichtzumerken@gmail.com> writes: > >> Am 20.11.18 um 21:57 schrieb Eric Anholt: > >>> Kenny Ho <Kenny.Ho@amd.com> writes: > >>>> Account for the number of command submitted to amdgpu by type on a per > >>>> cgroup basis, for the purpose of profiling/monitoring applications. > >>> For profiling other drivers, I've used perf tracepoints, which let you > >>> get useful timelines of multiple events in the driver. Have you made > >>> use of this stat for productive profiling? > >> Yes, but this is not related to profiling at all. > >> > >> What we want to do is to limit the resource usage of processes. > > That sounds great, and something I'd be interested in for vc4. However, > > as far as I saw explained here, this patch doesn't let you limit > > resource usage of a process and is only useful for > > "profiling/monitoring" so I'm wondering how it is useful for that > > purpose. > > Ok, good to know. I haven't looked at this in deep, but if this is just > for accounting that would certainly be missing the goal. The end goal is to have limit in place. The current patch is mostly to illustrate the structure of the controller and get some early feedback. I will have more soon. Regards, Kenny
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 663043c8f0f5..b448160aed89 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -33,6 +33,7 @@ #include "amdgpu_trace.h" #include "amdgpu_gmc.h" #include "amdgpu_gem.h" +#include "amdgpu_drmcgrp.h" static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p, struct drm_amdgpu_cs_chunk_fence *data, @@ -1275,6 +1276,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) union drm_amdgpu_cs *cs = data; struct amdgpu_cs_parser parser = {}; bool reserved_buffers = false; + struct amdgpu_ring *ring; int i, r; if (!adev->accel_working) @@ -1317,6 +1319,9 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) if (r) goto out; + ring = to_amdgpu_ring(parser.entity->rq->sched); + amdgpu_drmcgrp_count_cs(current, dev, ring->funcs->type); + r = amdgpu_cs_submit(&parser, cs); out: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drmcgrp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drmcgrp.c index ed8aac17769c..853b77532428 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drmcgrp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drmcgrp.c @@ -1,11 +1,65 @@ // SPDX-License-Identifier: MIT // Copyright 2018 Advanced Micro Devices, Inc. #include <linux/slab.h> +#include <linux/mutex.h> #include <linux/cgroup_drm.h> #include <drm/drm_device.h> +#include "amdgpu_ring.h" #include "amdgpu_drmcgrp.h" +void amdgpu_drmcgrp_count_cs(struct task_struct *task, struct drm_device *dev, + enum amdgpu_ring_type r_type) +{ + struct drmcgrp *drmcgrp = get_drmcgrp(task); + struct drmcgrp_device_resource *ddr; + struct drmcgrp *p; + struct amd_drmcgrp_dev_resource *a_ddr; + + if (drmcgrp == NULL) + return; + + ddr = drmcgrp->dev_resources[dev->primary->index]; + + mutex_lock(&ddr->ddev->mutex); + for (p = drmcgrp; p != NULL; p = parent_drmcgrp(drmcgrp)) { + a_ddr = ddr_amdddr(p->dev_resources[dev->primary->index]); + + a_ddr->cs_count[r_type]++; + } + mutex_unlock(&ddr->ddev->mutex); +} + +int amd_drmcgrp_cmd_submit_accounting_read(struct seq_file *sf, void *v) +{ + struct drmcgrp *drmcgrp = css_drmcgrp(seq_css(sf)); + struct drmcgrp_device_resource *ddr = NULL; + struct amd_drmcgrp_dev_resource *a_ddr = NULL; + int i, j; + + seq_puts(sf, "---\n"); + for (i = 0; i < MAX_DRM_DEV; i++) { + ddr = drmcgrp->dev_resources[i]; + + if (ddr == NULL || ddr->ddev->vid != amd_drmcgrp_vendor_id) + continue; + + a_ddr = ddr_amdddr(ddr); + + seq_printf(sf, "card%d:\n", i); + for (j = 0; j < __MAX_AMDGPU_RING_TYPE; j++) + seq_printf(sf, " %s: %llu\n", amdgpu_ring_names[j], a_ddr->cs_count[j]); + } + + return 0; +} + + struct cftype files[] = { + { + .name = "x.cmd_submitted.amd.stat", + .seq_show = amd_drmcgrp_cmd_submit_accounting_read, + .flags = CFTYPE_NOT_ON_ROOT, + }, { } /* terminate */ }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drmcgrp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_drmcgrp.h index e2934b7a49f5..f894a9a1059f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drmcgrp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drmcgrp.h @@ -5,12 +5,17 @@ #define _AMDGPU_DRMCGRP_H #include <linux/cgroup_drm.h> +#include "amdgpu_ring.h" /* for AMD specific DRM resources */ struct amd_drmcgrp_dev_resource { struct drmcgrp_device_resource ddr; + u64 cs_count[__MAX_AMDGPU_RING_TYPE]; }; +void amdgpu_drmcgrp_count_cs(struct task_struct *task, struct drm_device *dev, + enum amdgpu_ring_type r_type); + static inline struct amd_drmcgrp_dev_resource *ddr_amdddr(struct drmcgrp_device_resource *ddr) { return ddr ? container_of(ddr, struct amd_drmcgrp_dev_resource, ddr) : NULL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index b70e85ec147d..1606f84d2334 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -34,6 +34,21 @@ #include "amdgpu.h" #include "atom.h" +char const *amdgpu_ring_names[] = { + [AMDGPU_RING_TYPE_GFX] = "gfx", + [AMDGPU_RING_TYPE_COMPUTE] = "compute", + [AMDGPU_RING_TYPE_SDMA] = "sdma", + [AMDGPU_RING_TYPE_UVD] = "uvd", + [AMDGPU_RING_TYPE_VCE] = "vce", + [AMDGPU_RING_TYPE_KIQ] = "kiq", + [AMDGPU_RING_TYPE_UVD_ENC] = "uvd_enc", + [AMDGPU_RING_TYPE_VCN_DEC] = "vcn_dec", + [AMDGPU_RING_TYPE_VCN_ENC] = "vcn_enc", + [AMDGPU_RING_TYPE_VCN_JPEG] = "vcn_jpeg", + [__MAX_AMDGPU_RING_TYPE] = "_max" + +}; + /* * Rings * Most engines on the GPU are fed via ring buffers. Ring diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 4caa301ce454..e292b7e89c6a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -56,9 +56,12 @@ enum amdgpu_ring_type { AMDGPU_RING_TYPE_UVD_ENC, AMDGPU_RING_TYPE_VCN_DEC, AMDGPU_RING_TYPE_VCN_ENC, - AMDGPU_RING_TYPE_VCN_JPEG + AMDGPU_RING_TYPE_VCN_JPEG, + __MAX_AMDGPU_RING_TYPE }; +extern char const *amdgpu_ring_names[]; + struct amdgpu_device; struct amdgpu_ring; struct amdgpu_ib;
Account for the number of command submitted to amdgpu by type on a per cgroup basis, for the purpose of profiling/monitoring applications. x prefix in the control file name x.cmd_submitted.amd.stat signify experimental. Change-Id: Ibc22e5bda600f54fe820fe0af5400ca348691550 Signed-off-by: Kenny Ho <Kenny.Ho@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 5 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_drmcgrp.c | 54 +++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_drmcgrp.h | 5 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 15 ++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 5 +- 5 files changed, 83 insertions(+), 1 deletion(-)