diff mbox series

[v3] drm: Funnel drm logs to tracepoints

Message ID 20191212203301.142437-1-sean@poorly.run (mailing list archive)
State New, archived
Headers show
Series [v3] drm: Funnel drm logs to tracepoints | expand

Commit Message

Sean Paul Dec. 12, 2019, 8:32 p.m. UTC
From: Sean Paul <seanpaul@chromium.org>

For a long while now, we (ChromeOS) have been struggling getting any
value out of user feedback reports of display failures (notably external
displays not working). The problem is that all logging, even fatal
errors (well, fatal in the sense that a display won't light up) are
logged at DEBUG log level. So in order to extract these logs, users need
to be able to turn on logging, and reproduce the issue with debug
enabled. Unfortunately, this isn't really something we can ask CrOS users
to do. I spoke with airlied about this and RHEL has similar issues. After
a few more people piped up on previous versions of this patch, it is a
Real Issue.

So why don't we just enable DRM_UT_BLAH? Here are the reasons in
ascending order of severity:
 1- People aren't consistent with their categories, so we'd have to
    enable a bunch to get proper coverage
 2- We don't want to overwhelm syslog with drm spam, others have to use
    it too
 3- Console logging is slow

So what we really want is a ringbuffer of the most recent logs
(filtered by categories we're interested in) exposed via debugfs so the
logs can be extracted when users file feedback.

It just so happens that there is something which does _exactly_ this!
This patch dumps drm logs into tracepoints, which allows us to turn tracing
on and off depending on which category is useful, and pull them from
tracefs on demand.

What about trace_printk()? It doesn't give us the control we get from using
tracepoints and it's not meant to be left sprinkled around in code.

Cc: David Airlie <airlied@gmail.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Pekka Paalanen <ppaalanen@gmail.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Thomas Zimmermann <tzimmermann@suse.de>
Cc: Rob Clark <robdclark@gmail.com>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Sean Paul <seanpaul@chromium.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20191010204823.195540-1-sean@poorly.run #v1

Changes in v2:
- Went with a completely different approach: https://lists.freedesktop.org/archives/dri-devel/2019-November/243230.html

Changes in v3:
- Changed commit message to be a bit less RFC-y
- Make class_drm_category_log an actual trace class
---

Even though we don't want it to be, this is UAPI. So here's some userspace
code which uses it:
https://chromium-review.googlesource.com/c/chromiumos/platform2/+/1965562


 drivers/gpu/drm/drm_print.c      | 143 ++++++++++++++++++++++++++-----
 include/trace/events/drm_print.h | 116 +++++++++++++++++++++++++
 2 files changed, 239 insertions(+), 20 deletions(-)
 create mode 100644 include/trace/events/drm_print.h

Comments

Steven Rostedt Dec. 13, 2019, 3:20 a.m. UTC | #1
On Thu, 12 Dec 2019 15:32:35 -0500
Sean Paul <sean@poorly.run> wrote:

> 
> What about trace_printk()? It doesn't give us the control we get from using
> tracepoints and it's not meant to be left sprinkled around in code.

Not to mention that trace_printk() is not for production use (only for
developers debugging purposes).

> 
> Cc: David Airlie <airlied@gmail.com>
> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
> Cc: Pekka Paalanen <ppaalanen@gmail.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Thomas Zimmermann <tzimmermann@suse.de>
> Cc: Rob Clark <robdclark@gmail.com>
> Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
> Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
> Signed-off-by: Sean Paul <seanpaul@chromium.org>
> Link: https://patchwork.freedesktop.org/patch/msgid/20191010204823.195540-1-sean@poorly.run #v1
> 
> Changes in v2:
> - Went with a completely different approach: https://lists.freedesktop.org/archives/dri-devel/2019-November/243230.html
> 
> Changes in v3:
> - Changed commit message to be a bit less RFC-y
> - Make class_drm_category_log an actual trace class
> ---
> 
> Even though we don't want it to be, this is UAPI. So here's some userspace
> code which uses it:
> https://chromium-review.googlesource.com/c/chromiumos/platform2/+/1965562
> 
> 
>  drivers/gpu/drm/drm_print.c      | 143 ++++++++++++++++++++++++++-----
>  include/trace/events/drm_print.h | 116 +++++++++++++++++++++++++
>  2 files changed, 239 insertions(+), 20 deletions(-)
>  create mode 100644 include/trace/events/drm_print.h
> 
> diff --git a/drivers/gpu/drm/drm_print.c b/drivers/gpu/drm/drm_print.c
> index 9a25d73c155c..f591292811aa 100644
> --- a/drivers/gpu/drm/drm_print.c
> +++ b/drivers/gpu/drm/drm_print.c
> @@ -27,11 +27,15 @@
>  
>  #include <stdarg.h>
>  
> +#include <linux/bitops.h>
>  #include <linux/io.h>
>  #include <linux/moduleparam.h>
>  #include <linux/seq_file.h>
>  #include <linux/slab.h>
>  
> +#define CREATE_TRACE_POINTS
> +#include <trace/events/drm_print.h>
> +
>  #include <drm/drm.h>
>  #include <drm/drm_drv.h>
>  #include <drm/drm_print.h>
> @@ -241,10 +245,10 @@ void drm_dev_printk(const struct device *dev, const char *level,
>  	struct va_format vaf;
>  	va_list args;
>  
> -	va_start(args, format);
>  	vaf.fmt = format;
>  	vaf.va = &args;
>  
> +	va_start(args, format);
>  	if (dev)
>  		dev_printk(level, dev, "[" DRM_NAME ":%ps] %pV",
>  			   __builtin_return_address(0), &vaf);
> @@ -253,49 +257,145 @@ void drm_dev_printk(const struct device *dev, const char *level,
>  		       level, __builtin_return_address(0), &vaf);
>  
>  	va_end(args);
> +
> +	va_start(args, format);
> +	trace_drm_log(level, dev, &vaf);
> +	va_end(args);
>  }
>  EXPORT_SYMBOL(drm_dev_printk);
>  
> +static unsigned int drm_trace_enabled(unsigned int category)
> +{
> +	unsigned int bit;
> +
> +	for_each_set_bit(bit, (unsigned long*)&category, sizeof(category) * 8) {
> +		switch (BIT(bit)) {
> +		case DRM_UT_NONE:
> +			return trace_drm_dbg_none_enabled();
> +		case DRM_UT_CORE:
> +			return trace_drm_dbg_core_enabled();
> +		case DRM_UT_DRIVER:
> +			return trace_drm_dbg_driver_enabled();
> +		case DRM_UT_KMS:
> +			return trace_drm_dbg_kms_enabled();
> +		case DRM_UT_PRIME:
> +			return trace_drm_dbg_prime_enabled();
> +		case DRM_UT_ATOMIC:
> +			return trace_drm_dbg_atomic_enabled();
> +		case DRM_UT_VBL:
> +			return trace_drm_dbg_vbl_enabled();
> +		case DRM_UT_STATE:
> +			return trace_drm_dbg_state_enabled();
> +		case DRM_UT_LEASE:
> +			return trace_drm_dbg_lease_enabled();
> +		case DRM_UT_DP:
> +			return trace_drm_dbg_dp_enabled();
> +		default:
> +			return trace_drm_dbg_unknown_enabled();
> +		}

Why this double loop?

> +	}
> +	return false;
> +}
> +
> +static void drm_do_trace(const struct device *dev, unsigned int category,
> +			 struct va_format *vaf)
> +{
> +	WARN_ON(hweight32(category) > 1);
> +
> +	switch (category) {
> +	case DRM_UT_NONE:
> +		trace_drm_dbg_none(dev, vaf);
> +		break;
> +	case DRM_UT_CORE:
> +		trace_drm_dbg_core(dev, vaf);
> +		break;
> +	case DRM_UT_DRIVER:
> +		trace_drm_dbg_driver(dev, vaf);
> +		break;
> +	case DRM_UT_KMS:
> +		trace_drm_dbg_kms(dev, vaf);
> +		break;
> +	case DRM_UT_PRIME:
> +		trace_drm_dbg_prime(dev, vaf);
> +		break;
> +	case DRM_UT_ATOMIC:
> +		trace_drm_dbg_atomic(dev, vaf);
> +		break;
> +	case DRM_UT_VBL:
> +		trace_drm_dbg_vbl(dev, vaf);
> +		break;
> +	case DRM_UT_STATE:
> +		trace_drm_dbg_state(dev, vaf);
> +		break;
> +	case DRM_UT_LEASE:
> +		trace_drm_dbg_lease(dev, vaf);
> +		break;
> +	case DRM_UT_DP:
> +		trace_drm_dbg_dp(dev, vaf);
> +		break;
> +	default:
> +		trace_drm_dbg_unknown(dev, vaf);
> +		break;

Why is there a separate trace event for each of these?

> +	}
> +}
> +
>  void drm_dev_dbg(const struct device *dev, unsigned int category,
>  		 const char *format, ...)
>  {
>  	struct va_format vaf;
> +	unsigned int bit;
>  	va_list args;
>  
> -	if (!drm_debug_enabled(category))
> -		return;
> -
> -	va_start(args, format);
>  	vaf.fmt = format;
>  	vaf.va = &args;
>  
> -	if (dev)
> -		dev_printk(KERN_DEBUG, dev, "[" DRM_NAME ":%ps] %pV",
> -			   __builtin_return_address(0), &vaf);
> -	else
> -		printk(KERN_DEBUG "[" DRM_NAME ":%ps] %pV",
> -		       __builtin_return_address(0), &vaf);
> +	if (drm_debug_enabled(category)) {
> +		va_start(args, format);
> +		if (dev)
> +			dev_printk(KERN_DEBUG, dev, "[" DRM_NAME ":%ps] %pV",
> +				   __builtin_return_address(0), &vaf);
> +		else
> +			printk(KERN_DEBUG "[" DRM_NAME ":%ps] %pV",
> +			       __builtin_return_address(0), &vaf);
> +		va_end(args);
> +	}
> +
> +	if (!drm_trace_enabled(category))
> +		return;
> +
> +	for_each_set_bit(bit, (unsigned long*)&category, sizeof(category) * 8) {
> +		va_start(args, format);
> +		drm_do_trace(dev, BIT(bit), &vaf);
> +		va_end(args);

I'm thinking we could find a way to move the logic here to have a
single loop.

Note, the current approach is racy. The state can change between the
"drm_trace_enabled()" and this for loop.

-- Steve


> +	}
>  
> -	va_end(args);
>  }
>  EXPORT_SYMBOL(drm_dev_dbg);
>  
>  void drm_dbg(unsigned int category, const char *format, ...)
>  {
>  	struct va_format vaf;
> +	unsigned int bit;
>  	va_list args;
>  
> -	if (!drm_debug_enabled(category))
> -		return;
> -
> -	va_start(args, format);
>  	vaf.fmt = format;
>  	vaf.va = &args;
>  
> -	printk(KERN_DEBUG "[" DRM_NAME ":%ps] %pV",
> -	       __builtin_return_address(0), &vaf);
> +	if (drm_debug_enabled(category)) {
> +		va_start(args, format);
> +		printk(KERN_DEBUG "[" DRM_NAME ":%ps] %pV",
> +		       __builtin_return_address(0), &vaf);
> +		va_end(args);
> +	}
>  
> -	va_end(args);
> +	if (!drm_trace_enabled(category))
> +		return;
> +
> +	for_each_set_bit(bit, (unsigned long*)&category, sizeof(category) * 8) {
> +		va_start(args, format);
> +		drm_do_trace(NULL, BIT(bit), &vaf);
> +		va_end(args);
> +	}
>  }
>  EXPORT_SYMBOL(drm_dbg);
>  
> @@ -304,13 +404,16 @@ void drm_err(const char *format, ...)
>  	struct va_format vaf;
>  	va_list args;
>  
> -	va_start(args, format);
>  	vaf.fmt = format;
>  	vaf.va = &args;
>  
> +	va_start(args, format);
>  	printk(KERN_ERR "[" DRM_NAME ":%ps] *ERROR* %pV",
>  	       __builtin_return_address(0), &vaf);
> +	va_end(args);
>  
> +	va_start(args, format);
> +	trace_drm_err(NULL, &vaf);
>  	va_end(args);
>  }
>  EXPORT_SYMBOL(drm_err);
> diff --git a/include/trace/events/drm_print.h b/include/trace/events/drm_print.h
> new file mode 100644
> index 000000000000..862728fe0f89
> --- /dev/null
> +++ b/include/trace/events/drm_print.h
> @@ -0,0 +1,116 @@
> +// SPDX-License-Identifier: MIT
> +/*
> + * Copyright (C) 2019 Google, Inc.
> + *
> + * Authors:
> + * Sean Paul <seanpaul@chromium.org>
> + */
> +#undef TRACE_SYSTEM
> +#define TRACE_SYSTEM drm_print
> +
> +#if !defined(_TRACE_DRM_PRINT_H) || defined(TRACE_HEADER_MULTI_READ)
> +
> +#include <linux/device.h>
> +#include <linux/tracepoint.h>
> +
> +#define DRM_PRINT_MAX 256
> +
> +#define _TRACE_DRM_PRINT_H
> +
> +TRACE_EVENT(drm_log,
> +	TP_PROTO(const char * level, const struct device *dev,
> +		 struct va_format *vaf),
> +	TP_ARGS(level, dev, vaf),
> +	TP_STRUCT__entry(
> +		__field(const char *,		level			)
> +		__field(const struct device *,	dev			)
> +		__dynamic_array(char,		msg,	DRM_PRINT_MAX	)
> +		__field(int,			len			)
> +	),
> +	TP_fast_assign(
> +		__entry->level = level;
> +		__entry->dev = dev;
> +		__entry->len = vsnprintf(__get_str(msg), DRM_PRINT_MAX,
> +					 vaf->fmt, *vaf->va);
> +		if (__entry->len > 0 &&
> +		    __get_str(msg)[__entry->len - 1] == '\n')
> +			__get_str(msg)[--__entry->len] = '\0';
> +	),
> +	TP_printk("%s %s %s: %s", __entry->level,
> +		  __entry->dev ? dev_driver_string(__entry->dev) : "(NULL):",
> +		  __entry->dev ? dev_name(__entry->dev) : "", __get_str(msg))
> +);
> +
> +DECLARE_EVENT_CLASS(class_drm_category_log,
> +	TP_PROTO(const struct device *dev, struct va_format *vaf),
> +	TP_ARGS(dev, vaf),
> +	TP_STRUCT__entry(
> +		__field(const struct device *,	dev			)
> +		__dynamic_array(char,		msg,	DRM_PRINT_MAX	)
> +		__field(int,			len			)
> +	),
> +	TP_fast_assign(
> +		__entry->dev = dev;
> +		__entry->len = vsnprintf(__get_str(msg), DRM_PRINT_MAX,
> +					 vaf->fmt, *vaf->va);
> +		if (__entry->len > 0 &&
> +		    __get_str(msg)[__entry->len - 1] == '\n')
> +			__get_str(msg)[--__entry->len] = '\0';
> +	),
> +	TP_printk("%s %s%s%s",
> +		  __entry->dev ? dev_driver_string(__entry->dev) : "",
> +		  __entry->dev ? dev_name(__entry->dev) : "",
> +		  __entry->dev ? ": " : "", __get_str(msg))
> +);
> +DEFINE_EVENT(class_drm_category_log, drm_err,
> +	TP_PROTO(const struct device *dev, struct va_format *vaf),
> +	TP_ARGS(dev, vaf)
> +);
> +DEFINE_EVENT(class_drm_category_log, drm_dbg_none,
> +	TP_PROTO(const struct device *dev, struct va_format *vaf),
> +	TP_ARGS(dev, vaf)
> +);
> +DEFINE_EVENT(class_drm_category_log, drm_dbg_core,
> +	TP_PROTO(const struct device *dev, struct va_format *vaf),
> +	TP_ARGS(dev, vaf)
> +);
> +DEFINE_EVENT(class_drm_category_log, drm_dbg_driver,
> +	TP_PROTO(const struct device *dev, struct va_format *vaf),
> +	TP_ARGS(dev, vaf)
> +);
> +DEFINE_EVENT(class_drm_category_log, drm_dbg_kms,
> +	TP_PROTO(const struct device *dev, struct va_format *vaf),
> +	TP_ARGS(dev, vaf)
> +);
> +DEFINE_EVENT(class_drm_category_log, drm_dbg_prime,
> +	TP_PROTO(const struct device *dev, struct va_format *vaf),
> +	TP_ARGS(dev, vaf)
> +);
> +DEFINE_EVENT(class_drm_category_log, drm_dbg_atomic,
> +	TP_PROTO(const struct device *dev, struct va_format *vaf),
> +	TP_ARGS(dev, vaf)
> +);
> +DEFINE_EVENT(class_drm_category_log, drm_dbg_vbl,
> +	TP_PROTO(const struct device *dev, struct va_format *vaf),
> +	TP_ARGS(dev, vaf)
> +);
> +DEFINE_EVENT(class_drm_category_log, drm_dbg_state,
> +	TP_PROTO(const struct device *dev, struct va_format *vaf),
> +	TP_ARGS(dev, vaf)
> +);
> +DEFINE_EVENT(class_drm_category_log, drm_dbg_lease,
> +	TP_PROTO(const struct device *dev, struct va_format *vaf),
> +	TP_ARGS(dev, vaf)
> +);
> +DEFINE_EVENT(class_drm_category_log, drm_dbg_dp,
> +	TP_PROTO(const struct device *dev, struct va_format *vaf),
> +	TP_ARGS(dev, vaf)
> +);
> +DEFINE_EVENT(class_drm_category_log, drm_dbg_unknown,
> +	TP_PROTO(const struct device *dev, struct va_format *vaf),
> +	TP_ARGS(dev, vaf)
> +);
> +#endif
> +
> +/* This part must be outside protection */
> +#include <trace/define_trace.h>
Pekka Paalanen Dec. 13, 2019, 11:34 a.m. UTC | #2
On Thu, 12 Dec 2019 15:32:35 -0500
Sean Paul <sean@poorly.run> wrote:

> From: Sean Paul <seanpaul@chromium.org>
> 
> For a long while now, we (ChromeOS) have been struggling getting any
> value out of user feedback reports of display failures (notably external
> displays not working). The problem is that all logging, even fatal
> errors (well, fatal in the sense that a display won't light up) are
> logged at DEBUG log level. So in order to extract these logs, users need
> to be able to turn on logging, and reproduce the issue with debug
> enabled. Unfortunately, this isn't really something we can ask CrOS users
> to do. I spoke with airlied about this and RHEL has similar issues. After
> a few more people piped up on previous versions of this patch, it is a
> Real Issue.
> 
> So why don't we just enable DRM_UT_BLAH? Here are the reasons in
> ascending order of severity:
>  1- People aren't consistent with their categories, so we'd have to
>     enable a bunch to get proper coverage
>  2- We don't want to overwhelm syslog with drm spam, others have to use
>     it too
>  3- Console logging is slow
> 
> So what we really want is a ringbuffer of the most recent logs
> (filtered by categories we're interested in) exposed via debugfs so the
> logs can be extracted when users file feedback.
> 
> It just so happens that there is something which does _exactly_ this!
> This patch dumps drm logs into tracepoints, which allows us to turn tracing
> on and off depending on which category is useful, and pull them from
> tracefs on demand.
> 
> What about trace_printk()? It doesn't give us the control we get from using
> tracepoints and it's not meant to be left sprinkled around in code.
> 
> Cc: David Airlie <airlied@gmail.com>
> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
> Cc: Pekka Paalanen <ppaalanen@gmail.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Thomas Zimmermann <tzimmermann@suse.de>
> Cc: Rob Clark <robdclark@gmail.com>
> Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
> Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
> Signed-off-by: Sean Paul <seanpaul@chromium.org>
> Link: https://patchwork.freedesktop.org/patch/msgid/20191010204823.195540-1-sean@poorly.run #v1
> 
> Changes in v2:
> - Went with a completely different approach: https://lists.freedesktop.org/archives/dri-devel/2019-November/243230.html
> 
> Changes in v3:
> - Changed commit message to be a bit less RFC-y
> - Make class_drm_category_log an actual trace class
> ---
> 
> Even though we don't want it to be, this is UAPI. So here's some userspace
> code which uses it:
> https://chromium-review.googlesource.com/c/chromiumos/platform2/+/1965562
> 
> 
>  drivers/gpu/drm/drm_print.c      | 143 ++++++++++++++++++++++++++-----
>  include/trace/events/drm_print.h | 116 +++++++++++++++++++++++++
>  2 files changed, 239 insertions(+), 20 deletions(-)
>  create mode 100644 include/trace/events/drm_print.h

Hi,

reading the userspace patch is very enlightening, thanks.

It uses debugfs, and it uses the generic tracing UAPI. When all
distributions will enable this debug logging like you do in your
userspace patch (I really want that to be the end result, since
otherwise we are back to asking people to manually enable debug and then
reproduce the failure), does that scale?

What if V4L2 is the next one deciding they need a similar logging
framework to debug camera issues? If the trace log is already flooded
with DRM messages, it will be useless for them?

Or maybe someone else wants their piece and flood it even more
aggressively than DRM, making the DRM messages disappear before they
can be saved?

Is there a way to pull out messages
from /sys/kernel/debug/tracing/trace and filter them on reading instead
of on writing?


Thanks,
pq
Steven Rostedt Dec. 13, 2019, 2:55 p.m. UTC | #3
On Fri, 13 Dec 2019 13:34:46 +0200
Pekka Paalanen <ppaalanen@gmail.com> wrote:

> Is there a way to pull out messages
> from /sys/kernel/debug/tracing/trace and filter them on reading instead
> of on writing?

FYI, you can mount the tracing directory without having to mount the
debug directory.

 mount -t tracefs nodev /sys/kernel/tracing

This way you don't need to mount everyone's debugging features just to
access tracing.

-- Steve
Jani Nikula Dec. 13, 2019, 3:34 p.m. UTC | #4
On Thu, 12 Dec 2019, Sean Paul <sean@poorly.run> wrote:
> From: Sean Paul <seanpaul@chromium.org>
>
> For a long while now, we (ChromeOS) have been struggling getting any
> value out of user feedback reports of display failures (notably external
> displays not working). The problem is that all logging, even fatal
> errors (well, fatal in the sense that a display won't light up) are
> logged at DEBUG log level. So in order to extract these logs, users need
> to be able to turn on logging, and reproduce the issue with debug
> enabled. Unfortunately, this isn't really something we can ask CrOS users
> to do. I spoke with airlied about this and RHEL has similar issues. After
> a few more people piped up on previous versions of this patch, it is a
> Real Issue.
>
> So why don't we just enable DRM_UT_BLAH? Here are the reasons in
> ascending order of severity:
>  1- People aren't consistent with their categories, so we'd have to
>     enable a bunch to get proper coverage
>  2- We don't want to overwhelm syslog with drm spam, others have to use
>     it too
>  3- Console logging is slow
>
> So what we really want is a ringbuffer of the most recent logs
> (filtered by categories we're interested in) exposed via debugfs so the
> logs can be extracted when users file feedback.
>
> It just so happens that there is something which does _exactly_ this!
> This patch dumps drm logs into tracepoints, which allows us to turn tracing
> on and off depending on which category is useful, and pull them from
> tracefs on demand.
>
> What about trace_printk()? It doesn't give us the control we get from using
> tracepoints and it's not meant to be left sprinkled around in code.
>
> Cc: David Airlie <airlied@gmail.com>
> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
> Cc: Pekka Paalanen <ppaalanen@gmail.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Thomas Zimmermann <tzimmermann@suse.de>
> Cc: Rob Clark <robdclark@gmail.com>
> Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
> Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
> Signed-off-by: Sean Paul <seanpaul@chromium.org>
> Link: https://patchwork.freedesktop.org/patch/msgid/20191010204823.195540-1-sean@poorly.run #v1
>
> Changes in v2:
> - Went with a completely different approach: https://lists.freedesktop.org/archives/dri-devel/2019-November/243230.html
>
> Changes in v3:
> - Changed commit message to be a bit less RFC-y
> - Make class_drm_category_log an actual trace class
> ---
>
> Even though we don't want it to be, this is UAPI. So here's some userspace
> code which uses it:
> https://chromium-review.googlesource.com/c/chromiumos/platform2/+/1965562
>
>
>  drivers/gpu/drm/drm_print.c      | 143 ++++++++++++++++++++++++++-----
>  include/trace/events/drm_print.h | 116 +++++++++++++++++++++++++
>  2 files changed, 239 insertions(+), 20 deletions(-)
>  create mode 100644 include/trace/events/drm_print.h
>
> diff --git a/drivers/gpu/drm/drm_print.c b/drivers/gpu/drm/drm_print.c
> index 9a25d73c155c..f591292811aa 100644
> --- a/drivers/gpu/drm/drm_print.c
> +++ b/drivers/gpu/drm/drm_print.c
> @@ -27,11 +27,15 @@
>  
>  #include <stdarg.h>
>  
> +#include <linux/bitops.h>
>  #include <linux/io.h>
>  #include <linux/moduleparam.h>
>  #include <linux/seq_file.h>
>  #include <linux/slab.h>
>  
> +#define CREATE_TRACE_POINTS
> +#include <trace/events/drm_print.h>
> +
>  #include <drm/drm.h>
>  #include <drm/drm_drv.h>
>  #include <drm/drm_print.h>
> @@ -241,10 +245,10 @@ void drm_dev_printk(const struct device *dev, const char *level,
>  	struct va_format vaf;
>  	va_list args;
>  
> -	va_start(args, format);
>  	vaf.fmt = format;
>  	vaf.va = &args;
>  
> +	va_start(args, format);
>  	if (dev)
>  		dev_printk(level, dev, "[" DRM_NAME ":%ps] %pV",
>  			   __builtin_return_address(0), &vaf);
> @@ -253,49 +257,145 @@ void drm_dev_printk(const struct device *dev, const char *level,
>  		       level, __builtin_return_address(0), &vaf);
>  
>  	va_end(args);
> +
> +	va_start(args, format);
> +	trace_drm_log(level, dev, &vaf);
> +	va_end(args);
>  }
>  EXPORT_SYMBOL(drm_dev_printk);
>  
> +static unsigned int drm_trace_enabled(unsigned int category)
> +{
> +	unsigned int bit;
> +
> +	for_each_set_bit(bit, (unsigned long*)&category, sizeof(category) * 8) {

You'll want to use BITS_PER_TYPE().

But wait, I've switched category to an enum upstream, and there should
only ever be one bit set anyway?

> +		switch (BIT(bit)) {
> +		case DRM_UT_NONE:
> +			return trace_drm_dbg_none_enabled();
> +		case DRM_UT_CORE:
> +			return trace_drm_dbg_core_enabled();
> +		case DRM_UT_DRIVER:
> +			return trace_drm_dbg_driver_enabled();
> +		case DRM_UT_KMS:
> +			return trace_drm_dbg_kms_enabled();
> +		case DRM_UT_PRIME:
> +			return trace_drm_dbg_prime_enabled();
> +		case DRM_UT_ATOMIC:
> +			return trace_drm_dbg_atomic_enabled();
> +		case DRM_UT_VBL:
> +			return trace_drm_dbg_vbl_enabled();
> +		case DRM_UT_STATE:
> +			return trace_drm_dbg_state_enabled();
> +		case DRM_UT_LEASE:
> +			return trace_drm_dbg_lease_enabled();
> +		case DRM_UT_DP:
> +			return trace_drm_dbg_dp_enabled();
> +		default:
> +			return trace_drm_dbg_unknown_enabled();
> +		}
> +	}
> +	return false;
> +}
> +
> +static void drm_do_trace(const struct device *dev, unsigned int category,
> +			 struct va_format *vaf)
> +{
> +	WARN_ON(hweight32(category) > 1);
> +
> +	switch (category) {
> +	case DRM_UT_NONE:
> +		trace_drm_dbg_none(dev, vaf);
> +		break;
> +	case DRM_UT_CORE:
> +		trace_drm_dbg_core(dev, vaf);
> +		break;
> +	case DRM_UT_DRIVER:
> +		trace_drm_dbg_driver(dev, vaf);
> +		break;
> +	case DRM_UT_KMS:
> +		trace_drm_dbg_kms(dev, vaf);
> +		break;
> +	case DRM_UT_PRIME:
> +		trace_drm_dbg_prime(dev, vaf);
> +		break;
> +	case DRM_UT_ATOMIC:
> +		trace_drm_dbg_atomic(dev, vaf);
> +		break;
> +	case DRM_UT_VBL:
> +		trace_drm_dbg_vbl(dev, vaf);
> +		break;
> +	case DRM_UT_STATE:
> +		trace_drm_dbg_state(dev, vaf);
> +		break;
> +	case DRM_UT_LEASE:
> +		trace_drm_dbg_lease(dev, vaf);
> +		break;
> +	case DRM_UT_DP:
> +		trace_drm_dbg_dp(dev, vaf);
> +		break;
> +	default:
> +		trace_drm_dbg_unknown(dev, vaf);
> +		break;
> +	}
> +}
> +
>  void drm_dev_dbg(const struct device *dev, unsigned int category,
>  		 const char *format, ...)
>  {
>  	struct va_format vaf;
> +	unsigned int bit;
>  	va_list args;
>  
> -	if (!drm_debug_enabled(category))
> -		return;
> -
> -	va_start(args, format);
>  	vaf.fmt = format;
>  	vaf.va = &args;
>  
> -	if (dev)
> -		dev_printk(KERN_DEBUG, dev, "[" DRM_NAME ":%ps] %pV",
> -			   __builtin_return_address(0), &vaf);
> -	else
> -		printk(KERN_DEBUG "[" DRM_NAME ":%ps] %pV",
> -		       __builtin_return_address(0), &vaf);
> +	if (drm_debug_enabled(category)) {

Ville wants to move this check outside of the functions in the macro
level in the header file. Apparently it's pretty bad for performance on
some (older Atom) machines to do the call for nothing.

BR,
Jani.

> +		va_start(args, format);
> +		if (dev)
> +			dev_printk(KERN_DEBUG, dev, "[" DRM_NAME ":%ps] %pV",
> +				   __builtin_return_address(0), &vaf);
> +		else
> +			printk(KERN_DEBUG "[" DRM_NAME ":%ps] %pV",
> +			       __builtin_return_address(0), &vaf);
> +		va_end(args);
> +	}
> +
> +	if (!drm_trace_enabled(category))
> +		return;
> +
> +	for_each_set_bit(bit, (unsigned long*)&category, sizeof(category) * 8) {
> +		va_start(args, format);
> +		drm_do_trace(dev, BIT(bit), &vaf);
> +		va_end(args);
> +	}
>  
> -	va_end(args);
>  }
>  EXPORT_SYMBOL(drm_dev_dbg);
>  
>  void drm_dbg(unsigned int category, const char *format, ...)
>  {
>  	struct va_format vaf;
> +	unsigned int bit;
>  	va_list args;
>  
> -	if (!drm_debug_enabled(category))
> -		return;
> -
> -	va_start(args, format);
>  	vaf.fmt = format;
>  	vaf.va = &args;
>  
> -	printk(KERN_DEBUG "[" DRM_NAME ":%ps] %pV",
> -	       __builtin_return_address(0), &vaf);
> +	if (drm_debug_enabled(category)) {
> +		va_start(args, format);
> +		printk(KERN_DEBUG "[" DRM_NAME ":%ps] %pV",
> +		       __builtin_return_address(0), &vaf);
> +		va_end(args);
> +	}
>  
> -	va_end(args);
> +	if (!drm_trace_enabled(category))
> +		return;
> +
> +	for_each_set_bit(bit, (unsigned long*)&category, sizeof(category) * 8) {
> +		va_start(args, format);
> +		drm_do_trace(NULL, BIT(bit), &vaf);
> +		va_end(args);
> +	}
>  }
>  EXPORT_SYMBOL(drm_dbg);
>  
> @@ -304,13 +404,16 @@ void drm_err(const char *format, ...)
>  	struct va_format vaf;
>  	va_list args;
>  
> -	va_start(args, format);
>  	vaf.fmt = format;
>  	vaf.va = &args;
>  
> +	va_start(args, format);
>  	printk(KERN_ERR "[" DRM_NAME ":%ps] *ERROR* %pV",
>  	       __builtin_return_address(0), &vaf);
> +	va_end(args);
>  
> +	va_start(args, format);
> +	trace_drm_err(NULL, &vaf);
>  	va_end(args);
>  }
>  EXPORT_SYMBOL(drm_err);
> diff --git a/include/trace/events/drm_print.h b/include/trace/events/drm_print.h
> new file mode 100644
> index 000000000000..862728fe0f89
> --- /dev/null
> +++ b/include/trace/events/drm_print.h
> @@ -0,0 +1,116 @@
> +// SPDX-License-Identifier: MIT
> +/*
> + * Copyright (C) 2019 Google, Inc.
> + *
> + * Authors:
> + * Sean Paul <seanpaul@chromium.org>
> + */
> +#undef TRACE_SYSTEM
> +#define TRACE_SYSTEM drm_print
> +
> +#if !defined(_TRACE_DRM_PRINT_H) || defined(TRACE_HEADER_MULTI_READ)
> +
> +#include <linux/device.h>
> +#include <linux/tracepoint.h>
> +
> +#define DRM_PRINT_MAX 256
> +
> +#define _TRACE_DRM_PRINT_H
> +
> +TRACE_EVENT(drm_log,
> +	TP_PROTO(const char * level, const struct device *dev,
> +		 struct va_format *vaf),
> +	TP_ARGS(level, dev, vaf),
> +	TP_STRUCT__entry(
> +		__field(const char *,		level			)
> +		__field(const struct device *,	dev			)
> +		__dynamic_array(char,		msg,	DRM_PRINT_MAX	)
> +		__field(int,			len			)
> +	),
> +	TP_fast_assign(
> +		__entry->level = level;
> +		__entry->dev = dev;
> +		__entry->len = vsnprintf(__get_str(msg), DRM_PRINT_MAX,
> +					 vaf->fmt, *vaf->va);
> +		if (__entry->len > 0 &&
> +		    __get_str(msg)[__entry->len - 1] == '\n')
> +			__get_str(msg)[--__entry->len] = '\0';
> +	),
> +	TP_printk("%s %s %s: %s", __entry->level,
> +		  __entry->dev ? dev_driver_string(__entry->dev) : "(NULL):",
> +		  __entry->dev ? dev_name(__entry->dev) : "", __get_str(msg))
> +);
> +
> +DECLARE_EVENT_CLASS(class_drm_category_log,
> +	TP_PROTO(const struct device *dev, struct va_format *vaf),
> +	TP_ARGS(dev, vaf),
> +	TP_STRUCT__entry(
> +		__field(const struct device *,	dev			)
> +		__dynamic_array(char,		msg,	DRM_PRINT_MAX	)
> +		__field(int,			len			)
> +	),
> +	TP_fast_assign(
> +		__entry->dev = dev;
> +		__entry->len = vsnprintf(__get_str(msg), DRM_PRINT_MAX,
> +					 vaf->fmt, *vaf->va);
> +		if (__entry->len > 0 &&
> +		    __get_str(msg)[__entry->len - 1] == '\n')
> +			__get_str(msg)[--__entry->len] = '\0';
> +	),
> +	TP_printk("%s %s%s%s",
> +		  __entry->dev ? dev_driver_string(__entry->dev) : "",
> +		  __entry->dev ? dev_name(__entry->dev) : "",
> +		  __entry->dev ? ": " : "", __get_str(msg))
> +);
> +DEFINE_EVENT(class_drm_category_log, drm_err,
> +	TP_PROTO(const struct device *dev, struct va_format *vaf),
> +	TP_ARGS(dev, vaf)
> +);
> +DEFINE_EVENT(class_drm_category_log, drm_dbg_none,
> +	TP_PROTO(const struct device *dev, struct va_format *vaf),
> +	TP_ARGS(dev, vaf)
> +);
> +DEFINE_EVENT(class_drm_category_log, drm_dbg_core,
> +	TP_PROTO(const struct device *dev, struct va_format *vaf),
> +	TP_ARGS(dev, vaf)
> +);
> +DEFINE_EVENT(class_drm_category_log, drm_dbg_driver,
> +	TP_PROTO(const struct device *dev, struct va_format *vaf),
> +	TP_ARGS(dev, vaf)
> +);
> +DEFINE_EVENT(class_drm_category_log, drm_dbg_kms,
> +	TP_PROTO(const struct device *dev, struct va_format *vaf),
> +	TP_ARGS(dev, vaf)
> +);
> +DEFINE_EVENT(class_drm_category_log, drm_dbg_prime,
> +	TP_PROTO(const struct device *dev, struct va_format *vaf),
> +	TP_ARGS(dev, vaf)
> +);
> +DEFINE_EVENT(class_drm_category_log, drm_dbg_atomic,
> +	TP_PROTO(const struct device *dev, struct va_format *vaf),
> +	TP_ARGS(dev, vaf)
> +);
> +DEFINE_EVENT(class_drm_category_log, drm_dbg_vbl,
> +	TP_PROTO(const struct device *dev, struct va_format *vaf),
> +	TP_ARGS(dev, vaf)
> +);
> +DEFINE_EVENT(class_drm_category_log, drm_dbg_state,
> +	TP_PROTO(const struct device *dev, struct va_format *vaf),
> +	TP_ARGS(dev, vaf)
> +);
> +DEFINE_EVENT(class_drm_category_log, drm_dbg_lease,
> +	TP_PROTO(const struct device *dev, struct va_format *vaf),
> +	TP_ARGS(dev, vaf)
> +);
> +DEFINE_EVENT(class_drm_category_log, drm_dbg_dp,
> +	TP_PROTO(const struct device *dev, struct va_format *vaf),
> +	TP_ARGS(dev, vaf)
> +);
> +DEFINE_EVENT(class_drm_category_log, drm_dbg_unknown,
> +	TP_PROTO(const struct device *dev, struct va_format *vaf),
> +	TP_ARGS(dev, vaf)
> +);
> +#endif
> +
> +/* This part must be outside protection */
> +#include <trace/define_trace.h>
Ville Syrjälä Dec. 13, 2019, 3:48 p.m. UTC | #5
On Fri, Dec 13, 2019 at 05:34:25PM +0200, Jani Nikula wrote:
> On Thu, 12 Dec 2019, Sean Paul <sean@poorly.run> wrote:
> > From: Sean Paul <seanpaul@chromium.org>
> >
> > For a long while now, we (ChromeOS) have been struggling getting any
> > value out of user feedback reports of display failures (notably external
> > displays not working). The problem is that all logging, even fatal
> > errors (well, fatal in the sense that a display won't light up) are
> > logged at DEBUG log level. So in order to extract these logs, users need
> > to be able to turn on logging, and reproduce the issue with debug
> > enabled. Unfortunately, this isn't really something we can ask CrOS users
> > to do. I spoke with airlied about this and RHEL has similar issues. After
> > a few more people piped up on previous versions of this patch, it is a
> > Real Issue.
> >
> > So why don't we just enable DRM_UT_BLAH? Here are the reasons in
> > ascending order of severity:
> >  1- People aren't consistent with their categories, so we'd have to
> >     enable a bunch to get proper coverage
> >  2- We don't want to overwhelm syslog with drm spam, others have to use
> >     it too
> >  3- Console logging is slow
> >
> > So what we really want is a ringbuffer of the most recent logs
> > (filtered by categories we're interested in) exposed via debugfs so the
> > logs can be extracted when users file feedback.
> >
> > It just so happens that there is something which does _exactly_ this!
> > This patch dumps drm logs into tracepoints, which allows us to turn tracing
> > on and off depending on which category is useful, and pull them from
> > tracefs on demand.
> >
> > What about trace_printk()? It doesn't give us the control we get from using
> > tracepoints and it's not meant to be left sprinkled around in code.
> >
> > Cc: David Airlie <airlied@gmail.com>
> > Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
> > Cc: Pekka Paalanen <ppaalanen@gmail.com>
> > Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> > Cc: Thomas Zimmermann <tzimmermann@suse.de>
> > Cc: Rob Clark <robdclark@gmail.com>
> > Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
> > Signed-off-by: Sean Paul <seanpaul@chromium.org>
> > Link: https://patchwork.freedesktop.org/patch/msgid/20191010204823.195540-1-sean@poorly.run #v1
> >
> > Changes in v2:
> > - Went with a completely different approach: https://lists.freedesktop.org/archives/dri-devel/2019-November/243230.html
> >
> > Changes in v3:
> > - Changed commit message to be a bit less RFC-y
> > - Make class_drm_category_log an actual trace class
> > ---
> >
> > Even though we don't want it to be, this is UAPI. So here's some userspace
> > code which uses it:
> > https://chromium-review.googlesource.com/c/chromiumos/platform2/+/1965562
> >
> >
> >  drivers/gpu/drm/drm_print.c      | 143 ++++++++++++++++++++++++++-----
> >  include/trace/events/drm_print.h | 116 +++++++++++++++++++++++++
> >  2 files changed, 239 insertions(+), 20 deletions(-)
> >  create mode 100644 include/trace/events/drm_print.h
> >
> > diff --git a/drivers/gpu/drm/drm_print.c b/drivers/gpu/drm/drm_print.c
> > index 9a25d73c155c..f591292811aa 100644
> > --- a/drivers/gpu/drm/drm_print.c
> > +++ b/drivers/gpu/drm/drm_print.c
> > @@ -27,11 +27,15 @@
> >  
> >  #include <stdarg.h>
> >  
> > +#include <linux/bitops.h>
> >  #include <linux/io.h>
> >  #include <linux/moduleparam.h>
> >  #include <linux/seq_file.h>
> >  #include <linux/slab.h>
> >  
> > +#define CREATE_TRACE_POINTS
> > +#include <trace/events/drm_print.h>
> > +
> >  #include <drm/drm.h>
> >  #include <drm/drm_drv.h>
> >  #include <drm/drm_print.h>
> > @@ -241,10 +245,10 @@ void drm_dev_printk(const struct device *dev, const char *level,
> >  	struct va_format vaf;
> >  	va_list args;
> >  
> > -	va_start(args, format);
> >  	vaf.fmt = format;
> >  	vaf.va = &args;
> >  
> > +	va_start(args, format);
> >  	if (dev)
> >  		dev_printk(level, dev, "[" DRM_NAME ":%ps] %pV",
> >  			   __builtin_return_address(0), &vaf);
> > @@ -253,49 +257,145 @@ void drm_dev_printk(const struct device *dev, const char *level,
> >  		       level, __builtin_return_address(0), &vaf);
> >  
> >  	va_end(args);
> > +
> > +	va_start(args, format);
> > +	trace_drm_log(level, dev, &vaf);
> > +	va_end(args);
> >  }
> >  EXPORT_SYMBOL(drm_dev_printk);
> >  
> > +static unsigned int drm_trace_enabled(unsigned int category)
> > +{
> > +	unsigned int bit;
> > +
> > +	for_each_set_bit(bit, (unsigned long*)&category, sizeof(category) * 8) {
> 
> You'll want to use BITS_PER_TYPE().
> 
> But wait, I've switched category to an enum upstream, and there should
> only ever be one bit set anyway?
> 
> > +		switch (BIT(bit)) {
> > +		case DRM_UT_NONE:
> > +			return trace_drm_dbg_none_enabled();
> > +		case DRM_UT_CORE:
> > +			return trace_drm_dbg_core_enabled();
> > +		case DRM_UT_DRIVER:
> > +			return trace_drm_dbg_driver_enabled();
> > +		case DRM_UT_KMS:
> > +			return trace_drm_dbg_kms_enabled();
> > +		case DRM_UT_PRIME:
> > +			return trace_drm_dbg_prime_enabled();
> > +		case DRM_UT_ATOMIC:
> > +			return trace_drm_dbg_atomic_enabled();
> > +		case DRM_UT_VBL:
> > +			return trace_drm_dbg_vbl_enabled();
> > +		case DRM_UT_STATE:
> > +			return trace_drm_dbg_state_enabled();
> > +		case DRM_UT_LEASE:
> > +			return trace_drm_dbg_lease_enabled();
> > +		case DRM_UT_DP:
> > +			return trace_drm_dbg_dp_enabled();
> > +		default:
> > +			return trace_drm_dbg_unknown_enabled();
> > +		}
> > +	}
> > +	return false;
> > +}
> > +
> > +static void drm_do_trace(const struct device *dev, unsigned int category,
> > +			 struct va_format *vaf)
> > +{
> > +	WARN_ON(hweight32(category) > 1);
> > +
> > +	switch (category) {
> > +	case DRM_UT_NONE:
> > +		trace_drm_dbg_none(dev, vaf);
> > +		break;
> > +	case DRM_UT_CORE:
> > +		trace_drm_dbg_core(dev, vaf);
> > +		break;
> > +	case DRM_UT_DRIVER:
> > +		trace_drm_dbg_driver(dev, vaf);
> > +		break;
> > +	case DRM_UT_KMS:
> > +		trace_drm_dbg_kms(dev, vaf);
> > +		break;
> > +	case DRM_UT_PRIME:
> > +		trace_drm_dbg_prime(dev, vaf);
> > +		break;
> > +	case DRM_UT_ATOMIC:
> > +		trace_drm_dbg_atomic(dev, vaf);
> > +		break;
> > +	case DRM_UT_VBL:
> > +		trace_drm_dbg_vbl(dev, vaf);
> > +		break;
> > +	case DRM_UT_STATE:
> > +		trace_drm_dbg_state(dev, vaf);
> > +		break;
> > +	case DRM_UT_LEASE:
> > +		trace_drm_dbg_lease(dev, vaf);
> > +		break;
> > +	case DRM_UT_DP:
> > +		trace_drm_dbg_dp(dev, vaf);
> > +		break;
> > +	default:
> > +		trace_drm_dbg_unknown(dev, vaf);
> > +		break;
> > +	}
> > +}
> > +
> >  void drm_dev_dbg(const struct device *dev, unsigned int category,
> >  		 const char *format, ...)
> >  {
> >  	struct va_format vaf;
> > +	unsigned int bit;
> >  	va_list args;
> >  
> > -	if (!drm_debug_enabled(category))
> > -		return;
> > -
> > -	va_start(args, format);
> >  	vaf.fmt = format;
> >  	vaf.va = &args;
> >  
> > -	if (dev)
> > -		dev_printk(KERN_DEBUG, dev, "[" DRM_NAME ":%ps] %pV",
> > -			   __builtin_return_address(0), &vaf);
> > -	else
> > -		printk(KERN_DEBUG "[" DRM_NAME ":%ps] %pV",
> > -		       __builtin_return_address(0), &vaf);
> > +	if (drm_debug_enabled(category)) {
> 
> Ville wants to move this check outside of the functions in the macro
> level in the header file. Apparently it's pretty bad for performance on
> some (older Atom) machines to do the call for nothing.

I've been meaning to measure if this actually matters. But haven't
managed to motivate myself enough. Could be a good project for
some random person to write a test that just does tons of different
kinds of atomic TEST_ONLY commits to measure how badly we suck.
Sean Paul Dec. 13, 2019, 4:47 p.m. UTC | #6
Hi Steven,
Thanks for the feedback!

On Thu, Dec 12, 2019 at 10:20:50PM -0500, Steven Rostedt wrote:
> On Thu, 12 Dec 2019 15:32:35 -0500
> Sean Paul <sean@poorly.run> wrote:
> 
> > 
> > What about trace_printk()? It doesn't give us the control we get from using
> > tracepoints and it's not meant to be left sprinkled around in code.
> 
> Not to mention that trace_printk() is not for production use (only for
> developers debugging purposes).
> 

Yep, I'll clarify this in the next version.

> > 
> > Cc: David Airlie <airlied@gmail.com>
> > Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
> > Cc: Pekka Paalanen <ppaalanen@gmail.com>
> > Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> > Cc: Thomas Zimmermann <tzimmermann@suse.de>
> > Cc: Rob Clark <robdclark@gmail.com>
> > Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
> > Signed-off-by: Sean Paul <seanpaul@chromium.org>
> > Link: https://patchwork.freedesktop.org/patch/msgid/20191010204823.195540-1-sean@poorly.run #v1
> > 
> > Changes in v2:
> > - Went with a completely different approach: https://lists.freedesktop.org/archives/dri-devel/2019-November/243230.html
> > 
> > Changes in v3:
> > - Changed commit message to be a bit less RFC-y
> > - Make class_drm_category_log an actual trace class
> > ---
> > 
> > Even though we don't want it to be, this is UAPI. So here's some userspace
> > code which uses it:
> > https://chromium-review.googlesource.com/c/chromiumos/platform2/+/1965562
> > 
> > 
> >  drivers/gpu/drm/drm_print.c      | 143 ++++++++++++++++++++++++++-----
> >  include/trace/events/drm_print.h | 116 +++++++++++++++++++++++++
> >  2 files changed, 239 insertions(+), 20 deletions(-)
> >  create mode 100644 include/trace/events/drm_print.h
> > 
> > diff --git a/drivers/gpu/drm/drm_print.c b/drivers/gpu/drm/drm_print.c
> > index 9a25d73c155c..f591292811aa 100644
> > --- a/drivers/gpu/drm/drm_print.c
> > +++ b/drivers/gpu/drm/drm_print.c
> > @@ -27,11 +27,15 @@
> >  
> >  #include <stdarg.h>
> >  
> > +#include <linux/bitops.h>
> >  #include <linux/io.h>
> >  #include <linux/moduleparam.h>
> >  #include <linux/seq_file.h>
> >  #include <linux/slab.h>
> >  
> > +#define CREATE_TRACE_POINTS
> > +#include <trace/events/drm_print.h>
> > +
> >  #include <drm/drm.h>
> >  #include <drm/drm_drv.h>
> >  #include <drm/drm_print.h>
> > @@ -241,10 +245,10 @@ void drm_dev_printk(const struct device *dev, const char *level,
> >  	struct va_format vaf;
> >  	va_list args;
> >  
> > -	va_start(args, format);
> >  	vaf.fmt = format;
> >  	vaf.va = &args;
> >  
> > +	va_start(args, format);
> >  	if (dev)
> >  		dev_printk(level, dev, "[" DRM_NAME ":%ps] %pV",
> >  			   __builtin_return_address(0), &vaf);
> > @@ -253,49 +257,145 @@ void drm_dev_printk(const struct device *dev, const char *level,
> >  		       level, __builtin_return_address(0), &vaf);
> >  
> >  	va_end(args);
> > +
> > +	va_start(args, format);
> > +	trace_drm_log(level, dev, &vaf);
> > +	va_end(args);
> >  }
> >  EXPORT_SYMBOL(drm_dev_printk);
> >  
> > +static unsigned int drm_trace_enabled(unsigned int category)
> > +{
> > +	unsigned int bit;
> > +
> > +	for_each_set_bit(bit, (unsigned long*)&category, sizeof(category) * 8) {
> > +		switch (BIT(bit)) {
> > +		case DRM_UT_NONE:
> > +			return trace_drm_dbg_none_enabled();
> > +		case DRM_UT_CORE:
> > +			return trace_drm_dbg_core_enabled();
> > +		case DRM_UT_DRIVER:
> > +			return trace_drm_dbg_driver_enabled();
> > +		case DRM_UT_KMS:
> > +			return trace_drm_dbg_kms_enabled();
> > +		case DRM_UT_PRIME:
> > +			return trace_drm_dbg_prime_enabled();
> > +		case DRM_UT_ATOMIC:
> > +			return trace_drm_dbg_atomic_enabled();
> > +		case DRM_UT_VBL:
> > +			return trace_drm_dbg_vbl_enabled();
> > +		case DRM_UT_STATE:
> > +			return trace_drm_dbg_state_enabled();
> > +		case DRM_UT_LEASE:
> > +			return trace_drm_dbg_lease_enabled();
> > +		case DRM_UT_DP:
> > +			return trace_drm_dbg_dp_enabled();
> > +		default:
> > +			return trace_drm_dbg_unknown_enabled();
> > +		}
> 
> Why this double loop?
> 

category is technically a bitfield, but only 1 bit is ever set (grander plans
when it was created, I suppose). As Jani mentions, it's being changed to an
enum, so we won't need this loop.

> > +	}
> > +	return false;
> > +}
> > +
> > +static void drm_do_trace(const struct device *dev, unsigned int category,
> > +			 struct va_format *vaf)
> > +{
> > +	WARN_ON(hweight32(category) > 1);
> > +
> > +	switch (category) {
> > +	case DRM_UT_NONE:
> > +		trace_drm_dbg_none(dev, vaf);
> > +		break;
> > +	case DRM_UT_CORE:
> > +		trace_drm_dbg_core(dev, vaf);
> > +		break;
> > +	case DRM_UT_DRIVER:
> > +		trace_drm_dbg_driver(dev, vaf);
> > +		break;
> > +	case DRM_UT_KMS:
> > +		trace_drm_dbg_kms(dev, vaf);
> > +		break;
> > +	case DRM_UT_PRIME:
> > +		trace_drm_dbg_prime(dev, vaf);
> > +		break;
> > +	case DRM_UT_ATOMIC:
> > +		trace_drm_dbg_atomic(dev, vaf);
> > +		break;
> > +	case DRM_UT_VBL:
> > +		trace_drm_dbg_vbl(dev, vaf);
> > +		break;
> > +	case DRM_UT_STATE:
> > +		trace_drm_dbg_state(dev, vaf);
> > +		break;
> > +	case DRM_UT_LEASE:
> > +		trace_drm_dbg_lease(dev, vaf);
> > +		break;
> > +	case DRM_UT_DP:
> > +		trace_drm_dbg_dp(dev, vaf);
> > +		break;
> > +	default:
> > +		trace_drm_dbg_unknown(dev, vaf);
> > +		break;
> 
> Why is there a separate trace event for each of these?
> 

To make it easier on userspace to enable just a single drm category.

> > +	}
> > +}
> > +
> >  void drm_dev_dbg(const struct device *dev, unsigned int category,
> >  		 const char *format, ...)
> >  {
> >  	struct va_format vaf;
> > +	unsigned int bit;
> >  	va_list args;
> >  
> > -	if (!drm_debug_enabled(category))
> > -		return;
> > -
> > -	va_start(args, format);
> >  	vaf.fmt = format;
> >  	vaf.va = &args;
> >  
> > -	if (dev)
> > -		dev_printk(KERN_DEBUG, dev, "[" DRM_NAME ":%ps] %pV",
> > -			   __builtin_return_address(0), &vaf);
> > -	else
> > -		printk(KERN_DEBUG "[" DRM_NAME ":%ps] %pV",
> > -		       __builtin_return_address(0), &vaf);
> > +	if (drm_debug_enabled(category)) {
> > +		va_start(args, format);
> > +		if (dev)
> > +			dev_printk(KERN_DEBUG, dev, "[" DRM_NAME ":%ps] %pV",
> > +				   __builtin_return_address(0), &vaf);
> > +		else
> > +			printk(KERN_DEBUG "[" DRM_NAME ":%ps] %pV",
> > +			       __builtin_return_address(0), &vaf);
> > +		va_end(args);
> > +	}
> > +
> > +	if (!drm_trace_enabled(category))
> > +		return;
> > +
> > +	for_each_set_bit(bit, (unsigned long*)&category, sizeof(category) * 8) {
> > +		va_start(args, format);
> > +		drm_do_trace(dev, BIT(bit), &vaf);
> > +		va_end(args);
> 
> I'm thinking we could find a way to move the logic here to have a
> single loop.

Should be done with the enum conversion mentioned above.

> 
> Note, the current approach is racy. The state can change between the
> "drm_trace_enabled()" and this for loop.

Yep, good catch. I think the race is ok since in that case the message will get
dropped when we call drm_do_trace. The trace_enabled() call is just a fastpath
for the majority case.

Sean

> 
> -- Steve
> 
> 
> > +	}
> >  
> > -	va_end(args);
> >  }
> >  EXPORT_SYMBOL(drm_dev_dbg);
> >  
> >  void drm_dbg(unsigned int category, const char *format, ...)
> >  {
> >  	struct va_format vaf;
> > +	unsigned int bit;
> >  	va_list args;
> >  
> > -	if (!drm_debug_enabled(category))
> > -		return;
> > -
> > -	va_start(args, format);
> >  	vaf.fmt = format;
> >  	vaf.va = &args;
> >  
> > -	printk(KERN_DEBUG "[" DRM_NAME ":%ps] %pV",
> > -	       __builtin_return_address(0), &vaf);
> > +	if (drm_debug_enabled(category)) {
> > +		va_start(args, format);
> > +		printk(KERN_DEBUG "[" DRM_NAME ":%ps] %pV",
> > +		       __builtin_return_address(0), &vaf);
> > +		va_end(args);
> > +	}
> >  
> > -	va_end(args);
> > +	if (!drm_trace_enabled(category))
> > +		return;
> > +
> > +	for_each_set_bit(bit, (unsigned long*)&category, sizeof(category) * 8) {
> > +		va_start(args, format);
> > +		drm_do_trace(NULL, BIT(bit), &vaf);
> > +		va_end(args);
> > +	}
> >  }
> >  EXPORT_SYMBOL(drm_dbg);
> >  
> > @@ -304,13 +404,16 @@ void drm_err(const char *format, ...)
> >  	struct va_format vaf;
> >  	va_list args;
> >  
> > -	va_start(args, format);
> >  	vaf.fmt = format;
> >  	vaf.va = &args;
> >  
> > +	va_start(args, format);
> >  	printk(KERN_ERR "[" DRM_NAME ":%ps] *ERROR* %pV",
> >  	       __builtin_return_address(0), &vaf);
> > +	va_end(args);
> >  
> > +	va_start(args, format);
> > +	trace_drm_err(NULL, &vaf);
> >  	va_end(args);
> >  }
> >  EXPORT_SYMBOL(drm_err);
> > diff --git a/include/trace/events/drm_print.h b/include/trace/events/drm_print.h
> > new file mode 100644
> > index 000000000000..862728fe0f89
> > --- /dev/null
> > +++ b/include/trace/events/drm_print.h
> > @@ -0,0 +1,116 @@
> > +// SPDX-License-Identifier: MIT
> > +/*
> > + * Copyright (C) 2019 Google, Inc.
> > + *
> > + * Authors:
> > + * Sean Paul <seanpaul@chromium.org>
> > + */
> > +#undef TRACE_SYSTEM
> > +#define TRACE_SYSTEM drm_print
> > +
> > +#if !defined(_TRACE_DRM_PRINT_H) || defined(TRACE_HEADER_MULTI_READ)
> > +
> > +#include <linux/device.h>
> > +#include <linux/tracepoint.h>
> > +
> > +#define DRM_PRINT_MAX 256
> > +
> > +#define _TRACE_DRM_PRINT_H
> > +
> > +TRACE_EVENT(drm_log,
> > +	TP_PROTO(const char * level, const struct device *dev,
> > +		 struct va_format *vaf),
> > +	TP_ARGS(level, dev, vaf),
> > +	TP_STRUCT__entry(
> > +		__field(const char *,		level			)
> > +		__field(const struct device *,	dev			)
> > +		__dynamic_array(char,		msg,	DRM_PRINT_MAX	)
> > +		__field(int,			len			)
> > +	),
> > +	TP_fast_assign(
> > +		__entry->level = level;
> > +		__entry->dev = dev;
> > +		__entry->len = vsnprintf(__get_str(msg), DRM_PRINT_MAX,
> > +					 vaf->fmt, *vaf->va);
> > +		if (__entry->len > 0 &&
> > +		    __get_str(msg)[__entry->len - 1] == '\n')
> > +			__get_str(msg)[--__entry->len] = '\0';
> > +	),
> > +	TP_printk("%s %s %s: %s", __entry->level,
> > +		  __entry->dev ? dev_driver_string(__entry->dev) : "(NULL):",
> > +		  __entry->dev ? dev_name(__entry->dev) : "", __get_str(msg))
> > +);
> > +
> > +DECLARE_EVENT_CLASS(class_drm_category_log,
> > +	TP_PROTO(const struct device *dev, struct va_format *vaf),
> > +	TP_ARGS(dev, vaf),
> > +	TP_STRUCT__entry(
> > +		__field(const struct device *,	dev			)
> > +		__dynamic_array(char,		msg,	DRM_PRINT_MAX	)
> > +		__field(int,			len			)
> > +	),
> > +	TP_fast_assign(
> > +		__entry->dev = dev;
> > +		__entry->len = vsnprintf(__get_str(msg), DRM_PRINT_MAX,
> > +					 vaf->fmt, *vaf->va);
> > +		if (__entry->len > 0 &&
> > +		    __get_str(msg)[__entry->len - 1] == '\n')
> > +			__get_str(msg)[--__entry->len] = '\0';
> > +	),
> > +	TP_printk("%s %s%s%s",
> > +		  __entry->dev ? dev_driver_string(__entry->dev) : "",
> > +		  __entry->dev ? dev_name(__entry->dev) : "",
> > +		  __entry->dev ? ": " : "", __get_str(msg))
> > +);
> > +DEFINE_EVENT(class_drm_category_log, drm_err,
> > +	TP_PROTO(const struct device *dev, struct va_format *vaf),
> > +	TP_ARGS(dev, vaf)
> > +);
> > +DEFINE_EVENT(class_drm_category_log, drm_dbg_none,
> > +	TP_PROTO(const struct device *dev, struct va_format *vaf),
> > +	TP_ARGS(dev, vaf)
> > +);
> > +DEFINE_EVENT(class_drm_category_log, drm_dbg_core,
> > +	TP_PROTO(const struct device *dev, struct va_format *vaf),
> > +	TP_ARGS(dev, vaf)
> > +);
> > +DEFINE_EVENT(class_drm_category_log, drm_dbg_driver,
> > +	TP_PROTO(const struct device *dev, struct va_format *vaf),
> > +	TP_ARGS(dev, vaf)
> > +);
> > +DEFINE_EVENT(class_drm_category_log, drm_dbg_kms,
> > +	TP_PROTO(const struct device *dev, struct va_format *vaf),
> > +	TP_ARGS(dev, vaf)
> > +);
> > +DEFINE_EVENT(class_drm_category_log, drm_dbg_prime,
> > +	TP_PROTO(const struct device *dev, struct va_format *vaf),
> > +	TP_ARGS(dev, vaf)
> > +);
> > +DEFINE_EVENT(class_drm_category_log, drm_dbg_atomic,
> > +	TP_PROTO(const struct device *dev, struct va_format *vaf),
> > +	TP_ARGS(dev, vaf)
> > +);
> > +DEFINE_EVENT(class_drm_category_log, drm_dbg_vbl,
> > +	TP_PROTO(const struct device *dev, struct va_format *vaf),
> > +	TP_ARGS(dev, vaf)
> > +);
> > +DEFINE_EVENT(class_drm_category_log, drm_dbg_state,
> > +	TP_PROTO(const struct device *dev, struct va_format *vaf),
> > +	TP_ARGS(dev, vaf)
> > +);
> > +DEFINE_EVENT(class_drm_category_log, drm_dbg_lease,
> > +	TP_PROTO(const struct device *dev, struct va_format *vaf),
> > +	TP_ARGS(dev, vaf)
> > +);
> > +DEFINE_EVENT(class_drm_category_log, drm_dbg_dp,
> > +	TP_PROTO(const struct device *dev, struct va_format *vaf),
> > +	TP_ARGS(dev, vaf)
> > +);
> > +DEFINE_EVENT(class_drm_category_log, drm_dbg_unknown,
> > +	TP_PROTO(const struct device *dev, struct va_format *vaf),
> > +	TP_ARGS(dev, vaf)
> > +);
> > +#endif
> > +
> > +/* This part must be outside protection */
> > +#include <trace/define_trace.h>
>
Sean Paul Dec. 13, 2019, 4:54 p.m. UTC | #7
On Fri, Dec 13, 2019 at 01:34:46PM +0200, Pekka Paalanen wrote:
> On Thu, 12 Dec 2019 15:32:35 -0500
> Sean Paul <sean@poorly.run> wrote:
> 
> > From: Sean Paul <seanpaul@chromium.org>
> > 
> > For a long while now, we (ChromeOS) have been struggling getting any
> > value out of user feedback reports of display failures (notably external
> > displays not working). The problem is that all logging, even fatal
> > errors (well, fatal in the sense that a display won't light up) are
> > logged at DEBUG log level. So in order to extract these logs, users need
> > to be able to turn on logging, and reproduce the issue with debug
> > enabled. Unfortunately, this isn't really something we can ask CrOS users
> > to do. I spoke with airlied about this and RHEL has similar issues. After
> > a few more people piped up on previous versions of this patch, it is a
> > Real Issue.
> > 
> > So why don't we just enable DRM_UT_BLAH? Here are the reasons in
> > ascending order of severity:
> >  1- People aren't consistent with their categories, so we'd have to
> >     enable a bunch to get proper coverage
> >  2- We don't want to overwhelm syslog with drm spam, others have to use
> >     it too
> >  3- Console logging is slow
> > 
> > So what we really want is a ringbuffer of the most recent logs
> > (filtered by categories we're interested in) exposed via debugfs so the
> > logs can be extracted when users file feedback.
> > 
> > It just so happens that there is something which does _exactly_ this!
> > This patch dumps drm logs into tracepoints, which allows us to turn tracing
> > on and off depending on which category is useful, and pull them from
> > tracefs on demand.
> > 
> > What about trace_printk()? It doesn't give us the control we get from using
> > tracepoints and it's not meant to be left sprinkled around in code.
> > 
> > Cc: David Airlie <airlied@gmail.com>
> > Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
> > Cc: Pekka Paalanen <ppaalanen@gmail.com>
> > Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> > Cc: Thomas Zimmermann <tzimmermann@suse.de>
> > Cc: Rob Clark <robdclark@gmail.com>
> > Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
> > Signed-off-by: Sean Paul <seanpaul@chromium.org>
> > Link: https://patchwork.freedesktop.org/patch/msgid/20191010204823.195540-1-sean@poorly.run #v1
> > 
> > Changes in v2:
> > - Went with a completely different approach: https://lists.freedesktop.org/archives/dri-devel/2019-November/243230.html
> > 
> > Changes in v3:
> > - Changed commit message to be a bit less RFC-y
> > - Make class_drm_category_log an actual trace class
> > ---
> > 
> > Even though we don't want it to be, this is UAPI. So here's some userspace
> > code which uses it:
> > https://chromium-review.googlesource.com/c/chromiumos/platform2/+/1965562
> > 
> > 
> >  drivers/gpu/drm/drm_print.c      | 143 ++++++++++++++++++++++++++-----
> >  include/trace/events/drm_print.h | 116 +++++++++++++++++++++++++
> >  2 files changed, 239 insertions(+), 20 deletions(-)
> >  create mode 100644 include/trace/events/drm_print.h
> 
> Hi,
> 
> reading the userspace patch is very enlightening, thanks.
> 
> It uses debugfs, and it uses the generic tracing UAPI. When all
> distributions will enable this debug logging like you do in your
> userspace patch (I really want that to be the end result, since
> otherwise we are back to asking people to manually enable debug and then
> reproduce the failure), does that scale?
> 
> What if V4L2 is the next one deciding they need a similar logging
> framework to debug camera issues? If the trace log is already flooded
> with DRM messages, it will be useless for them?
> 
> Or maybe someone else wants their piece and flood it even more
> aggressively than DRM, making the DRM messages disappear before they
> can be saved?
> 
> Is there a way to pull out messages
> from /sys/kernel/debug/tracing/trace and filter them on reading instead
> of on writing?

Hi Pekka,
Yep, there's also a trace_pipe output from tracefs. So you could pipe the output
through a classifier in userspace and split off different subsystems.

I think if this type of patch proliferates this could be a problem. In that case,
we'd probably have to become better citizens and reclassify some of our log
messages such that we're not needlessly spamming the trace buffer.

I'm not too worried about this since we have a number of tools at our disposal.

Sean


> 
> 
> Thanks,
> pq
Steven Rostedt Dec. 13, 2019, 4:54 p.m. UTC | #8
On Fri, 13 Dec 2019 11:47:03 -0500
Sean Paul <sean@poorly.run> wrote:

> > Why is there a separate trace event for each of these?
> >   
> 
> To make it easier on userspace to enable just a single drm category.
> 

But trace events can easily be filtered. Say you have a field called
category, you could then just set in the trace event filter:

 echo 'category == 1 || category == 5 || (category > 8 && category < 12)' > /sys/kernel/tracing/drm_print/drm_category_log/filter
 echo 1 > /sys/kernel/tracing/drm_print/drm_category_log/enable

-- Steve
Sean Paul Dec. 13, 2019, 6:49 p.m. UTC | #9
On Fri, Dec 13, 2019 at 11:54:36AM -0500, Steven Rostedt wrote:
> On Fri, 13 Dec 2019 11:47:03 -0500
> Sean Paul <sean@poorly.run> wrote:
> 
> > > Why is there a separate trace event for each of these?
> > >   
> > 
> > To make it easier on userspace to enable just a single drm category.
> > 
> 
> But trace events can easily be filtered. Say you have a field called
> category, you could then just set in the trace event filter:
> 
>  echo 'category == 1 || category == 5 || (category > 8 && category < 12)' > /sys/kernel/tracing/drm_print/drm_category_log/filter
>  echo 1 > /sys/kernel/tracing/drm_print/drm_category_log/enable

Hmm, true. I suppose the only downside would be that this would cause all logs
to be processed/assigned as trace events before the filter is applied. That's a
lot of extra work each frame.

Sean

> 
> -- Steve
Pekka Paalanen Dec. 16, 2019, 8:55 a.m. UTC | #10
On Fri, 13 Dec 2019 11:54:33 -0500
Sean Paul <sean@poorly.run> wrote:

> On Fri, Dec 13, 2019 at 01:34:46PM +0200, Pekka Paalanen wrote:
> > On Thu, 12 Dec 2019 15:32:35 -0500
> > Sean Paul <sean@poorly.run> wrote:
> >   
> > > From: Sean Paul <seanpaul@chromium.org>
> > > 
> > > For a long while now, we (ChromeOS) have been struggling getting any
> > > value out of user feedback reports of display failures (notably external
> > > displays not working). The problem is that all logging, even fatal
> > > errors (well, fatal in the sense that a display won't light up) are
> > > logged at DEBUG log level. So in order to extract these logs, users need
> > > to be able to turn on logging, and reproduce the issue with debug
> > > enabled. Unfortunately, this isn't really something we can ask CrOS users
> > > to do. I spoke with airlied about this and RHEL has similar issues. After
> > > a few more people piped up on previous versions of this patch, it is a
> > > Real Issue.
> > > 
> > > So why don't we just enable DRM_UT_BLAH? Here are the reasons in
> > > ascending order of severity:
> > >  1- People aren't consistent with their categories, so we'd have to
> > >     enable a bunch to get proper coverage
> > >  2- We don't want to overwhelm syslog with drm spam, others have to use
> > >     it too
> > >  3- Console logging is slow
> > > 
> > > So what we really want is a ringbuffer of the most recent logs
> > > (filtered by categories we're interested in) exposed via debugfs so the
> > > logs can be extracted when users file feedback.
> > > 
> > > It just so happens that there is something which does _exactly_ this!
> > > This patch dumps drm logs into tracepoints, which allows us to turn tracing
> > > on and off depending on which category is useful, and pull them from
> > > tracefs on demand.
> > > 
> > > What about trace_printk()? It doesn't give us the control we get from using
> > > tracepoints and it's not meant to be left sprinkled around in code.
> > > 
> > > Cc: David Airlie <airlied@gmail.com>
> > > Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
> > > Cc: Pekka Paalanen <ppaalanen@gmail.com>
> > > Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> > > Cc: Thomas Zimmermann <tzimmermann@suse.de>
> > > Cc: Rob Clark <robdclark@gmail.com>
> > > Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > > Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
> > > Signed-off-by: Sean Paul <seanpaul@chromium.org>
> > > Link: https://patchwork.freedesktop.org/patch/msgid/20191010204823.195540-1-sean@poorly.run #v1
> > > 
> > > Changes in v2:
> > > - Went with a completely different approach: https://lists.freedesktop.org/archives/dri-devel/2019-November/243230.html
> > > 
> > > Changes in v3:
> > > - Changed commit message to be a bit less RFC-y
> > > - Make class_drm_category_log an actual trace class
> > > ---
> > > 
> > > Even though we don't want it to be, this is UAPI. So here's some userspace
> > > code which uses it:
> > > https://chromium-review.googlesource.com/c/chromiumos/platform2/+/1965562
> > > 
> > > 
> > >  drivers/gpu/drm/drm_print.c      | 143 ++++++++++++++++++++++++++-----
> > >  include/trace/events/drm_print.h | 116 +++++++++++++++++++++++++
> > >  2 files changed, 239 insertions(+), 20 deletions(-)
> > >  create mode 100644 include/trace/events/drm_print.h  
> > 
> > Hi,
> > 
> > reading the userspace patch is very enlightening, thanks.
> > 
> > It uses debugfs, and it uses the generic tracing UAPI. When all
> > distributions will enable this debug logging like you do in your
> > userspace patch (I really want that to be the end result, since
> > otherwise we are back to asking people to manually enable debug and then
> > reproduce the failure), does that scale?
> > 
> > What if V4L2 is the next one deciding they need a similar logging
> > framework to debug camera issues? If the trace log is already flooded
> > with DRM messages, it will be useless for them?
> > 
> > Or maybe someone else wants their piece and flood it even more
> > aggressively than DRM, making the DRM messages disappear before they
> > can be saved?
> > 
> > Is there a way to pull out messages
> > from /sys/kernel/debug/tracing/trace and filter them on reading instead
> > of on writing?  
> 
> Hi Pekka,
> Yep, there's also a trace_pipe output from tracefs. So you could pipe the output
> through a classifier in userspace and split off different subsystems.

Hi,

IOW, one needs to run a userspace daemon to read the pipe, filter it,
and keep a flight recorder buffer, instead of the kernel keeping it.
Right?

I suppose keeping the flight recorder in userspace makes it much more
flexible with things like dynamic sizing and remotes and whatnot. Is
this where you would like to aim for, for general purpose desktop
distributions to run one at all times?

> I think if this type of patch proliferates this could be a problem. In that case,
> we'd probably have to become better citizens and reclassify some of our log
> messages such that we're not needlessly spamming the trace buffer.

How will you do that in a way that people who have already taken to bug
reporting logging do not scream "UAPI broke, the logs are much less
useful now"? :-)

Although I might worry more about people screaming "my tracing broke"
if the buffers fill up with DRM chatter in the first place. After all,
the aim is to have this enabled in desktop distributions by default,
and if distributions get bug reports forcing them to disable it by
default, then we're back to square "please enable debugging and
reproduce the issue so we have something to look at" again.

FWIW, I'm very much in favour of your overall goal, but I'm worried it
might not be sustainable or practical in the long term outside of very
specialized distributions. Then again, I have no alternative
suggestions that would have the same level of code and UAPI re-use.

> I'm not too worried about this since we have a number of tools at our disposal.

What tools would those be?


Thanks,
pq
diff mbox series

Patch

diff --git a/drivers/gpu/drm/drm_print.c b/drivers/gpu/drm/drm_print.c
index 9a25d73c155c..f591292811aa 100644
--- a/drivers/gpu/drm/drm_print.c
+++ b/drivers/gpu/drm/drm_print.c
@@ -27,11 +27,15 @@ 
 
 #include <stdarg.h>
 
+#include <linux/bitops.h>
 #include <linux/io.h>
 #include <linux/moduleparam.h>
 #include <linux/seq_file.h>
 #include <linux/slab.h>
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/drm_print.h>
+
 #include <drm/drm.h>
 #include <drm/drm_drv.h>
 #include <drm/drm_print.h>
@@ -241,10 +245,10 @@  void drm_dev_printk(const struct device *dev, const char *level,
 	struct va_format vaf;
 	va_list args;
 
-	va_start(args, format);
 	vaf.fmt = format;
 	vaf.va = &args;
 
+	va_start(args, format);
 	if (dev)
 		dev_printk(level, dev, "[" DRM_NAME ":%ps] %pV",
 			   __builtin_return_address(0), &vaf);
@@ -253,49 +257,145 @@  void drm_dev_printk(const struct device *dev, const char *level,
 		       level, __builtin_return_address(0), &vaf);
 
 	va_end(args);
+
+	va_start(args, format);
+	trace_drm_log(level, dev, &vaf);
+	va_end(args);
 }
 EXPORT_SYMBOL(drm_dev_printk);
 
+static unsigned int drm_trace_enabled(unsigned int category)
+{
+	unsigned int bit;
+
+	for_each_set_bit(bit, (unsigned long*)&category, sizeof(category) * 8) {
+		switch (BIT(bit)) {
+		case DRM_UT_NONE:
+			return trace_drm_dbg_none_enabled();
+		case DRM_UT_CORE:
+			return trace_drm_dbg_core_enabled();
+		case DRM_UT_DRIVER:
+			return trace_drm_dbg_driver_enabled();
+		case DRM_UT_KMS:
+			return trace_drm_dbg_kms_enabled();
+		case DRM_UT_PRIME:
+			return trace_drm_dbg_prime_enabled();
+		case DRM_UT_ATOMIC:
+			return trace_drm_dbg_atomic_enabled();
+		case DRM_UT_VBL:
+			return trace_drm_dbg_vbl_enabled();
+		case DRM_UT_STATE:
+			return trace_drm_dbg_state_enabled();
+		case DRM_UT_LEASE:
+			return trace_drm_dbg_lease_enabled();
+		case DRM_UT_DP:
+			return trace_drm_dbg_dp_enabled();
+		default:
+			return trace_drm_dbg_unknown_enabled();
+		}
+	}
+	return false;
+}
+
+static void drm_do_trace(const struct device *dev, unsigned int category,
+			 struct va_format *vaf)
+{
+	WARN_ON(hweight32(category) > 1);
+
+	switch (category) {
+	case DRM_UT_NONE:
+		trace_drm_dbg_none(dev, vaf);
+		break;
+	case DRM_UT_CORE:
+		trace_drm_dbg_core(dev, vaf);
+		break;
+	case DRM_UT_DRIVER:
+		trace_drm_dbg_driver(dev, vaf);
+		break;
+	case DRM_UT_KMS:
+		trace_drm_dbg_kms(dev, vaf);
+		break;
+	case DRM_UT_PRIME:
+		trace_drm_dbg_prime(dev, vaf);
+		break;
+	case DRM_UT_ATOMIC:
+		trace_drm_dbg_atomic(dev, vaf);
+		break;
+	case DRM_UT_VBL:
+		trace_drm_dbg_vbl(dev, vaf);
+		break;
+	case DRM_UT_STATE:
+		trace_drm_dbg_state(dev, vaf);
+		break;
+	case DRM_UT_LEASE:
+		trace_drm_dbg_lease(dev, vaf);
+		break;
+	case DRM_UT_DP:
+		trace_drm_dbg_dp(dev, vaf);
+		break;
+	default:
+		trace_drm_dbg_unknown(dev, vaf);
+		break;
+	}
+}
+
 void drm_dev_dbg(const struct device *dev, unsigned int category,
 		 const char *format, ...)
 {
 	struct va_format vaf;
+	unsigned int bit;
 	va_list args;
 
-	if (!drm_debug_enabled(category))
-		return;
-
-	va_start(args, format);
 	vaf.fmt = format;
 	vaf.va = &args;
 
-	if (dev)
-		dev_printk(KERN_DEBUG, dev, "[" DRM_NAME ":%ps] %pV",
-			   __builtin_return_address(0), &vaf);
-	else
-		printk(KERN_DEBUG "[" DRM_NAME ":%ps] %pV",
-		       __builtin_return_address(0), &vaf);
+	if (drm_debug_enabled(category)) {
+		va_start(args, format);
+		if (dev)
+			dev_printk(KERN_DEBUG, dev, "[" DRM_NAME ":%ps] %pV",
+				   __builtin_return_address(0), &vaf);
+		else
+			printk(KERN_DEBUG "[" DRM_NAME ":%ps] %pV",
+			       __builtin_return_address(0), &vaf);
+		va_end(args);
+	}
+
+	if (!drm_trace_enabled(category))
+		return;
+
+	for_each_set_bit(bit, (unsigned long*)&category, sizeof(category) * 8) {
+		va_start(args, format);
+		drm_do_trace(dev, BIT(bit), &vaf);
+		va_end(args);
+	}
 
-	va_end(args);
 }
 EXPORT_SYMBOL(drm_dev_dbg);
 
 void drm_dbg(unsigned int category, const char *format, ...)
 {
 	struct va_format vaf;
+	unsigned int bit;
 	va_list args;
 
-	if (!drm_debug_enabled(category))
-		return;
-
-	va_start(args, format);
 	vaf.fmt = format;
 	vaf.va = &args;
 
-	printk(KERN_DEBUG "[" DRM_NAME ":%ps] %pV",
-	       __builtin_return_address(0), &vaf);
+	if (drm_debug_enabled(category)) {
+		va_start(args, format);
+		printk(KERN_DEBUG "[" DRM_NAME ":%ps] %pV",
+		       __builtin_return_address(0), &vaf);
+		va_end(args);
+	}
 
-	va_end(args);
+	if (!drm_trace_enabled(category))
+		return;
+
+	for_each_set_bit(bit, (unsigned long*)&category, sizeof(category) * 8) {
+		va_start(args, format);
+		drm_do_trace(NULL, BIT(bit), &vaf);
+		va_end(args);
+	}
 }
 EXPORT_SYMBOL(drm_dbg);
 
@@ -304,13 +404,16 @@  void drm_err(const char *format, ...)
 	struct va_format vaf;
 	va_list args;
 
-	va_start(args, format);
 	vaf.fmt = format;
 	vaf.va = &args;
 
+	va_start(args, format);
 	printk(KERN_ERR "[" DRM_NAME ":%ps] *ERROR* %pV",
 	       __builtin_return_address(0), &vaf);
+	va_end(args);
 
+	va_start(args, format);
+	trace_drm_err(NULL, &vaf);
 	va_end(args);
 }
 EXPORT_SYMBOL(drm_err);
diff --git a/include/trace/events/drm_print.h b/include/trace/events/drm_print.h
new file mode 100644
index 000000000000..862728fe0f89
--- /dev/null
+++ b/include/trace/events/drm_print.h
@@ -0,0 +1,116 @@ 
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright (C) 2019 Google, Inc.
+ *
+ * Authors:
+ * Sean Paul <seanpaul@chromium.org>
+ */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM drm_print
+
+#if !defined(_TRACE_DRM_PRINT_H) || defined(TRACE_HEADER_MULTI_READ)
+
+#include <linux/device.h>
+#include <linux/tracepoint.h>
+
+#define DRM_PRINT_MAX 256
+
+#define _TRACE_DRM_PRINT_H
+
+TRACE_EVENT(drm_log,
+	TP_PROTO(const char * level, const struct device *dev,
+		 struct va_format *vaf),
+	TP_ARGS(level, dev, vaf),
+	TP_STRUCT__entry(
+		__field(const char *,		level			)
+		__field(const struct device *,	dev			)
+		__dynamic_array(char,		msg,	DRM_PRINT_MAX	)
+		__field(int,			len			)
+	),
+	TP_fast_assign(
+		__entry->level = level;
+		__entry->dev = dev;
+		__entry->len = vsnprintf(__get_str(msg), DRM_PRINT_MAX,
+					 vaf->fmt, *vaf->va);
+		if (__entry->len > 0 &&
+		    __get_str(msg)[__entry->len - 1] == '\n')
+			__get_str(msg)[--__entry->len] = '\0';
+	),
+	TP_printk("%s %s %s: %s", __entry->level,
+		  __entry->dev ? dev_driver_string(__entry->dev) : "(NULL):",
+		  __entry->dev ? dev_name(__entry->dev) : "", __get_str(msg))
+);
+
+DECLARE_EVENT_CLASS(class_drm_category_log,
+	TP_PROTO(const struct device *dev, struct va_format *vaf),
+	TP_ARGS(dev, vaf),
+	TP_STRUCT__entry(
+		__field(const struct device *,	dev			)
+		__dynamic_array(char,		msg,	DRM_PRINT_MAX	)
+		__field(int,			len			)
+	),
+	TP_fast_assign(
+		__entry->dev = dev;
+		__entry->len = vsnprintf(__get_str(msg), DRM_PRINT_MAX,
+					 vaf->fmt, *vaf->va);
+		if (__entry->len > 0 &&
+		    __get_str(msg)[__entry->len - 1] == '\n')
+			__get_str(msg)[--__entry->len] = '\0';
+	),
+	TP_printk("%s %s%s%s",
+		  __entry->dev ? dev_driver_string(__entry->dev) : "",
+		  __entry->dev ? dev_name(__entry->dev) : "",
+		  __entry->dev ? ": " : "", __get_str(msg))
+);
+DEFINE_EVENT(class_drm_category_log, drm_err,
+	TP_PROTO(const struct device *dev, struct va_format *vaf),
+	TP_ARGS(dev, vaf)
+);
+DEFINE_EVENT(class_drm_category_log, drm_dbg_none,
+	TP_PROTO(const struct device *dev, struct va_format *vaf),
+	TP_ARGS(dev, vaf)
+);
+DEFINE_EVENT(class_drm_category_log, drm_dbg_core,
+	TP_PROTO(const struct device *dev, struct va_format *vaf),
+	TP_ARGS(dev, vaf)
+);
+DEFINE_EVENT(class_drm_category_log, drm_dbg_driver,
+	TP_PROTO(const struct device *dev, struct va_format *vaf),
+	TP_ARGS(dev, vaf)
+);
+DEFINE_EVENT(class_drm_category_log, drm_dbg_kms,
+	TP_PROTO(const struct device *dev, struct va_format *vaf),
+	TP_ARGS(dev, vaf)
+);
+DEFINE_EVENT(class_drm_category_log, drm_dbg_prime,
+	TP_PROTO(const struct device *dev, struct va_format *vaf),
+	TP_ARGS(dev, vaf)
+);
+DEFINE_EVENT(class_drm_category_log, drm_dbg_atomic,
+	TP_PROTO(const struct device *dev, struct va_format *vaf),
+	TP_ARGS(dev, vaf)
+);
+DEFINE_EVENT(class_drm_category_log, drm_dbg_vbl,
+	TP_PROTO(const struct device *dev, struct va_format *vaf),
+	TP_ARGS(dev, vaf)
+);
+DEFINE_EVENT(class_drm_category_log, drm_dbg_state,
+	TP_PROTO(const struct device *dev, struct va_format *vaf),
+	TP_ARGS(dev, vaf)
+);
+DEFINE_EVENT(class_drm_category_log, drm_dbg_lease,
+	TP_PROTO(const struct device *dev, struct va_format *vaf),
+	TP_ARGS(dev, vaf)
+);
+DEFINE_EVENT(class_drm_category_log, drm_dbg_dp,
+	TP_PROTO(const struct device *dev, struct va_format *vaf),
+	TP_ARGS(dev, vaf)
+);
+DEFINE_EVENT(class_drm_category_log, drm_dbg_unknown,
+	TP_PROTO(const struct device *dev, struct va_format *vaf),
+	TP_ARGS(dev, vaf)
+);
+#endif
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>