diff mbox

[08/15] xen: trace RCU behavior

Message ID 149633847165.12814.17021925979608518857.stgit@Solace.fritz.box (mailing list archive)
State New, archived
Headers show

Commit Message

Dario Faggioli June 1, 2017, 5:34 p.m. UTC
Making it possible generate events showing the
activity and the behavior of the RCU subsystem.

Gate this with its specific Kconfig option (under
CONFIG_TRACING), and keep it in disabled state by
default.
---
Cc: George Dunlap <George.Dunlap@eu.citrix.com>
Cc: Andrew Cooper <andrew.cooper3@citrix.com>
Cc: Jan Beulich <jbeulich@suse.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Stefano Stabellini <sstabellini@kernel.org>
Cc: Tim Deegan <tim@xen.org>
---
 xen/Kconfig.debug          |    8 ++++
 xen/common/rcupdate.c      |   82 ++++++++++++++++++++++++++++++++++++++++----
 xen/include/public/trace.h |   14 ++++++++
 3 files changed, 97 insertions(+), 7 deletions(-)

Comments

Jan Beulich June 9, 2017, 10:48 a.m. UTC | #1
>>> On 01.06.17 at 19:34, <dario.faggioli@citrix.com> wrote:
> --- a/xen/common/rcupdate.c
> +++ b/xen/common/rcupdate.c
> @@ -92,6 +92,57 @@ static int qhimark = 10000;
>  static int qlowmark = 100;
>  static int rsinterval = 1000;
>  
> +#ifdef CONFIG_TRACE_RCU
> +static inline void trace_call_rcu(void *func)
> +{
> +    uint64_t addr = (uint64_t)func;

I've probably overlooked something similar in patch 6 - why uint64_t
rather than unsigned long? Does this even build without warning for
ARM32?

> +    if ( likely(!tb_init_done) )
> +        return;
> +
> +    __trace_var(TRC_XEN_RCU_CALL_RCU, 0, sizeof(addr), &addr);
> +}
> +static inline void trace_start_batch(const cpumask_t *m)

Blank lines between functions please.

> +{
> +    uint32_t mask[6];
> +
> +    if ( likely(!tb_init_done) )
> +        return;
> +
> +    memset(mask, 0, sizeof(mask));

Perhaps better use {} as initializer?

> +    memcpy(mask, m, min(sizeof(mask), sizeof(cpumask_t)));

sizeof(*m)

> +#define trace_force_qstate()    TRACE_0D(TRC_XEN_RCU_FORCE_QSTATE)
> +#define trace_cpu_quiet()       TRACE_0D(TRC_XEN_RCU_CPU_QUIET)
> +#define trace_check_qstate(p)   TRACE_1D(TRC_XEN_RCU_CHECK_QSTATE, p)
> +#define trace_do_callbacks()    TRACE_0D(TRC_XEN_RCU_DO_CALLBKS)
> +#define trace_pending(p)        TRACE_1D(TRC_XEN_RCU_PENDING, p)
> +#else /* !TRACE_RCU */
> +#define trace_call_rcu(f)       do {} while ( 0 )
> +#define trace_start_batch(m)    do {} while ( 0 )
> +#define trace_do_batch(f, q)    do {} while ( 0 )
> +#define trace_force_qstate()    do {} while ( 0 )
> +#define trace_cpu_quiet()       do {} while ( 0 )
> +#define trace_check_qstate(p)   do {} while ( 0 )
> +#define trace_do_callbacks()    do {} while ( 0 )
> +#define trace_pending(p)        do {} while ( 0 )

Here and elsewhere please make sure you evaluate macro
arguments consistently (i.e. exactly once regardless of tracing
being enabled or disabled).

Jan
George Dunlap June 13, 2017, 4:05 p.m. UTC | #2
On 01/06/17 18:34, Dario Faggioli wrote:
> Making it possible generate events showing the
> activity and the behavior of the RCU subsystem.
> 
> Gate this with its specific Kconfig option (under
> CONFIG_TRACING), and keep it in disabled state by
> default.
> ---
> Cc: George Dunlap <George.Dunlap@eu.citrix.com>
> Cc: Andrew Cooper <andrew.cooper3@citrix.com>
> Cc: Jan Beulich <jbeulich@suse.com>
> Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
> Cc: Stefano Stabellini <sstabellini@kernel.org>
> Cc: Tim Deegan <tim@xen.org>
> ---
>  xen/Kconfig.debug          |    8 ++++
>  xen/common/rcupdate.c      |   82 ++++++++++++++++++++++++++++++++++++++++----
>  xen/include/public/trace.h |   14 ++++++++
>  3 files changed, 97 insertions(+), 7 deletions(-)
> 
> diff --git a/xen/Kconfig.debug b/xen/Kconfig.debug
> index 81910c9..fd5cccc 100644
> --- a/xen/Kconfig.debug
> +++ b/xen/Kconfig.debug
> @@ -116,6 +116,14 @@ config TRACE_IRQSOFF
>            Note that this comes with high overead and produces huge amount of
>            tracing data.
>  
> +config TRACE_RCU
> +       bool "Trace RCU behavior" if EXPERT = "y"
> +       default n
> +       depends on TRACING
> +       ---help---
> +         Makes it possible generate events  showing the activity and the
> +         behavior of the RCU subsystem.
> +
>  config VERBOSE_DEBUG
>  	bool "Verbose debug messages"
>  	default DEBUG
> diff --git a/xen/common/rcupdate.c b/xen/common/rcupdate.c
> index 8cc5a82..f160582 100644
> --- a/xen/common/rcupdate.c
> +++ b/xen/common/rcupdate.c
> @@ -92,6 +92,57 @@ static int qhimark = 10000;
>  static int qlowmark = 100;
>  static int rsinterval = 1000;
>  
> +#ifdef CONFIG_TRACE_RCU
> +static inline void trace_call_rcu(void *func)
> +{
> +    uint64_t addr = (uint64_t)func;
> +
> +    if ( likely(!tb_init_done) )
> +        return;
> +
> +    __trace_var(TRC_XEN_RCU_CALL_RCU, 0, sizeof(addr), &addr);
> +}
> +static inline void trace_start_batch(const cpumask_t *m)
> +{
> +    uint32_t mask[6];
> +
> +    if ( likely(!tb_init_done) )
> +        return;
> +
> +    memset(mask, 0, sizeof(mask));
> +    memcpy(mask, m, min(sizeof(mask), sizeof(cpumask_t)));
> +    __trace_var(TRC_XEN_RCU_START_BATCH, 0, sizeof(mask), &mask);

Another possibility here would be to say something like:

size = min(sizeof(mask), sizeof(cpumask_t))
memcpy(mask, m, size)
__trace_var(..., 0, size, &mask)

That would result in a more efficient log on systems which, say, had
defied NR_CPUS to something relatively small for efficiency reasons; and
you wouldn't need to clear the unused bits.

Everything else looks good (with Jan's comments).

 -George
diff mbox

Patch

diff --git a/xen/Kconfig.debug b/xen/Kconfig.debug
index 81910c9..fd5cccc 100644
--- a/xen/Kconfig.debug
+++ b/xen/Kconfig.debug
@@ -116,6 +116,14 @@  config TRACE_IRQSOFF
           Note that this comes with high overead and produces huge amount of
           tracing data.
 
+config TRACE_RCU
+       bool "Trace RCU behavior" if EXPERT = "y"
+       default n
+       depends on TRACING
+       ---help---
+         Makes it possible generate events  showing the activity and the
+         behavior of the RCU subsystem.
+
 config VERBOSE_DEBUG
 	bool "Verbose debug messages"
 	default DEBUG
diff --git a/xen/common/rcupdate.c b/xen/common/rcupdate.c
index 8cc5a82..f160582 100644
--- a/xen/common/rcupdate.c
+++ b/xen/common/rcupdate.c
@@ -92,6 +92,57 @@  static int qhimark = 10000;
 static int qlowmark = 100;
 static int rsinterval = 1000;
 
+#ifdef CONFIG_TRACE_RCU
+static inline void trace_call_rcu(void *func)
+{
+    uint64_t addr = (uint64_t)func;
+
+    if ( likely(!tb_init_done) )
+        return;
+
+    __trace_var(TRC_XEN_RCU_CALL_RCU, 0, sizeof(addr), &addr);
+}
+static inline void trace_start_batch(const cpumask_t *m)
+{
+    uint32_t mask[6];
+
+    if ( likely(!tb_init_done) )
+        return;
+
+    memset(mask, 0, sizeof(mask));
+    memcpy(mask, m, min(sizeof(mask), sizeof(cpumask_t)));
+    __trace_var(TRC_XEN_RCU_START_BATCH, 0, sizeof(mask), &mask);
+}
+static inline void trace_do_batch(void *func, long int qlen)
+{
+    struct {
+        uint64_t addr;
+        int64_t qlen;
+    } d;
+
+    if ( likely(!tb_init_done) )
+        return;
+
+    d.addr = (uint64_t)func;
+    d.qlen = qlen;
+    __trace_var(TRC_XEN_RCU_DO_BATCH, 0, sizeof(d), &d);
+}
+#define trace_force_qstate()    TRACE_0D(TRC_XEN_RCU_FORCE_QSTATE)
+#define trace_cpu_quiet()       TRACE_0D(TRC_XEN_RCU_CPU_QUIET)
+#define trace_check_qstate(p)   TRACE_1D(TRC_XEN_RCU_CHECK_QSTATE, p)
+#define trace_do_callbacks()    TRACE_0D(TRC_XEN_RCU_DO_CALLBKS)
+#define trace_pending(p)        TRACE_1D(TRC_XEN_RCU_PENDING, p)
+#else /* !TRACE_RCU */
+#define trace_call_rcu(f)       do {} while ( 0 )
+#define trace_start_batch(m)    do {} while ( 0 )
+#define trace_do_batch(f, q)    do {} while ( 0 )
+#define trace_force_qstate()    do {} while ( 0 )
+#define trace_cpu_quiet()       do {} while ( 0 )
+#define trace_check_qstate(p)   do {} while ( 0 )
+#define trace_do_callbacks()    do {} while ( 0 )
+#define trace_pending(p)        do {} while ( 0 )
+#endif /* TRACE_RCU */
+
 struct rcu_barrier_data {
     struct rcu_head head;
     atomic_t *cpu_count;
@@ -145,6 +196,9 @@  static void force_quiescent_state(struct rcu_data *rdp,
                                   struct rcu_ctrlblk *rcp)
 {
     cpumask_t cpumask;
+
+    trace_force_qstate();
+
     raise_softirq(SCHEDULE_SOFTIRQ);
     if (unlikely(rdp->qlen - rdp->last_rs_qlen > rsinterval)) {
         rdp->last_rs_qlen = rdp->qlen;
@@ -177,6 +231,7 @@  void call_rcu(struct rcu_head *head,
     head->func = func;
     head->next = NULL;
     local_irq_save(flags);
+    trace_call_rcu(func);
     rdp = &__get_cpu_var(rcu_data);
     *rdp->nxttail = head;
     rdp->nxttail = &head->next;
@@ -199,12 +254,14 @@  static void rcu_do_batch(struct rcu_data *rdp)
     list = rdp->donelist;
     while (list) {
         next = rdp->donelist = list->next;
+        trace_do_batch(list->func, rdp->qlen);
         list->func(list);
         list = next;
         rdp->qlen--;
         if (++count >= rdp->blimit)
             break;
     }
+
     if (rdp->blimit == INT_MAX && rdp->qlen <= qlowmark)
         rdp->blimit = blimit;
     if (!rdp->donelist)
@@ -249,6 +306,7 @@  static void rcu_start_batch(struct rcu_ctrlblk *rcp)
         rcp->cur++;
 
         cpumask_copy(&rcp->cpumask, &cpu_online_map);
+        trace_start_batch(&rcp->cpumask);
     }
 }
 
@@ -259,6 +317,7 @@  static void rcu_start_batch(struct rcu_ctrlblk *rcp)
  */
 static void cpu_quiet(int cpu, struct rcu_ctrlblk *rcp)
 {
+    trace_cpu_quiet();
     cpumask_clear_cpu(cpu, &rcp->cpumask);
     if (cpumask_empty(&rcp->cpumask)) {
         /* batch completed ! */
@@ -279,7 +338,7 @@  static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp,
         /* start new grace period: */
         rdp->qs_pending = 1;
         rdp->quiescbatch = rcp->cur;
-        return;
+        goto out;
     }
 
     /* Grace period already completed for this cpu?
@@ -287,7 +346,7 @@  static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp,
      * cacheline trashing.
      */
     if (!rdp->qs_pending)
-        return;
+        goto out;
 
     rdp->qs_pending = 0;
 
@@ -300,6 +359,8 @@  static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp,
         cpu_quiet(rdp->cpu, rcp);
 
     spin_unlock(&rcp->lock);
+ out:
+    trace_check_qstate(rdp->qs_pending);
 }
 
 
@@ -309,6 +370,8 @@  static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp,
 static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp,
                                     struct rcu_data *rdp)
 {
+    trace_do_callbacks();
+
     if (rdp->curlist && !rcu_batch_before(rcp->completed, rdp->batch)) {
         *rdp->donetail = rdp->curlist;
         rdp->donetail = rdp->curtail;
@@ -357,26 +420,31 @@  static void rcu_process_callbacks(void)
 
 static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
 {
+    bool ret = true;
+
     /* This cpu has pending rcu entries and the grace period
      * for them has completed.
      */
     if (rdp->curlist && !rcu_batch_before(rcp->completed, rdp->batch))
-        return 1;
+        goto out;
 
     /* This cpu has no pending entries, but there are new entries */
     if (!rdp->curlist && rdp->nxtlist)
-        return 1;
+        goto out;
 
     /* This cpu has finished callbacks to invoke */
     if (rdp->donelist)
-        return 1;
+        goto out;
 
     /* The rcu core waits for a quiescent state from the cpu */
     if (rdp->quiescbatch != rcp->cur || rdp->qs_pending)
-        return 1;
+        goto out;
 
     /* nothing to do */
-    return 0;
+    ret = false;
+ out:
+    trace_pending(ret);
+    return ret;
 }
 
 int rcu_pending(int cpu)
diff --git a/xen/include/public/trace.h b/xen/include/public/trace.h
index 1692a79..50b4fcc 100644
--- a/xen/include/public/trace.h
+++ b/xen/include/public/trace.h
@@ -39,6 +39,7 @@ 
 #define TRC_PV       0x0020f000    /* Xen PV traces            */
 #define TRC_SHADOW   0x0040f000    /* Xen shadow tracing       */
 #define TRC_HW       0x0080f000    /* Xen hardware-related traces */
+#define TRC_XEN      0x0100f000    /* Xen misc traces (RCU, softirq, etc) */
 #define TRC_GUEST    0x0800f000    /* Guest-generated traces   */
 #define TRC_ALL      0x0ffff000
 #define TRC_HD_TO_EVENT(x) ((x)&0x0fffffff)
@@ -92,6 +93,9 @@ 
 #define TRC_HW_PM           0x00801000   /* Power management traces */
 #define TRC_HW_IRQ          0x00802000   /* Traces relating to the handling of IRQs */
 
+/* Trace subclasses for Xen internals */
+#define TRC_XEN_RCU         0x01001000   /* RCU traces */
+
 /* Trace events per class */
 #define TRC_LOST_RECORDS        (TRC_GEN + 1)
 #define TRC_TRACE_WRAP_BUFFER  (TRC_GEN + 2)
@@ -278,6 +282,16 @@ 
 #define TRC_HW_IRQ_DISABLE            (TRC_HW_IRQ + 0xD)
 #define TRC_HW_IRQ_ENABLE             (TRC_HW_IRQ + 0xE)
 
+/* Trace events for RCU */
+#define TRC_XEN_RCU_FORCE_QSTATE      (TRC_XEN_RCU + 0x1)
+#define TRC_XEN_RCU_CALL_RCU          (TRC_XEN_RCU + 0x2)
+#define TRC_XEN_RCU_START_BATCH       (TRC_XEN_RCU + 0x3)
+#define TRC_XEN_RCU_DO_BATCH          (TRC_XEN_RCU + 0x4)
+#define TRC_XEN_RCU_CPU_QUIET         (TRC_XEN_RCU + 0x5)
+#define TRC_XEN_RCU_CHECK_QSTATE      (TRC_XEN_RCU + 0x6)
+#define TRC_XEN_RCU_DO_CALLBKS        (TRC_XEN_RCU + 0x7)
+#define TRC_XEN_RCU_PENDING           (TRC_XEN_RCU + 0x8)
+
 /*
  * Event Flags
  *