
[v6,06/11] x86/hvm: processor trace interface in HVM

Message ID 1916e06793ffaaa70c471bcd6bcf168597793bd5.1594150543.git.michal.leszczynski@cert.pl
Series Implement support for external IPT monitoring

Commit Message

Michał Leszczyński July 7, 2020, 7:39 p.m. UTC
From: Michal Leszczynski <michal.leszczynski@cert.pl>

Implement the necessary changes in common code/HVM to support
processor trace features. Define the vmtrace_pt_* API and
implement trace buffer allocation/deallocation in common
code.

Signed-off-by: Michal Leszczynski <michal.leszczynski@cert.pl>
---
 xen/arch/x86/domain.c         | 21 +++++++++++++++++++++
 xen/common/domain.c           | 35 +++++++++++++++++++++++++++++++++++
 xen/include/asm-x86/hvm/hvm.h | 20 ++++++++++++++++++++
 xen/include/xen/sched.h       |  4 ++++
 4 files changed, 80 insertions(+)

Comments

Roger Pau Monne July 15, 2020, 3:43 p.m. UTC | #1
On Tue, Jul 07, 2020 at 09:39:45PM +0200, Michał Leszczyński wrote:
> From: Michal Leszczynski <michal.leszczynski@cert.pl>
> 
> Implement the necessary changes in common code/HVM to support
> processor trace features. Define the vmtrace_pt_* API and
> implement trace buffer allocation/deallocation in common
> code.
> 
> Signed-off-by: Michal Leszczynski <michal.leszczynski@cert.pl>
> ---
>  xen/arch/x86/domain.c         | 21 +++++++++++++++++++++
>  xen/common/domain.c           | 35 +++++++++++++++++++++++++++++++++++
>  xen/include/asm-x86/hvm/hvm.h | 20 ++++++++++++++++++++
>  xen/include/xen/sched.h       |  4 ++++
>  4 files changed, 80 insertions(+)
> 
> diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
> index b75017b28b..8ce2ab6b8f 100644
> --- a/xen/arch/x86/domain.c
> +++ b/xen/arch/x86/domain.c
> @@ -2205,6 +2205,27 @@ int domain_relinquish_resources(struct domain *d)
>                  altp2m_vcpu_disable_ve(v);
>          }
>  
> +        for_each_vcpu ( d, v )
> +        {
> +            unsigned int i;
> +            uint64_t nr_pages = v->domain->processor_trace_buf_kb * KB(1);
> +            nr_pages >>= PAGE_SHIFT;

It would be easier as:

unsigned int nr_pages = d->processor_trace_buf_kb / (PAGE_SIZE >> 10);

Or maybe:

unsigned int nr_pages = d->processor_trace_buf_kb >> (PAGE_SHIFT - 10);

> +
> +            if ( !v->vmtrace.pt_buf )
> +                continue;
> +
> +            for ( i = 0; i < nr_pages; i++ )
> +            {
> +                struct page_info *pg = mfn_to_page(
> +                    mfn_add(page_to_mfn(v->vmtrace.pt_buf), i));

You can just do:

struct page_info *pg = &v->vmtrace.pt_buf[i];

> +
> +                put_page_alloc_ref(pg);
> +                put_page_and_type(pg);
> +            }
> +
> +            v->vmtrace.pt_buf = NULL;
> +        }
> +
>          if ( is_pv_domain(d) )
>          {
>              for_each_vcpu ( d, v )
> diff --git a/xen/common/domain.c b/xen/common/domain.c
> index e6e8f88da1..193099a2ab 100644
> --- a/xen/common/domain.c
> +++ b/xen/common/domain.c
> @@ -137,6 +137,38 @@ static void vcpu_destroy(struct vcpu *v)
>      free_vcpu_struct(v);
>  }
>  
> +static int vmtrace_alloc_buffers(struct vcpu *v)
> +{
> +    unsigned int i;
> +    struct page_info *pg;
> +    uint64_t size = v->domain->processor_trace_buf_kb * KB(1);

Same here: you could work with a number of pages directly by turning
size into 'unsigned int nr_pages', and then use get_order_from_pages()
below.
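
E.g. something along these lines (untested):

unsigned int nr_pages =
    v->domain->processor_trace_buf_kb >> (PAGE_SHIFT - 10);

pg = alloc_domheap_pages(v->domain, get_order_from_pages(nr_pages),
                         MEMF_no_refcount);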

> +
> +    pg = alloc_domheap_pages(v->domain, get_order_from_bytes(size),
> +                             MEMF_no_refcount);
> +

Extra newline.

> +    if ( !pg )
> +        return -ENOMEM;
> +
> +    for ( i = 0; i < (size >> PAGE_SHIFT); i++ )
> +    {
> +        struct page_info *pg_iter = mfn_to_page(
> +            mfn_add(page_to_mfn(pg), i));

Same as above here, just use &pg[i].

> +
> +        if ( !get_page_and_type(pg_iter, v->domain, PGT_writable_page) )
> +        {
> +            /*
> +             * The domain can't possibly know about this page yet, so failure
> +             * here is a clear indication of something fishy going on.
> +             */
> +            domain_crash(v->domain);
> +            return -ENODATA;

ENODATA is IMO a weird return code; ENOMEM would likely be better.

What about the pg array of pages: don't you need to free it somehow
(and likely drop the references already taken on the pages before
pg[i])?
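
Roughly (untested; put_page_alloc_ref() drops the allocation
reference the MEMF_no_refcount pages still hold):

/* Drop both references taken on each page before pg[i], so those
 * pages get freed; the page that failed the type check keeps its
 * unexpected extra reference(s) and is deliberately leaked. */
while ( i-- )
{
    put_page_alloc_ref(&pg[i]);
    put_page_and_type(&pg[i]);
}
return -ENOMEM;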

> +        }
> +    }

Also you seem to assume that size is a power of 2, but I think that's
only guaranteed by the current Intel implementation, and hence other
implementations could have a more lax requirement (or even Intel when
using ToPA).

So you need to free the remaining pages if
(1 << get_order_from_pages(nr_pages)) != nr_pages.
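
E.g. (untested):

unsigned int nr_pages = size >> PAGE_SHIFT;
unsigned int order = get_order_from_pages(nr_pages);

/* Hand back the pages the order rounded up beyond what was asked for. */
for ( i = nr_pages; i < (1u << order); i++ )
    free_domheap_page(&pg[i]);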

> +
> +    v->vmtrace.pt_buf = pg;
> +    return 0;
> +}
> +
>  struct vcpu *vcpu_create(struct domain *d, unsigned int vcpu_id)
>  {
>      struct vcpu *v;
> @@ -162,6 +194,9 @@ struct vcpu *vcpu_create(struct domain *d, unsigned int vcpu_id)
>      v->vcpu_id = vcpu_id;
>      v->dirty_cpu = VCPU_CPU_CLEAN;
>  
> +    if ( d->processor_trace_buf_kb && vmtrace_alloc_buffers(v) != 0 )
> +        return NULL;

Don't you need to do some cleanup here in case of failure? AFAICT this
seems to leak the allocated v at least.
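
E.g. (untested; assuming nothing else needs unwinding at this point in
vcpu_create()):

if ( d->processor_trace_buf_kb && vmtrace_alloc_buffers(v) != 0 )
{
    vcpu_destroy(v);
    return NULL;
}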

>      spin_lock_init(&v->virq_lock);
>  
>      tasklet_init(&v->continue_hypercall_tasklet, NULL, NULL);
> diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h
> index 1eb377dd82..476a216205 100644
> --- a/xen/include/asm-x86/hvm/hvm.h
> +++ b/xen/include/asm-x86/hvm/hvm.h
> @@ -214,6 +214,10 @@ struct hvm_function_table {
>      bool_t (*altp2m_vcpu_emulate_ve)(struct vcpu *v);
>      int (*altp2m_vcpu_emulate_vmfunc)(const struct cpu_user_regs *regs);
>  
> +    /* vmtrace */
> +    int (*vmtrace_control_pt)(struct vcpu *v, bool enable);
> +    int (*vmtrace_get_pt_offset)(struct vcpu *v, uint64_t *offset, uint64_t *size);
> +
>      /*
>       * Parameters and callbacks for hardware-assisted TSC scaling,
>       * which are valid only when the hardware feature is available.
> @@ -655,6 +659,22 @@ static inline bool altp2m_vcpu_emulate_ve(struct vcpu *v)
>      return false;
>  }
>  
> +static inline int vmtrace_control_pt(struct vcpu *v, bool enable)
> +{
> +    if ( hvm_funcs.vmtrace_control_pt )
> +        return hvm_funcs.vmtrace_control_pt(v, enable);
> +
> +    return -EOPNOTSUPP;
> +}
> +
> +static inline int vmtrace_get_pt_offset(struct vcpu *v, uint64_t *offset, uint64_t *size)
> +{
> +    if ( hvm_funcs.vmtrace_get_pt_offset )
> +        return hvm_funcs.vmtrace_get_pt_offset(v, offset, size);
> +
> +    return -EOPNOTSUPP;
> +}

I think this API would be better placed together with the VMX
implementation of those functions; introducing it in this patch
doesn't seem required, since there are no callers?
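
For illustration, the VMX side could then wire the hooks up along
these lines (hypothetical function names; the real implementation
would come with the VMX patch of this series):

static struct hvm_function_table __initdata vmx_function_table = {
    /* ... existing hooks ... */
    .vmtrace_control_pt    = vmx_control_pt,
    .vmtrace_get_pt_offset = vmx_get_pt_offset,
};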

Thanks.
Jan Beulich Aug. 7, 2020, 2:37 p.m. UTC | #2
On 07.07.2020 21:39, Michał Leszczyński wrote:
> --- a/xen/arch/x86/domain.c
> +++ b/xen/arch/x86/domain.c
> @@ -2205,6 +2205,27 @@ int domain_relinquish_resources(struct domain *d)
>                  altp2m_vcpu_disable_ve(v);
>          }
>  
> +        for_each_vcpu ( d, v )
> +        {
> +            unsigned int i;
> +            uint64_t nr_pages = v->domain->processor_trace_buf_kb * KB(1);
> +            nr_pages >>= PAGE_SHIFT;
> +
> +            if ( !v->vmtrace.pt_buf )
> +                continue;
> +
> +            for ( i = 0; i < nr_pages; i++ )
> +            {
> +                struct page_info *pg = mfn_to_page(
> +                    mfn_add(page_to_mfn(v->vmtrace.pt_buf), i));
> +
> +                put_page_alloc_ref(pg);
> +                put_page_and_type(pg);
> +            }
> +
> +            v->vmtrace.pt_buf = NULL;
> +        }

This needs to allow for preemption. Also this isn't x86-specific,
so it should be implemented in common code (just like allocation is).
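
A preemptible variant in common code could look roughly like this
(untested sketch; the "freed" progress marker is made up and would
need adding to the vmtrace member of struct vcpu):

static int vmtrace_free_buffers(struct vcpu *v)
{
    struct page_info *pg = v->vmtrace.pt_buf;
    unsigned int nr_pages =
        v->domain->processor_trace_buf_kb >> (PAGE_SHIFT - 10);

    if ( !pg )
        return 0;

    /* v->vmtrace.freed: hypothetical marker so the loop can resume. */
    while ( v->vmtrace.freed < nr_pages )
    {
        unsigned int i = v->vmtrace.freed++;

        put_page_alloc_ref(&pg[i]);
        put_page_and_type(&pg[i]);

        /* Let the caller reschedule the continuable operation. */
        if ( hypercall_preempt_check() )
            return -ERESTART;
    }

    v->vmtrace.pt_buf = NULL;
    return 0;
}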

Jan

Patch

diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index b75017b28b..8ce2ab6b8f 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -2205,6 +2205,27 @@  int domain_relinquish_resources(struct domain *d)
                 altp2m_vcpu_disable_ve(v);
         }
 
+        for_each_vcpu ( d, v )
+        {
+            unsigned int i;
+            uint64_t nr_pages = v->domain->processor_trace_buf_kb * KB(1);
+            nr_pages >>= PAGE_SHIFT;
+
+            if ( !v->vmtrace.pt_buf )
+                continue;
+
+            for ( i = 0; i < nr_pages; i++ )
+            {
+                struct page_info *pg = mfn_to_page(
+                    mfn_add(page_to_mfn(v->vmtrace.pt_buf), i));
+
+                put_page_alloc_ref(pg);
+                put_page_and_type(pg);
+            }
+
+            v->vmtrace.pt_buf = NULL;
+        }
+
         if ( is_pv_domain(d) )
         {
             for_each_vcpu ( d, v )
diff --git a/xen/common/domain.c b/xen/common/domain.c
index e6e8f88da1..193099a2ab 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -137,6 +137,38 @@  static void vcpu_destroy(struct vcpu *v)
     free_vcpu_struct(v);
 }
 
+static int vmtrace_alloc_buffers(struct vcpu *v)
+{
+    unsigned int i;
+    struct page_info *pg;
+    uint64_t size = v->domain->processor_trace_buf_kb * KB(1);
+
+    pg = alloc_domheap_pages(v->domain, get_order_from_bytes(size),
+                             MEMF_no_refcount);
+
+    if ( !pg )
+        return -ENOMEM;
+
+    for ( i = 0; i < (size >> PAGE_SHIFT); i++ )
+    {
+        struct page_info *pg_iter = mfn_to_page(
+            mfn_add(page_to_mfn(pg), i));
+
+        if ( !get_page_and_type(pg_iter, v->domain, PGT_writable_page) )
+        {
+            /*
+             * The domain can't possibly know about this page yet, so failure
+             * here is a clear indication of something fishy going on.
+             */
+            domain_crash(v->domain);
+            return -ENODATA;
+        }
+    }
+
+    v->vmtrace.pt_buf = pg;
+    return 0;
+}
+
 struct vcpu *vcpu_create(struct domain *d, unsigned int vcpu_id)
 {
     struct vcpu *v;
@@ -162,6 +194,9 @@  struct vcpu *vcpu_create(struct domain *d, unsigned int vcpu_id)
     v->vcpu_id = vcpu_id;
     v->dirty_cpu = VCPU_CPU_CLEAN;
 
+    if ( d->processor_trace_buf_kb && vmtrace_alloc_buffers(v) != 0 )
+        return NULL;
+
     spin_lock_init(&v->virq_lock);
 
     tasklet_init(&v->continue_hypercall_tasklet, NULL, NULL);
diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h
index 1eb377dd82..476a216205 100644
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -214,6 +214,10 @@  struct hvm_function_table {
     bool_t (*altp2m_vcpu_emulate_ve)(struct vcpu *v);
     int (*altp2m_vcpu_emulate_vmfunc)(const struct cpu_user_regs *regs);
 
+    /* vmtrace */
+    int (*vmtrace_control_pt)(struct vcpu *v, bool enable);
+    int (*vmtrace_get_pt_offset)(struct vcpu *v, uint64_t *offset, uint64_t *size);
+
     /*
      * Parameters and callbacks for hardware-assisted TSC scaling,
      * which are valid only when the hardware feature is available.
@@ -655,6 +659,22 @@  static inline bool altp2m_vcpu_emulate_ve(struct vcpu *v)
     return false;
 }
 
+static inline int vmtrace_control_pt(struct vcpu *v, bool enable)
+{
+    if ( hvm_funcs.vmtrace_control_pt )
+        return hvm_funcs.vmtrace_control_pt(v, enable);
+
+    return -EOPNOTSUPP;
+}
+
+static inline int vmtrace_get_pt_offset(struct vcpu *v, uint64_t *offset, uint64_t *size)
+{
+    if ( hvm_funcs.vmtrace_get_pt_offset )
+        return hvm_funcs.vmtrace_get_pt_offset(v, offset, size);
+
+    return -EOPNOTSUPP;
+}
+
 /*
  * This must be defined as a macro instead of an inline function,
  * because it uses 'struct vcpu' and 'struct domain' which have
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index c046e59886..b6f39233aa 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -253,6 +253,10 @@  struct vcpu
     /* vPCI per-vCPU area, used to store data for long running operations. */
     struct vpci_vcpu vpci;
 
+    struct {
+        struct page_info *pt_buf;
+    } vmtrace;
+
     struct arch_vcpu arch;
 };