| Message ID | 7ddfc44d6ffde0fa307f0e074225f588c397aef0.1594150543.git.michal.leszczynski@cert.pl (mailing list archive) |
|---|---|
| State | New, archived |
| Series | Implement support for external IPT monitoring |
On Tue, Jul 07, 2020 at 09:39:46PM +0200, Michał Leszczyński wrote:
> From: Michal Leszczynski <michal.leszczynski@cert.pl>
>
> Use Intel Processor Trace feature to provide vmtrace_pt_*
> interface for HVM/VMX.
>
> Signed-off-by: Michal Leszczynski <michal.leszczynski@cert.pl>
> ---
>  xen/arch/x86/hvm/vmx/vmx.c         | 110 +++++++++++++++++++++++++++++
>  xen/include/asm-x86/hvm/vmx/vmcs.h |   3 +
>  xen/include/asm-x86/hvm/vmx/vmx.h  |  14 ++++
>  3 files changed, 127 insertions(+)
>
> diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
> index cc6d4ece22..63a5a76e16 100644
> --- a/xen/arch/x86/hvm/vmx/vmx.c
> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> @@ -428,6 +428,56 @@ static void vmx_domain_relinquish_resources(struct domain *d)
>      vmx_free_vlapic_mapping(d);
>  }
>
> +static int vmx_init_pt(struct vcpu *v)
> +{
> +    int rc;
> +    uint64_t size = v->domain->processor_trace_buf_kb * KB(1);

As I commented in other patches, I don't think there's a need to have
the size in bytes, and hence you could just convert to number of
pages? You might have to check that the value is rounded to a page
boundary.

> +
> +    if ( !v->vmtrace.pt_buf || !size )
> +        return -EINVAL;
> +
> +    /*
> +     * We don't accept trace buffer size smaller than single page
> +     * and the upper bound is defined as 4GB in the specification.
> +     * The buffer size must be also a power of 2.
> +     */
> +    if ( size < PAGE_SIZE || size > GB(4) || (size & (size - 1)) )
> +        return -EINVAL;

IMO there should be a hook to sanitize the buffer size before you go
and allocate it. It makes no sense to allocate a buffer to come here
and realize it's not suitable.

> +
> +    v->arch.hvm.vmx.ipt_state = xzalloc(struct ipt_state);
> +

Extra newline.

> +    if ( !v->arch.hvm.vmx.ipt_state )
> +        return -ENOMEM;
> +
> +    v->arch.hvm.vmx.ipt_state->output_base =
> +        page_to_maddr(v->vmtrace.pt_buf);

The above fits on a single line now. You could also avoid having an
output_base field and just do the conversion in vmx_restore_guest_msrs,
I'm not sure there's much value in having this cached here.

> +    v->arch.hvm.vmx.ipt_state->output_mask.raw = size - 1;
> +
> +    rc = vmx_add_host_load_msr(v, MSR_RTIT_CTL, 0);
> +
> +    if ( rc )
> +        return rc;
> +
> +    rc = vmx_add_guest_msr(v, MSR_RTIT_CTL,
> +                              RTIT_CTL_TRACE_EN | RTIT_CTL_OS |
> +                              RTIT_CTL_USR | RTIT_CTL_BRANCH_EN);
> +
> +    if ( rc )
> +        return rc;

We don't usually leave an empty line between setting and testing rc.

> +
> +    return 0;
> +}
> +
> +static int vmx_destroy_pt(struct vcpu* v)
> +{
> +    if ( v->arch.hvm.vmx.ipt_state )
> +        xfree(v->arch.hvm.vmx.ipt_state);
> +
> +    v->arch.hvm.vmx.ipt_state = NULL;
> +    return 0;
> +}
> +
> +

Double newline, just one newline please between functions.

>  static int vmx_vcpu_initialise(struct vcpu *v)
>  {
>      int rc;
> @@ -471,6 +521,14 @@ static int vmx_vcpu_initialise(struct vcpu *v)
>
>      vmx_install_vlapic_mapping(v);
>
> +    if ( v->domain->processor_trace_buf_kb )

Can you move this check inside of vmx_init_pt, so that here you just
do:

return vmx_init_pt(v);

> +    {
> +        rc = vmx_init_pt(v);
> +
> +        if ( rc )
> +            return rc;
> +    }
> +
>      return 0;
>  }
>
> @@ -483,6 +541,7 @@ static void vmx_vcpu_destroy(struct vcpu *v)
>       * prior to vmx_domain_destroy so we need to disable PML for each vcpu
>       * separately here.
>       */
> +    vmx_destroy_pt(v);
>      vmx_vcpu_disable_pml(v);
>      vmx_destroy_vmcs(v);
>      passive_domain_destroy(v);
> @@ -513,6 +572,18 @@ static void vmx_save_guest_msrs(struct vcpu *v)
>       * be updated at any time via SWAPGS, which we cannot trap.
>       */
>      v->arch.hvm.vmx.shadow_gs = rdgsshadow();
> +
> +    if ( unlikely(v->arch.hvm.vmx.ipt_state &&
> +                  v->arch.hvm.vmx.ipt_state->active) )
> +    {
> +        uint64_t rtit_ctl;

Missing newline.

> +        rdmsrl(MSR_RTIT_CTL, rtit_ctl);
> +        BUG_ON(rtit_ctl & RTIT_CTL_TRACE_EN);
> +
> +        rdmsrl(MSR_RTIT_STATUS, v->arch.hvm.vmx.ipt_state->status);
> +        rdmsrl(MSR_RTIT_OUTPUT_MASK,
> +               v->arch.hvm.vmx.ipt_state->output_mask.raw);
> +    }
>  }
>
>  static void vmx_restore_guest_msrs(struct vcpu *v)
> @@ -524,6 +595,17 @@ static void vmx_restore_guest_msrs(struct vcpu *v)
>
>      if ( cpu_has_msr_tsc_aux )
>          wrmsr_tsc_aux(v->arch.msrs->tsc_aux);
> +
> +    if ( unlikely(v->arch.hvm.vmx.ipt_state &&
> +                  v->arch.hvm.vmx.ipt_state->active) )
> +    {
> +        wrmsrl(MSR_RTIT_OUTPUT_BASE,
> +               v->arch.hvm.vmx.ipt_state->output_base);
> +        wrmsrl(MSR_RTIT_OUTPUT_MASK,
> +               v->arch.hvm.vmx.ipt_state->output_mask.raw);
> +        wrmsrl(MSR_RTIT_STATUS,
> +               v->arch.hvm.vmx.ipt_state->status);
> +    }
>  }
>
>  void vmx_update_cpu_exec_control(struct vcpu *v)
> @@ -2240,6 +2322,25 @@ static bool vmx_get_pending_event(struct vcpu *v, struct x86_event *info)
>      return true;
>  }
>
> +static int vmx_control_pt(struct vcpu *v, bool enable)
> +{
> +    if ( !v->arch.hvm.vmx.ipt_state )
> +        return -EINVAL;
> +
> +    v->arch.hvm.vmx.ipt_state->active = enable;

I think you should assert that the vCPU is paused? As doing this on a
non-paused vCPU is not going to work reliably?

> +    return 0;
> +}
> +
> +static int vmx_get_pt_offset(struct vcpu *v, uint64_t *offset, uint64_t *size)
> +{
> +    if ( !v->arch.hvm.vmx.ipt_state )
> +        return -EINVAL;
> +
> +    *offset = v->arch.hvm.vmx.ipt_state->output_mask.offset;
> +    *size = v->arch.hvm.vmx.ipt_state->output_mask.size + 1;
> +    return 0;
> +}
> +
>  static struct hvm_function_table __initdata vmx_function_table = {
>      .name = "VMX",
>      .cpu_up_prepare = vmx_cpu_up_prepare,
> @@ -2295,6 +2396,8 @@ static struct hvm_function_table __initdata vmx_function_table = {
>      .altp2m_vcpu_update_vmfunc_ve = vmx_vcpu_update_vmfunc_ve,
>      .altp2m_vcpu_emulate_ve = vmx_vcpu_emulate_ve,
>      .altp2m_vcpu_emulate_vmfunc = vmx_vcpu_emulate_vmfunc,
> +    .vmtrace_control_pt = vmx_control_pt,
> +    .vmtrace_get_pt_offset = vmx_get_pt_offset,
>      .tsc_scaling = {
>          .max_ratio = VMX_TSC_MULTIPLIER_MAX,
>      },
> @@ -3674,6 +3777,13 @@ void vmx_vmexit_handler(struct cpu_user_regs *regs)
>
>      hvm_invalidate_regs_fields(regs);
>
> +    if ( unlikely(v->arch.hvm.vmx.ipt_state &&
> +                  v->arch.hvm.vmx.ipt_state->active) )
> +    {
> +        rdmsrl(MSR_RTIT_OUTPUT_MASK,
> +               v->arch.hvm.vmx.ipt_state->output_mask.raw);
> +    }

Unneeded braces.

Thanks.
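For illustration, vmx_init_pt() with some of the comments above folded in might look like the following (untested sketch; it keeps the helpers and fields from the patch, moves the processor_trace_buf_kb check inside so the caller can become a plain "return vmx_init_pt(v);", and drops the stray blank lines):

static int vmx_init_pt(struct vcpu *v)
{
    uint64_t size = v->domain->processor_trace_buf_kb * KB(1);
    int rc;

    /* Nothing to do if tracing wasn't requested for this domain. */
    if ( !size )
        return 0;

    if ( !v->vmtrace.pt_buf )
        return -EINVAL;

    /*
     * At least one page, at most 4GB (per the SDM), and a power of 2.
     * Ideally validated before the buffer gets allocated, as noted above.
     */
    if ( size < PAGE_SIZE || size > GB(4) || (size & (size - 1)) )
        return -EINVAL;

    v->arch.hvm.vmx.ipt_state = xzalloc(struct ipt_state);
    if ( !v->arch.hvm.vmx.ipt_state )
        return -ENOMEM;

    v->arch.hvm.vmx.ipt_state->output_base = page_to_maddr(v->vmtrace.pt_buf);
    v->arch.hvm.vmx.ipt_state->output_mask.raw = size - 1;

    rc = vmx_add_host_load_msr(v, MSR_RTIT_CTL, 0);
    if ( rc )
        return rc;

    return vmx_add_guest_msr(v, MSR_RTIT_CTL,
                             RTIT_CTL_TRACE_EN | RTIT_CTL_OS |
                             RTIT_CTL_USR | RTIT_CTL_BRANCH_EN);
}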
On 07.07.2020 21:39, Michał Leszczyński wrote:
> --- a/xen/arch/x86/hvm/vmx/vmx.c
> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> @@ -428,6 +428,56 @@ static void vmx_domain_relinquish_resources(struct domain *d)
>      vmx_free_vlapic_mapping(d);
>  }
>
> +static int vmx_init_pt(struct vcpu *v)
> +{
> +    int rc;
> +    uint64_t size = v->domain->processor_trace_buf_kb * KB(1);
> +
> +    if ( !v->vmtrace.pt_buf || !size )
> +        return -EINVAL;
> +
> +    /*
> +     * We don't accept trace buffer size smaller than single page
> +     * and the upper bound is defined as 4GB in the specification.
> +     * The buffer size must be also a power of 2.
> +     */
> +    if ( size < PAGE_SIZE || size > GB(4) || (size & (size - 1)) )
> +        return -EINVAL;
> +
> +    v->arch.hvm.vmx.ipt_state = xzalloc(struct ipt_state);
> +
> +    if ( !v->arch.hvm.vmx.ipt_state )
> +        return -ENOMEM;
> +
> +    v->arch.hvm.vmx.ipt_state->output_base =
> +        page_to_maddr(v->vmtrace.pt_buf);
> +    v->arch.hvm.vmx.ipt_state->output_mask.raw = size - 1;
> +
> +    rc = vmx_add_host_load_msr(v, MSR_RTIT_CTL, 0);
> +
> +    if ( rc )
> +        return rc;
> +
> +    rc = vmx_add_guest_msr(v, MSR_RTIT_CTL,
> +                              RTIT_CTL_TRACE_EN | RTIT_CTL_OS |
> +                              RTIT_CTL_USR | RTIT_CTL_BRANCH_EN);

Indentation is off by three here, and ...

> +    if ( rc )
> +        return rc;
> +
> +    return 0;
> +}

... this whole thing would be shorter (and hence easier to read) as

    return vmx_add_guest_msr(v, MSR_RTIT_CTL,
                             RTIT_CTL_TRACE_EN | RTIT_CTL_OS |
                             RTIT_CTL_USR | RTIT_CTL_BRANCH_EN);

> +static int vmx_destroy_pt(struct vcpu* v)
> +{
> +    if ( v->arch.hvm.vmx.ipt_state )
> +        xfree(v->arch.hvm.vmx.ipt_state);

No need for the if().

> +    v->arch.hvm.vmx.ipt_state = NULL;

And everything together is actually simply
"XFREE(v->arch.hvm.vmx.ipt_state);".

> @@ -471,6 +521,14 @@ static int vmx_vcpu_initialise(struct vcpu *v)
>
>      vmx_install_vlapic_mapping(v);
>
> +    if ( v->domain->processor_trace_buf_kb )
> +    {
> +        rc = vmx_init_pt(v);
> +
> +        if ( rc )
> +            return rc;
> +    }
> +
>      return 0;
>  }

Is there no cleanup you need to do in case of failure? The caller will
invoke vmx_vcpu_destroy() only for failures subsequent to one coming
from here.

> @@ -513,6 +572,18 @@ static void vmx_save_guest_msrs(struct vcpu *v)
>       * be updated at any time via SWAPGS, which we cannot trap.
>       */
>      v->arch.hvm.vmx.shadow_gs = rdgsshadow();
> +
> +    if ( unlikely(v->arch.hvm.vmx.ipt_state &&
> +                  v->arch.hvm.vmx.ipt_state->active) )

likely() / unlikely(), for being efficient, don't want && or || in
their expressions. Please limit to just the left side of &&.

> @@ -2240,6 +2322,25 @@ static bool vmx_get_pending_event(struct vcpu *v, struct x86_event *info)
>      return true;
>  }
>
> +static int vmx_control_pt(struct vcpu *v, bool enable)
> +{
> +    if ( !v->arch.hvm.vmx.ipt_state )
> +        return -EINVAL;
> +
> +    v->arch.hvm.vmx.ipt_state->active = enable;

Peeking ahead into patch 9, the vCPU is paused at this point. Please
add a respective ASSERT(), if only for documentation purposes.

> +static int vmx_get_pt_offset(struct vcpu *v, uint64_t *offset, uint64_t *size)
> +{
> +    if ( !v->arch.hvm.vmx.ipt_state )
> +        return -EINVAL;
> +
> +    *offset = v->arch.hvm.vmx.ipt_state->output_mask.offset;
> +    *size = v->arch.hvm.vmx.ipt_state->output_mask.size + 1;

Either the function parameter or the struct field is misnamed, or else
there shouldn't be an addition of 1 here.

> @@ -2295,6 +2396,8 @@ static struct hvm_function_table __initdata vmx_function_table = {
>      .altp2m_vcpu_update_vmfunc_ve = vmx_vcpu_update_vmfunc_ve,
>      .altp2m_vcpu_emulate_ve = vmx_vcpu_emulate_ve,
>      .altp2m_vcpu_emulate_vmfunc = vmx_vcpu_emulate_vmfunc,
> +    .vmtrace_control_pt = vmx_control_pt,
> +    .vmtrace_get_pt_offset = vmx_get_pt_offset,

Better install these hooks only if the underlying feature is available
(like we do for several other hooks)?

> @@ -3674,6 +3777,13 @@ void vmx_vmexit_handler(struct cpu_user_regs *regs)
>
>      hvm_invalidate_regs_fields(regs);
>
> +    if ( unlikely(v->arch.hvm.vmx.ipt_state &&
> +                  v->arch.hvm.vmx.ipt_state->active) )
> +    {
> +        rdmsrl(MSR_RTIT_OUTPUT_MASK,
> +               v->arch.hvm.vmx.ipt_state->output_mask.raw);

Don't you also need to latch RTIT_STATUS?

> --- a/xen/include/asm-x86/hvm/vmx/vmx.h
> +++ b/xen/include/asm-x86/hvm/vmx/vmx.h
> @@ -689,4 +689,18 @@ typedef union ldt_or_tr_instr_info {
>      };
>  } ldt_or_tr_instr_info_t;
>
> +/* Processor Trace state per vCPU */
> +struct ipt_state {
> +    bool active;
> +    uint64_t status;
> +    uint64_t output_base;

maddr_t according to its use.

Jan
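For illustration, the destroy path and the pause assertion requested above could look roughly like this (untested sketch; XFREE() frees and NULLs the pointer in one go, and the pause_count check is the usual Xen idiom, though the series may end up asserting something slightly different):

static int vmx_destroy_pt(struct vcpu *v)
{
    XFREE(v->arch.hvm.vmx.ipt_state);
    return 0;
}

static int vmx_control_pt(struct vcpu *v, bool enable)
{
    /* Patch 9 pauses the vCPU around this call; document/enforce that. */
    ASSERT(atomic_read(&v->pause_count));

    if ( !v->arch.hvm.vmx.ipt_state )
        return -EINVAL;

    v->arch.hvm.vmx.ipt_state->active = enable;

    return 0;
}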
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index cc6d4ece22..63a5a76e16 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -428,6 +428,56 @@ static void vmx_domain_relinquish_resources(struct domain *d)
     vmx_free_vlapic_mapping(d);
 }
 
+static int vmx_init_pt(struct vcpu *v)
+{
+    int rc;
+    uint64_t size = v->domain->processor_trace_buf_kb * KB(1);
+
+    if ( !v->vmtrace.pt_buf || !size )
+        return -EINVAL;
+
+    /*
+     * We don't accept trace buffer size smaller than single page
+     * and the upper bound is defined as 4GB in the specification.
+     * The buffer size must be also a power of 2.
+     */
+    if ( size < PAGE_SIZE || size > GB(4) || (size & (size - 1)) )
+        return -EINVAL;
+
+    v->arch.hvm.vmx.ipt_state = xzalloc(struct ipt_state);
+
+    if ( !v->arch.hvm.vmx.ipt_state )
+        return -ENOMEM;
+
+    v->arch.hvm.vmx.ipt_state->output_base =
+        page_to_maddr(v->vmtrace.pt_buf);
+    v->arch.hvm.vmx.ipt_state->output_mask.raw = size - 1;
+
+    rc = vmx_add_host_load_msr(v, MSR_RTIT_CTL, 0);
+
+    if ( rc )
+        return rc;
+
+    rc = vmx_add_guest_msr(v, MSR_RTIT_CTL,
+                              RTIT_CTL_TRACE_EN | RTIT_CTL_OS |
+                              RTIT_CTL_USR | RTIT_CTL_BRANCH_EN);
+
+    if ( rc )
+        return rc;
+
+    return 0;
+}
+
+static int vmx_destroy_pt(struct vcpu* v)
+{
+    if ( v->arch.hvm.vmx.ipt_state )
+        xfree(v->arch.hvm.vmx.ipt_state);
+
+    v->arch.hvm.vmx.ipt_state = NULL;
+    return 0;
+}
+
+
 static int vmx_vcpu_initialise(struct vcpu *v)
 {
     int rc;
@@ -471,6 +521,14 @@ static int vmx_vcpu_initialise(struct vcpu *v)
 
     vmx_install_vlapic_mapping(v);
 
+    if ( v->domain->processor_trace_buf_kb )
+    {
+        rc = vmx_init_pt(v);
+
+        if ( rc )
+            return rc;
+    }
+
     return 0;
 }
 
@@ -483,6 +541,7 @@ static void vmx_vcpu_destroy(struct vcpu *v)
      * prior to vmx_domain_destroy so we need to disable PML for each vcpu
      * separately here.
      */
+    vmx_destroy_pt(v);
     vmx_vcpu_disable_pml(v);
     vmx_destroy_vmcs(v);
     passive_domain_destroy(v);
@@ -513,6 +572,18 @@ static void vmx_save_guest_msrs(struct vcpu *v)
      * be updated at any time via SWAPGS, which we cannot trap.
      */
     v->arch.hvm.vmx.shadow_gs = rdgsshadow();
+
+    if ( unlikely(v->arch.hvm.vmx.ipt_state &&
+                  v->arch.hvm.vmx.ipt_state->active) )
+    {
+        uint64_t rtit_ctl;
+        rdmsrl(MSR_RTIT_CTL, rtit_ctl);
+        BUG_ON(rtit_ctl & RTIT_CTL_TRACE_EN);
+
+        rdmsrl(MSR_RTIT_STATUS, v->arch.hvm.vmx.ipt_state->status);
+        rdmsrl(MSR_RTIT_OUTPUT_MASK,
+               v->arch.hvm.vmx.ipt_state->output_mask.raw);
+    }
 }
 
 static void vmx_restore_guest_msrs(struct vcpu *v)
@@ -524,6 +595,17 @@ static void vmx_restore_guest_msrs(struct vcpu *v)
 
     if ( cpu_has_msr_tsc_aux )
         wrmsr_tsc_aux(v->arch.msrs->tsc_aux);
+
+    if ( unlikely(v->arch.hvm.vmx.ipt_state &&
+                  v->arch.hvm.vmx.ipt_state->active) )
+    {
+        wrmsrl(MSR_RTIT_OUTPUT_BASE,
+               v->arch.hvm.vmx.ipt_state->output_base);
+        wrmsrl(MSR_RTIT_OUTPUT_MASK,
+               v->arch.hvm.vmx.ipt_state->output_mask.raw);
+        wrmsrl(MSR_RTIT_STATUS,
+               v->arch.hvm.vmx.ipt_state->status);
+    }
 }
 
 void vmx_update_cpu_exec_control(struct vcpu *v)
@@ -2240,6 +2322,25 @@ static bool vmx_get_pending_event(struct vcpu *v, struct x86_event *info)
     return true;
 }
 
+static int vmx_control_pt(struct vcpu *v, bool enable)
+{
+    if ( !v->arch.hvm.vmx.ipt_state )
+        return -EINVAL;
+
+    v->arch.hvm.vmx.ipt_state->active = enable;
+    return 0;
+}
+
+static int vmx_get_pt_offset(struct vcpu *v, uint64_t *offset, uint64_t *size)
+{
+    if ( !v->arch.hvm.vmx.ipt_state )
+        return -EINVAL;
+
+    *offset = v->arch.hvm.vmx.ipt_state->output_mask.offset;
+    *size = v->arch.hvm.vmx.ipt_state->output_mask.size + 1;
+    return 0;
+}
+
 static struct hvm_function_table __initdata vmx_function_table = {
     .name = "VMX",
     .cpu_up_prepare = vmx_cpu_up_prepare,
@@ -2295,6 +2396,8 @@ static struct hvm_function_table __initdata vmx_function_table = {
     .altp2m_vcpu_update_vmfunc_ve = vmx_vcpu_update_vmfunc_ve,
     .altp2m_vcpu_emulate_ve = vmx_vcpu_emulate_ve,
     .altp2m_vcpu_emulate_vmfunc = vmx_vcpu_emulate_vmfunc,
+    .vmtrace_control_pt = vmx_control_pt,
+    .vmtrace_get_pt_offset = vmx_get_pt_offset,
     .tsc_scaling = {
         .max_ratio = VMX_TSC_MULTIPLIER_MAX,
     },
@@ -3674,6 +3777,13 @@ void vmx_vmexit_handler(struct cpu_user_regs *regs)
 
     hvm_invalidate_regs_fields(regs);
 
+    if ( unlikely(v->arch.hvm.vmx.ipt_state &&
+                  v->arch.hvm.vmx.ipt_state->active) )
+    {
+        rdmsrl(MSR_RTIT_OUTPUT_MASK,
+               v->arch.hvm.vmx.ipt_state->output_mask.raw);
+    }
+
     if ( paging_mode_hap(v->domain) )
     {
         /*
diff --git a/xen/include/asm-x86/hvm/vmx/vmcs.h b/xen/include/asm-x86/hvm/vmx/vmcs.h
index 6153ba6769..65971fa6ad 100644
--- a/xen/include/asm-x86/hvm/vmx/vmcs.h
+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h
@@ -186,6 +186,9 @@ struct vmx_vcpu {
      * pCPU and wakeup the related vCPU.
      */
     struct pi_blocking_vcpu pi_blocking;
+
+    /* State of processor trace feature */
+    struct ipt_state *ipt_state;
 };
 
 int vmx_create_vmcs(struct vcpu *v);
diff --git a/xen/include/asm-x86/hvm/vmx/vmx.h b/xen/include/asm-x86/hvm/vmx/vmx.h
index 111ccd7e61..8d7c67e43d 100644
--- a/xen/include/asm-x86/hvm/vmx/vmx.h
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h
@@ -689,4 +689,18 @@ typedef union ldt_or_tr_instr_info {
     };
 } ldt_or_tr_instr_info_t;
 
+/* Processor Trace state per vCPU */
+struct ipt_state {
+    bool active;
+    uint64_t status;
+    uint64_t output_base;
+    union {
+        uint64_t raw;
+        struct {
+            uint32_t size;
+            uint32_t offset;
+        };
+    } output_mask;
+};
+
 #endif /* __ASM_X86_HVM_VMX_VMX_H__ */
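For illustration, the per-vCPU state with the type change asked for at the end of the review might end up as below (a sketch; it assumes paddr_t is the machine-address type meant by "maddr_t", since page_to_maddr() yields a paddr_t, and the field comments only restate how the patch uses them):

/* Processor Trace state per vCPU */
struct ipt_state {
    bool active;
    uint64_t status;
    paddr_t output_base;          /* machine address of the trace buffer */
    union {
        uint64_t raw;
        struct {
            uint32_t size;        /* MSR_RTIT_OUTPUT_MASK low half: buffer size - 1 */
            uint32_t offset;      /* current write offset into the buffer */
        };
    } output_mask;
};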