Message ID | 1467312212-3010-1-git-send-email-czuzu@bitdefender.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 06/30/16 21:43, Corneliu ZUZU wrote: > For readability: > > * Add function arch_monitor_write_data (in x86/monitor.c) and separate handling > of monitor_write_data there (previously done directly in hvm_do_resume). > > * Add function write_ctrlreg_adjust_traps (in x86/monitor.c) and relocate > enabling/disabling of CPU_BASED_CR3_LOAD_EXITING for CR3 monitor vm-events there > (previously done through CR0 node @ vmx_update_guest_cr(v, 0)). > > Signed-off-by: Corneliu ZUZU <czuzu@bitdefender.com> > --- > xen/arch/x86/hvm/hvm.c | 48 +++++++++------------- > xen/arch/x86/hvm/vmx/vmx.c | 5 --- > xen/arch/x86/monitor.c | 94 +++++++++++++++++++++++++++++++++++++++---- > xen/include/asm-x86/monitor.h | 2 + > 4 files changed, 107 insertions(+), 42 deletions(-) > > diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c > index c89ab6e..5481a6e 100644 > --- a/xen/arch/x86/hvm/hvm.c > +++ b/xen/arch/x86/hvm/hvm.c > @@ -475,8 +475,6 @@ void hvm_do_resume(struct vcpu *v) > > if ( unlikely(v->arch.vm_event) ) > { > - struct monitor_write_data *w = &v->arch.vm_event->write_data; > - > if ( v->arch.vm_event->emulate_flags ) > { > enum emul_kind kind = EMUL_KIND_NORMAL; > @@ -493,32 +491,10 @@ void hvm_do_resume(struct vcpu *v) > > v->arch.vm_event->emulate_flags = 0; > } > - > - if ( w->do_write.msr ) > - { > - hvm_msr_write_intercept(w->msr, w->value, 0); > - w->do_write.msr = 0; > - } > - > - if ( w->do_write.cr0 ) > - { > - hvm_set_cr0(w->cr0, 0); > - w->do_write.cr0 = 0; > - } > - > - if ( w->do_write.cr4 ) > - { > - hvm_set_cr4(w->cr4, 0); > - w->do_write.cr4 = 0; > - } > - > - if ( w->do_write.cr3 ) > - { > - hvm_set_cr3(w->cr3, 0); > - w->do_write.cr3 = 0; > - } > } > > + arch_monitor_write_data(v); > + > /* Inject pending hw/sw trap */ > if ( v->arch.hvm_vcpu.inject_trap.vector != -1 ) > { > @@ -2206,7 +2182,10 @@ int hvm_set_cr0(unsigned long value, bool_t may_defer) > > if ( hvm_monitor_crX(CR0, value, old_value) ) > { > - /* The actual write will occur in 
hvm_do_resume(), if permitted. */ > + /* > + * The actual write will occur in arch_monitor_write_data(), if > + * permitted. > + */ > v->arch.vm_event->write_data.do_write.cr0 = 1; > v->arch.vm_event->write_data.cr0 = value; > > @@ -2308,7 +2287,10 @@ int hvm_set_cr3(unsigned long value, bool_t may_defer) > > if ( hvm_monitor_crX(CR3, value, old) ) > { > - /* The actual write will occur in hvm_do_resume(), if permitted. */ > + /* > + * The actual write will occur in arch_monitor_write_data(), if > + * permitted. > + */ > v->arch.vm_event->write_data.do_write.cr3 = 1; > v->arch.vm_event->write_data.cr3 = value; > > @@ -2388,7 +2370,10 @@ int hvm_set_cr4(unsigned long value, bool_t may_defer) > > if ( hvm_monitor_crX(CR4, value, old_cr) ) > { > - /* The actual write will occur in hvm_do_resume(), if permitted. */ > + /* > + * The actual write will occur in arch_monitor_write_data(), if > + * permitted. > + */ > v->arch.vm_event->write_data.do_write.cr4 = 1; > v->arch.vm_event->write_data.cr4 = value; > > @@ -3767,7 +3752,10 @@ int hvm_msr_write_intercept(unsigned int msr, uint64_t msr_content, > { > ASSERT(v->arch.vm_event); > > - /* The actual write will occur in hvm_do_resume() (if permitted). */ > + /* > + * The actual write will occur in arch_monitor_write_data(), if > + * permitted. > + */ > v->arch.vm_event->write_data.do_write.msr = 1; > v->arch.vm_event->write_data.msr = msr; > v->arch.vm_event->write_data.value = msr_content; > diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c > index 15c84c2..de04e6c 100644 > --- a/xen/arch/x86/hvm/vmx/vmx.c > +++ b/xen/arch/x86/hvm/vmx/vmx.c > @@ -1442,11 +1442,6 @@ static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr) > if ( !hvm_paging_enabled(v) && !vmx_unrestricted_guest(v) ) > v->arch.hvm_vmx.exec_control |= cr3_ctls; > > - /* Trap CR3 updates if CR3 memory events are enabled. 
*/ > - if ( v->domain->arch.monitor.write_ctrlreg_enabled & > - monitor_ctrlreg_bitmask(VM_EVENT_X86_CR3) ) > - v->arch.hvm_vmx.exec_control |= CPU_BASED_CR3_LOAD_EXITING; > - > if ( old_ctls != v->arch.hvm_vmx.exec_control ) > vmx_update_cpu_exec_control(v); > } > diff --git a/xen/arch/x86/monitor.c b/xen/arch/x86/monitor.c > index a271161..90e4856 100644 > --- a/xen/arch/x86/monitor.c > +++ b/xen/arch/x86/monitor.c > @@ -20,6 +20,9 @@ > */ > > #include <asm/monitor.h> > +#include <asm/paging.h> > +#include <asm/hvm/vmx/vmx.h> > +#include <asm/vm_event.h> > #include <public/vm_event.h> As previously stated, unless there's a compelling reason to do otherwise, AFAIK asm/hvm/... goes above asm/monitor.h. Otherwise, for the monitor parts: Acked-by: Razvan Cojocaru <rcojocaru@bitdefender.com> Thanks, Razvan
>>> On 30.06.16 at 20:43, <czuzu@bitdefender.com> wrote: > --- a/xen/arch/x86/hvm/hvm.c > +++ b/xen/arch/x86/hvm/hvm.c > @@ -475,8 +475,6 @@ void hvm_do_resume(struct vcpu *v) > > if ( unlikely(v->arch.vm_event) ) > { > - struct monitor_write_data *w = &v->arch.vm_event->write_data; > - > if ( v->arch.vm_event->emulate_flags ) > { > enum emul_kind kind = EMUL_KIND_NORMAL; > @@ -493,32 +491,10 @@ void hvm_do_resume(struct vcpu *v) > > v->arch.vm_event->emulate_flags = 0; > } > - > - if ( w->do_write.msr ) > - { > - hvm_msr_write_intercept(w->msr, w->value, 0); > - w->do_write.msr = 0; > - } > - > - if ( w->do_write.cr0 ) > - { > - hvm_set_cr0(w->cr0, 0); > - w->do_write.cr0 = 0; > - } > - > - if ( w->do_write.cr4 ) > - { > - hvm_set_cr4(w->cr4, 0); > - w->do_write.cr4 = 0; > - } > - > - if ( w->do_write.cr3 ) > - { > - hvm_set_cr3(w->cr3, 0); > - w->do_write.cr3 = 0; > - } > } > > + arch_monitor_write_data(v); Why does this get moved outside the if(), with the same condition getting added inside the function (inverted for bailing early)? > @@ -119,6 +156,55 @@ bool_t monitored_msr(const struct domain *d, u32 msr) > return test_bit(msr, bitmap); > } > > +static void write_ctrlreg_adjust_traps(struct domain *d) > +{ > + struct vcpu *v; > + struct arch_vmx_struct *avmx; > + unsigned int cr3_bitmask; > + bool_t cr3_vmevent, cr3_ldexit; > + > + /* Adjust CR3 load-exiting. 
*/ > + > + /* vmx only */ > + ASSERT(cpu_has_vmx); > + > + /* non-hap domains trap CR3 writes unconditionally */ > + if ( !paging_mode_hap(d) ) > + { > + for_each_vcpu ( d, v ) > + ASSERT(v->arch.hvm_vmx.exec_control & CPU_BASED_CR3_LOAD_EXITING); > + return; > + } > + > + cr3_bitmask = monitor_ctrlreg_bitmask(VM_EVENT_X86_CR3); > + cr3_vmevent = !!(d->arch.monitor.write_ctrlreg_enabled & cr3_bitmask); > + > + for_each_vcpu ( d, v ) > + { > + avmx = &v->arch.hvm_vmx; > + cr3_ldexit = !!(avmx->exec_control & CPU_BASED_CR3_LOAD_EXITING); > + > + if ( cr3_vmevent == cr3_ldexit ) > + continue; > + > + /* > + * If CR0.PE=0, CR3 load exiting must remain enabled. > + * See vmx_update_guest_cr code motion for cr = 0. > + */ > + if ( cr3_ldexit && !hvm_paging_enabled(v) && !vmx_unrestricted_guest(v) > ) > + continue; > + > + if ( cr3_vmevent ) > + avmx->exec_control |= CPU_BASED_CR3_LOAD_EXITING; > + else > + avmx->exec_control &= ~CPU_BASED_CR3_LOAD_EXITING; > + > + vmx_vmcs_enter(v); > + vmx_update_cpu_exec_control(v); > + vmx_vmcs_exit(v); > + } > +} While Razvan gave his ack already, I wonder whether it's really a good idea to put deeply VMX-specific code outside of a VMX-specific file. Jan
Hi Jan, On 7/4/2016 1:22 PM, Jan Beulich wrote: >>>> On 30.06.16 at 20:43, <czuzu@bitdefender.com> wrote: >> --- a/xen/arch/x86/hvm/hvm.c >> +++ b/xen/arch/x86/hvm/hvm.c >> @@ -475,8 +475,6 @@ void hvm_do_resume(struct vcpu *v) >> >> if ( unlikely(v->arch.vm_event) ) >> { >> - struct monitor_write_data *w = &v->arch.vm_event->write_data; >> - >> if ( v->arch.vm_event->emulate_flags ) >> { >> enum emul_kind kind = EMUL_KIND_NORMAL; >> @@ -493,32 +491,10 @@ void hvm_do_resume(struct vcpu *v) >> >> v->arch.vm_event->emulate_flags = 0; >> } >> - >> - if ( w->do_write.msr ) >> - { >> - hvm_msr_write_intercept(w->msr, w->value, 0); >> - w->do_write.msr = 0; >> - } >> - >> - if ( w->do_write.cr0 ) >> - { >> - hvm_set_cr0(w->cr0, 0); >> - w->do_write.cr0 = 0; >> - } >> - >> - if ( w->do_write.cr4 ) >> - { >> - hvm_set_cr4(w->cr4, 0); >> - w->do_write.cr4 = 0; >> - } >> - >> - if ( w->do_write.cr3 ) >> - { >> - hvm_set_cr3(w->cr3, 0); >> - w->do_write.cr3 = 0; >> - } >> } >> >> + arch_monitor_write_data(v); > Why does this get moved outside the if(), with the same condition > getting added inside the function (inverted for bailing early)? I left that so because of patch 5/8 - specifically, monitor_write_data handling shouldn't depend on the vm_event subsystem being initialized. But you're right, it still does depend on that initialization in this patch, so I should leave the call inside the if (and remove the check inside the function) as you suggest and only get it out in 5/8. Will do that in v2. > >> @@ -119,6 +156,55 @@ bool_t monitored_msr(const struct domain *d, u32 msr) >> return test_bit(msr, bitmap); >> } >> >> +static void write_ctrlreg_adjust_traps(struct domain *d) >> +{ >> + struct vcpu *v; >> + struct arch_vmx_struct *avmx; >> + unsigned int cr3_bitmask; >> + bool_t cr3_vmevent, cr3_ldexit; >> + >> + /* Adjust CR3 load-exiting. 
*/ >> + >> + /* vmx only */ >> + ASSERT(cpu_has_vmx); >> + >> + /* non-hap domains trap CR3 writes unconditionally */ >> + if ( !paging_mode_hap(d) ) >> + { >> + for_each_vcpu ( d, v ) >> + ASSERT(v->arch.hvm_vmx.exec_control & CPU_BASED_CR3_LOAD_EXITING); >> + return; >> + } >> + >> + cr3_bitmask = monitor_ctrlreg_bitmask(VM_EVENT_X86_CR3); >> + cr3_vmevent = !!(d->arch.monitor.write_ctrlreg_enabled & cr3_bitmask); >> + >> + for_each_vcpu ( d, v ) >> + { >> + avmx = &v->arch.hvm_vmx; >> + cr3_ldexit = !!(avmx->exec_control & CPU_BASED_CR3_LOAD_EXITING); >> + >> + if ( cr3_vmevent == cr3_ldexit ) >> + continue; >> + >> + /* >> + * If CR0.PE=0, CR3 load exiting must remain enabled. >> + * See vmx_update_guest_cr code motion for cr = 0. >> + */ >> + if ( cr3_ldexit && !hvm_paging_enabled(v) && !vmx_unrestricted_guest(v) >> ) >> + continue; >> + >> + if ( cr3_vmevent ) >> + avmx->exec_control |= CPU_BASED_CR3_LOAD_EXITING; >> + else >> + avmx->exec_control &= ~CPU_BASED_CR3_LOAD_EXITING; >> + >> + vmx_vmcs_enter(v); >> + vmx_update_cpu_exec_control(v); >> + vmx_vmcs_exit(v); >> + } >> +} > While Razvan gave his ack already, I wonder whether it's really a > good idea to put deeply VMX-specific code outside of a VMX-specific > file. > > Jan Well, a summary of what this function does would sound like: "adjusts CR3 load-exiting for cr-write monitor vm-events". IMHO that's (monitor) vm-event specific enough to be placed within the vm-event subsystem. Could you suggest concretely how this separation would look like? (where to put this function/parts of it (and what parts), what name should it have once moved). Another reason this was done (besides avoiding hackishly doing a CR0 update when we actually need a CR3 update specifically for a vm-event to happen) is keeping symmetry between ARM<->X86 in a future patch that would implement monitor CR vm-events for ARM. 
In that patch write_ctrlreg_adjust_traps is renamed and implemented per-architecture, on ARM it would have the same job, i.e. updating some hypervisor traps (~ vmx execution controls) for CR vm-events to happen. On a different note, one thing I forgot to do though is to also move the following check (instead of completely removing it from arch_monitor_domctl_event): if ( VM_EVENT_X86_CR3 == mop->u.mov_to_cr.index ) inside write_ctrlreg_adjust_traps. Will remedy that in v2. Thanks, Corneliu.
On 07/04/16 13:22, Jan Beulich wrote: >>>> On 30.06.16 at 20:43, <czuzu@bitdefender.com> wrote: >> --- a/xen/arch/x86/hvm/hvm.c >> +++ b/xen/arch/x86/hvm/hvm.c >> @@ -475,8 +475,6 @@ void hvm_do_resume(struct vcpu *v) >> >> if ( unlikely(v->arch.vm_event) ) >> { >> - struct monitor_write_data *w = &v->arch.vm_event->write_data; >> - >> if ( v->arch.vm_event->emulate_flags ) >> { >> enum emul_kind kind = EMUL_KIND_NORMAL; >> @@ -493,32 +491,10 @@ void hvm_do_resume(struct vcpu *v) >> >> v->arch.vm_event->emulate_flags = 0; >> } >> - >> - if ( w->do_write.msr ) >> - { >> - hvm_msr_write_intercept(w->msr, w->value, 0); >> - w->do_write.msr = 0; >> - } >> - >> - if ( w->do_write.cr0 ) >> - { >> - hvm_set_cr0(w->cr0, 0); >> - w->do_write.cr0 = 0; >> - } >> - >> - if ( w->do_write.cr4 ) >> - { >> - hvm_set_cr4(w->cr4, 0); >> - w->do_write.cr4 = 0; >> - } >> - >> - if ( w->do_write.cr3 ) >> - { >> - hvm_set_cr3(w->cr3, 0); >> - w->do_write.cr3 = 0; >> - } >> } >> >> + arch_monitor_write_data(v); > > Why does this get moved outside the if(), with the same condition > getting added inside the function (inverted for bailing early)? > >> @@ -119,6 +156,55 @@ bool_t monitored_msr(const struct domain *d, u32 msr) >> return test_bit(msr, bitmap); >> } >> >> +static void write_ctrlreg_adjust_traps(struct domain *d) >> +{ >> + struct vcpu *v; >> + struct arch_vmx_struct *avmx; >> + unsigned int cr3_bitmask; >> + bool_t cr3_vmevent, cr3_ldexit; >> + >> + /* Adjust CR3 load-exiting. 
*/ >> + >> + /* vmx only */ >> + ASSERT(cpu_has_vmx); >> + >> + /* non-hap domains trap CR3 writes unconditionally */ >> + if ( !paging_mode_hap(d) ) >> + { >> + for_each_vcpu ( d, v ) >> + ASSERT(v->arch.hvm_vmx.exec_control & CPU_BASED_CR3_LOAD_EXITING); >> + return; >> + } >> + >> + cr3_bitmask = monitor_ctrlreg_bitmask(VM_EVENT_X86_CR3); >> + cr3_vmevent = !!(d->arch.monitor.write_ctrlreg_enabled & cr3_bitmask); >> + >> + for_each_vcpu ( d, v ) >> + { >> + avmx = &v->arch.hvm_vmx; >> + cr3_ldexit = !!(avmx->exec_control & CPU_BASED_CR3_LOAD_EXITING); >> + >> + if ( cr3_vmevent == cr3_ldexit ) >> + continue; >> + >> + /* >> + * If CR0.PE=0, CR3 load exiting must remain enabled. >> + * See vmx_update_guest_cr code motion for cr = 0. >> + */ >> + if ( cr3_ldexit && !hvm_paging_enabled(v) && !vmx_unrestricted_guest(v) >> ) >> + continue; >> + >> + if ( cr3_vmevent ) >> + avmx->exec_control |= CPU_BASED_CR3_LOAD_EXITING; >> + else >> + avmx->exec_control &= ~CPU_BASED_CR3_LOAD_EXITING; >> + >> + vmx_vmcs_enter(v); >> + vmx_update_cpu_exec_control(v); >> + vmx_vmcs_exit(v); >> + } >> +} > > While Razvan gave his ack already, I wonder whether it's really a > good idea to put deeply VMX-specific code outside of a VMX-specific > file. Didn't I add "for monitor / vm_event parts Acked-by: ..."? If I didn't, I meant to. Obviously VMX code maintainers outrank me on these issues. Thanks, Razvan
>>> On 04.07.16 at 15:22, <rcojocaru@bitdefender.com> wrote: > On 07/04/16 13:22, Jan Beulich wrote: >>>>> On 30.06.16 at 20:43, <czuzu@bitdefender.com> wrote: >>> @@ -119,6 +156,55 @@ bool_t monitored_msr(const struct domain *d, u32 msr) >>> return test_bit(msr, bitmap); >>> } >>> >>> +static void write_ctrlreg_adjust_traps(struct domain *d) >>> +{ >>> + struct vcpu *v; >>> + struct arch_vmx_struct *avmx; >>> + unsigned int cr3_bitmask; >>> + bool_t cr3_vmevent, cr3_ldexit; >>> + >>> + /* Adjust CR3 load-exiting. */ >>> + >>> + /* vmx only */ >>> + ASSERT(cpu_has_vmx); >>> + >>> + /* non-hap domains trap CR3 writes unconditionally */ >>> + if ( !paging_mode_hap(d) ) >>> + { >>> + for_each_vcpu ( d, v ) >>> + ASSERT(v->arch.hvm_vmx.exec_control & CPU_BASED_CR3_LOAD_EXITING); >>> + return; >>> + } >>> + >>> + cr3_bitmask = monitor_ctrlreg_bitmask(VM_EVENT_X86_CR3); >>> + cr3_vmevent = !!(d->arch.monitor.write_ctrlreg_enabled & cr3_bitmask); >>> + >>> + for_each_vcpu ( d, v ) >>> + { >>> + avmx = &v->arch.hvm_vmx; >>> + cr3_ldexit = !!(avmx->exec_control & CPU_BASED_CR3_LOAD_EXITING); >>> + >>> + if ( cr3_vmevent == cr3_ldexit ) >>> + continue; >>> + >>> + /* >>> + * If CR0.PE=0, CR3 load exiting must remain enabled. >>> + * See vmx_update_guest_cr code motion for cr = 0. >>> + */ >>> + if ( cr3_ldexit && !hvm_paging_enabled(v) && !vmx_unrestricted_guest(v) >>> ) >>> + continue; >>> + >>> + if ( cr3_vmevent ) >>> + avmx->exec_control |= CPU_BASED_CR3_LOAD_EXITING; >>> + else >>> + avmx->exec_control &= ~CPU_BASED_CR3_LOAD_EXITING; >>> + >>> + vmx_vmcs_enter(v); >>> + vmx_update_cpu_exec_control(v); >>> + vmx_vmcs_exit(v); >>> + } >>> +} >> >> While Razvan gave his ack already, I wonder whether it's really a >> good idea to put deeply VMX-specific code outside of a VMX-specific >> file. > > Didn't I add "for monitor / vm_event parts Acked-by: ..."? If I didn't, > I meant to. Well - this is a monitor file (monitor.c). Jan
On 07/04/16 17:05, Jan Beulich wrote: >>>> On 04.07.16 at 15:22, <rcojocaru@bitdefender.com> wrote: >> On 07/04/16 13:22, Jan Beulich wrote: >>>>>> On 30.06.16 at 20:43, <czuzu@bitdefender.com> wrote: >>>> @@ -119,6 +156,55 @@ bool_t monitored_msr(const struct domain *d, u32 msr) >>>> return test_bit(msr, bitmap); >>>> } >>>> >>>> +static void write_ctrlreg_adjust_traps(struct domain *d) >>>> +{ >>>> + struct vcpu *v; >>>> + struct arch_vmx_struct *avmx; >>>> + unsigned int cr3_bitmask; >>>> + bool_t cr3_vmevent, cr3_ldexit; >>>> + >>>> + /* Adjust CR3 load-exiting. */ >>>> + >>>> + /* vmx only */ >>>> + ASSERT(cpu_has_vmx); >>>> + >>>> + /* non-hap domains trap CR3 writes unconditionally */ >>>> + if ( !paging_mode_hap(d) ) >>>> + { >>>> + for_each_vcpu ( d, v ) >>>> + ASSERT(v->arch.hvm_vmx.exec_control & CPU_BASED_CR3_LOAD_EXITING); >>>> + return; >>>> + } >>>> + >>>> + cr3_bitmask = monitor_ctrlreg_bitmask(VM_EVENT_X86_CR3); >>>> + cr3_vmevent = !!(d->arch.monitor.write_ctrlreg_enabled & cr3_bitmask); >>>> + >>>> + for_each_vcpu ( d, v ) >>>> + { >>>> + avmx = &v->arch.hvm_vmx; >>>> + cr3_ldexit = !!(avmx->exec_control & CPU_BASED_CR3_LOAD_EXITING); >>>> + >>>> + if ( cr3_vmevent == cr3_ldexit ) >>>> + continue; >>>> + >>>> + /* >>>> + * If CR0.PE=0, CR3 load exiting must remain enabled. >>>> + * See vmx_update_guest_cr code motion for cr = 0. >>>> + */ >>>> + if ( cr3_ldexit && !hvm_paging_enabled(v) && !vmx_unrestricted_guest(v) >>>> ) >>>> + continue; >>>> + >>>> + if ( cr3_vmevent ) >>>> + avmx->exec_control |= CPU_BASED_CR3_LOAD_EXITING; >>>> + else >>>> + avmx->exec_control &= ~CPU_BASED_CR3_LOAD_EXITING; >>>> + >>>> + vmx_vmcs_enter(v); >>>> + vmx_update_cpu_exec_control(v); >>>> + vmx_vmcs_exit(v); >>>> + } >>>> +} >>> >>> While Razvan gave his ack already, I wonder whether it's really a >>> good idea to put deeply VMX-specific code outside of a VMX-specific >>> file. >> >> Didn't I add "for monitor / vm_event parts Acked-by: ..."? 
If I didn't, >> I meant to. > > Well - this is a monitor file (monitor.c). Fair enough, I should have been more detailed here. I do see the merit of your suggestion, and so FWIW I second your suggestion to move the code to some VMX-specific part of the tree if possible. Thanks, Razvan
On 7/4/2016 5:16 PM, Razvan Cojocaru wrote: > On 07/04/16 17:05, Jan Beulich wrote: >>>>> On 04.07.16 at 15:22, <rcojocaru@bitdefender.com> wrote: >>> On 07/04/16 13:22, Jan Beulich wrote: >>>>>>> On 30.06.16 at 20:43, <czuzu@bitdefender.com> wrote: >>>>> @@ -119,6 +156,55 @@ bool_t monitored_msr(const struct domain *d, u32 msr) >>>>> return test_bit(msr, bitmap); >>>>> } >>>>> >>>>> +static void write_ctrlreg_adjust_traps(struct domain *d) >>>>> +{ >>>>> + struct vcpu *v; >>>>> + struct arch_vmx_struct *avmx; >>>>> + unsigned int cr3_bitmask; >>>>> + bool_t cr3_vmevent, cr3_ldexit; >>>>> + >>>>> + /* Adjust CR3 load-exiting. */ >>>>> + >>>>> + /* vmx only */ >>>>> + ASSERT(cpu_has_vmx); >>>>> + >>>>> + /* non-hap domains trap CR3 writes unconditionally */ >>>>> + if ( !paging_mode_hap(d) ) >>>>> + { >>>>> + for_each_vcpu ( d, v ) >>>>> + ASSERT(v->arch.hvm_vmx.exec_control & CPU_BASED_CR3_LOAD_EXITING); >>>>> + return; >>>>> + } >>>>> + >>>>> + cr3_bitmask = monitor_ctrlreg_bitmask(VM_EVENT_X86_CR3); >>>>> + cr3_vmevent = !!(d->arch.monitor.write_ctrlreg_enabled & cr3_bitmask); >>>>> + >>>>> + for_each_vcpu ( d, v ) >>>>> + { >>>>> + avmx = &v->arch.hvm_vmx; >>>>> + cr3_ldexit = !!(avmx->exec_control & CPU_BASED_CR3_LOAD_EXITING); >>>>> + >>>>> + if ( cr3_vmevent == cr3_ldexit ) >>>>> + continue; >>>>> + >>>>> + /* >>>>> + * If CR0.PE=0, CR3 load exiting must remain enabled. >>>>> + * See vmx_update_guest_cr code motion for cr = 0. 
>>>>> + */ >>>>> + if ( cr3_ldexit && !hvm_paging_enabled(v) && !vmx_unrestricted_guest(v) >>>>> ) >>>>> + continue; >>>>> + >>>>> + if ( cr3_vmevent ) >>>>> + avmx->exec_control |= CPU_BASED_CR3_LOAD_EXITING; >>>>> + else >>>>> + avmx->exec_control &= ~CPU_BASED_CR3_LOAD_EXITING; >>>>> + >>>>> + vmx_vmcs_enter(v); >>>>> + vmx_update_cpu_exec_control(v); >>>>> + vmx_vmcs_exit(v); >>>>> + } >>>>> +} >>>> While Razvan gave his ack already, I wonder whether it's really a >>>> good idea to put deeply VMX-specific code outside of a VMX-specific >>>> file. >>> Didn't I add "for monitor / vm_event parts Acked-by: ..."? If I didn't, >>> I meant to. >> Well - this is a monitor file (monitor.c). > Fair enough, I should have been more detailed here. I do see the merit > of your suggestion, and so FWIW I second your suggestion to move the > code to some VMX-specific part of the tree if possible. But did you also see my reply concerning this suggestion? :-) > > Thanks, > Razvan > Thanks, Corneliu.
>>> On 04.07.16 at 13:02, <czuzu@bitdefender.com> wrote: > On 7/4/2016 1:22 PM, Jan Beulich wrote: >>>>> On 30.06.16 at 20:43, <czuzu@bitdefender.com> wrote: >>> @@ -119,6 +156,55 @@ bool_t monitored_msr(const struct domain *d, u32 msr) >>> return test_bit(msr, bitmap); >>> } >>> >>> +static void write_ctrlreg_adjust_traps(struct domain *d) >>> +{ >>> + struct vcpu *v; >>> + struct arch_vmx_struct *avmx; >>> + unsigned int cr3_bitmask; >>> + bool_t cr3_vmevent, cr3_ldexit; >>> + >>> + /* Adjust CR3 load-exiting. */ >>> + >>> + /* vmx only */ >>> + ASSERT(cpu_has_vmx); >>> + >>> + /* non-hap domains trap CR3 writes unconditionally */ >>> + if ( !paging_mode_hap(d) ) >>> + { >>> + for_each_vcpu ( d, v ) >>> + ASSERT(v->arch.hvm_vmx.exec_control & CPU_BASED_CR3_LOAD_EXITING); >>> + return; >>> + } >>> + >>> + cr3_bitmask = monitor_ctrlreg_bitmask(VM_EVENT_X86_CR3); >>> + cr3_vmevent = !!(d->arch.monitor.write_ctrlreg_enabled & cr3_bitmask); >>> + >>> + for_each_vcpu ( d, v ) >>> + { >>> + avmx = &v->arch.hvm_vmx; >>> + cr3_ldexit = !!(avmx->exec_control & CPU_BASED_CR3_LOAD_EXITING); >>> + >>> + if ( cr3_vmevent == cr3_ldexit ) >>> + continue; >>> + >>> + /* >>> + * If CR0.PE=0, CR3 load exiting must remain enabled. >>> + * See vmx_update_guest_cr code motion for cr = 0. >>> + */ >>> + if ( cr3_ldexit && !hvm_paging_enabled(v) && !vmx_unrestricted_guest(v) >>> ) >>> + continue; >>> + >>> + if ( cr3_vmevent ) >>> + avmx->exec_control |= CPU_BASED_CR3_LOAD_EXITING; >>> + else >>> + avmx->exec_control &= ~CPU_BASED_CR3_LOAD_EXITING; >>> + >>> + vmx_vmcs_enter(v); >>> + vmx_update_cpu_exec_control(v); >>> + vmx_vmcs_exit(v); >>> + } >>> +} >> While Razvan gave his ack already, I wonder whether it's really a >> good idea to put deeply VMX-specific code outside of a VMX-specific >> file. > > Well, a summary of what this function does would sound like: "adjusts > CR3 load-exiting for cr-write monitor vm-events". 
IMHO that's (monitor) > vm-event specific enough to be placed within the vm-event subsystem. > Could you suggest concretely how this separation would look like? (where > to put this function/parts of it (and what parts), what name should it > have once moved). I won't go into that level of detail. Fact is that VMX-specific code should be kept out of here. Whether you move the entire function behind a hvm_funcs hook or just part of it is of no interest to me. In no case should, if and when SVM eventually gets supported for vm-event/monitor too, this function end up doing both VMX and SVM specific things. Jan
On 7/4/2016 5:58 PM, Jan Beulich wrote: >>>> On 04.07.16 at 13:02, <czuzu@bitdefender.com> wrote: >> On 7/4/2016 1:22 PM, Jan Beulich wrote: >>>>>> On 30.06.16 at 20:43, <czuzu@bitdefender.com> wrote: >>>> @@ -119,6 +156,55 @@ bool_t monitored_msr(const struct domain *d, u32 msr) >>>> return test_bit(msr, bitmap); >>>> } >>>> >>>> +static void write_ctrlreg_adjust_traps(struct domain *d) >>>> +{ >>>> + struct vcpu *v; >>>> + struct arch_vmx_struct *avmx; >>>> + unsigned int cr3_bitmask; >>>> + bool_t cr3_vmevent, cr3_ldexit; >>>> + >>>> + /* Adjust CR3 load-exiting. */ >>>> + >>>> + /* vmx only */ >>>> + ASSERT(cpu_has_vmx); >>>> + >>>> + /* non-hap domains trap CR3 writes unconditionally */ >>>> + if ( !paging_mode_hap(d) ) >>>> + { >>>> + for_each_vcpu ( d, v ) >>>> + ASSERT(v->arch.hvm_vmx.exec_control & CPU_BASED_CR3_LOAD_EXITING); >>>> + return; >>>> + } >>>> + >>>> + cr3_bitmask = monitor_ctrlreg_bitmask(VM_EVENT_X86_CR3); >>>> + cr3_vmevent = !!(d->arch.monitor.write_ctrlreg_enabled & cr3_bitmask); >>>> + >>>> + for_each_vcpu ( d, v ) >>>> + { >>>> + avmx = &v->arch.hvm_vmx; >>>> + cr3_ldexit = !!(avmx->exec_control & CPU_BASED_CR3_LOAD_EXITING); >>>> + >>>> + if ( cr3_vmevent == cr3_ldexit ) >>>> + continue; >>>> + >>>> + /* >>>> + * If CR0.PE=0, CR3 load exiting must remain enabled. >>>> + * See vmx_update_guest_cr code motion for cr = 0. >>>> + */ >>>> + if ( cr3_ldexit && !hvm_paging_enabled(v) && !vmx_unrestricted_guest(v) >>>> ) >>>> + continue; >>>> + >>>> + if ( cr3_vmevent ) >>>> + avmx->exec_control |= CPU_BASED_CR3_LOAD_EXITING; >>>> + else >>>> + avmx->exec_control &= ~CPU_BASED_CR3_LOAD_EXITING; >>>> + >>>> + vmx_vmcs_enter(v); >>>> + vmx_update_cpu_exec_control(v); >>>> + vmx_vmcs_exit(v); >>>> + } >>>> +} >>> While Razvan gave his ack already, I wonder whether it's really a >>> good idea to put deeply VMX-specific code outside of a VMX-specific >>> file. 
>> Well, a summary of what this function does would sound like: "adjusts >> CR3 load-exiting for cr-write monitor vm-events". IMHO that's (monitor) >> vm-event specific enough to be placed within the vm-event subsystem. >> Could you suggest concretely how this separation would look like? (where >> to put this function/parts of it (and what parts), what name should it >> have once moved). > I won't go into that level of detail. Fact is that VMX-specific code > should be kept out of here. Whether you move the entire function > behind a hvm_funcs hook or just part of it is of no interest to me. > In no case should, if and when SVM eventually gets supported for > vm-event/monitor too, this function end up doing both VMX and SVM > specific things. > > Jan Why move it behind a hvm_funcs hook if it's only valid for VMX? SVM support is not currently implemented, hence the ASSERT(cpu_has_vmx) at the beginning of the function. And of course if @ some point SVM support will be implemented then the right thing to do is what you say, i.e. make this function part of hvm_function_table, but until then I don't see why we should do that. Note that arch_monitor_get_capabilities also returns no capability at the moment if !cpu_has_vmx. What if I move the vmx-specific parts to vmx.c in a function called something like vmx_vm_event_update_cr3_traps() and call it from write_ctrlreg_adjust_traps instead?... Corneliu.
>>> On 04.07.16 at 18:00, <czuzu@bitdefender.com> wrote: > On 7/4/2016 5:58 PM, Jan Beulich wrote: >>>>> On 04.07.16 at 13:02, <czuzu@bitdefender.com> wrote: >>> On 7/4/2016 1:22 PM, Jan Beulich wrote: >>>>>>> On 30.06.16 at 20:43, <czuzu@bitdefender.com> wrote: >>>>> @@ -119,6 +156,55 @@ bool_t monitored_msr(const struct domain *d, u32 msr) >>>>> return test_bit(msr, bitmap); >>>>> } >>>>> >>>>> +static void write_ctrlreg_adjust_traps(struct domain *d) >>>>> +{ >>>>> + struct vcpu *v; >>>>> + struct arch_vmx_struct *avmx; >>>>> + unsigned int cr3_bitmask; >>>>> + bool_t cr3_vmevent, cr3_ldexit; >>>>> + >>>>> + /* Adjust CR3 load-exiting. */ >>>>> + >>>>> + /* vmx only */ >>>>> + ASSERT(cpu_has_vmx); >>>>> + >>>>> + /* non-hap domains trap CR3 writes unconditionally */ >>>>> + if ( !paging_mode_hap(d) ) >>>>> + { >>>>> + for_each_vcpu ( d, v ) >>>>> + ASSERT(v->arch.hvm_vmx.exec_control & CPU_BASED_CR3_LOAD_EXITING); >>>>> + return; >>>>> + } >>>>> + >>>>> + cr3_bitmask = monitor_ctrlreg_bitmask(VM_EVENT_X86_CR3); >>>>> + cr3_vmevent = !!(d->arch.monitor.write_ctrlreg_enabled & cr3_bitmask); >>>>> + >>>>> + for_each_vcpu ( d, v ) >>>>> + { >>>>> + avmx = &v->arch.hvm_vmx; >>>>> + cr3_ldexit = !!(avmx->exec_control & CPU_BASED_CR3_LOAD_EXITING); >>>>> + >>>>> + if ( cr3_vmevent == cr3_ldexit ) >>>>> + continue; >>>>> + >>>>> + /* >>>>> + * If CR0.PE=0, CR3 load exiting must remain enabled. >>>>> + * See vmx_update_guest_cr code motion for cr = 0. 
>>>>> + */ >>>>> + if ( cr3_ldexit && !hvm_paging_enabled(v) && !vmx_unrestricted_guest(v) >>>>> ) >>>>> + continue; >>>>> + >>>>> + if ( cr3_vmevent ) >>>>> + avmx->exec_control |= CPU_BASED_CR3_LOAD_EXITING; >>>>> + else >>>>> + avmx->exec_control &= ~CPU_BASED_CR3_LOAD_EXITING; >>>>> + >>>>> + vmx_vmcs_enter(v); >>>>> + vmx_update_cpu_exec_control(v); >>>>> + vmx_vmcs_exit(v); >>>>> + } >>>>> +} >>>> While Razvan gave his ack already, I wonder whether it's really a >>>> good idea to put deeply VMX-specific code outside of a VMX-specific >>>> file. >>> Well, a summary of what this function does would sound like: "adjusts >>> CR3 load-exiting for cr-write monitor vm-events". IMHO that's (monitor) >>> vm-event specific enough to be placed within the vm-event subsystem. >>> Could you suggest concretely how this separation would look like? (where >>> to put this function/parts of it (and what parts), what name should it >>> have once moved). >> I won't go into that level of detail. Fact is that VMX-specific code >> should be kept out of here. Whether you move the entire function >> behind a hvm_funcs hook or just part of it is of no interest to me. >> In no case should, if and when SVM eventually gets supported for >> vm-event/monitor too, this function end up doing both VMX and SVM >> specific things. > > Why move it behind a hvm_funcs hook if it's only valid for VMX? SVM > support is not currently implemented, hence the ASSERT(cpu_has_vmx) at > the beginning of the function. > And of course if @ some point SVM support will be implemented then the > right thing to do is what you say, i.e. make this function part of > hvm_function_table, but until then I don't see why we should do that. > Note that arch_monitor_get_capabilities also returns no capability at > the moment if !cpu_has_vmx. > > What if I move the vmx-specific parts to vmx.c in a function called > something like vmx_vm_event_update_cr3_traps() and call it from > write_ctrlreg_adjust_traps instead?... 
That would be fine with me. Jan
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c index c89ab6e..5481a6e 100644 --- a/xen/arch/x86/hvm/hvm.c +++ b/xen/arch/x86/hvm/hvm.c @@ -475,8 +475,6 @@ void hvm_do_resume(struct vcpu *v) if ( unlikely(v->arch.vm_event) ) { - struct monitor_write_data *w = &v->arch.vm_event->write_data; - if ( v->arch.vm_event->emulate_flags ) { enum emul_kind kind = EMUL_KIND_NORMAL; @@ -493,32 +491,10 @@ void hvm_do_resume(struct vcpu *v) v->arch.vm_event->emulate_flags = 0; } - - if ( w->do_write.msr ) - { - hvm_msr_write_intercept(w->msr, w->value, 0); - w->do_write.msr = 0; - } - - if ( w->do_write.cr0 ) - { - hvm_set_cr0(w->cr0, 0); - w->do_write.cr0 = 0; - } - - if ( w->do_write.cr4 ) - { - hvm_set_cr4(w->cr4, 0); - w->do_write.cr4 = 0; - } - - if ( w->do_write.cr3 ) - { - hvm_set_cr3(w->cr3, 0); - w->do_write.cr3 = 0; - } } + arch_monitor_write_data(v); + /* Inject pending hw/sw trap */ if ( v->arch.hvm_vcpu.inject_trap.vector != -1 ) { @@ -2206,7 +2182,10 @@ int hvm_set_cr0(unsigned long value, bool_t may_defer) if ( hvm_monitor_crX(CR0, value, old_value) ) { - /* The actual write will occur in hvm_do_resume(), if permitted. */ + /* + * The actual write will occur in arch_monitor_write_data(), if + * permitted. + */ v->arch.vm_event->write_data.do_write.cr0 = 1; v->arch.vm_event->write_data.cr0 = value; @@ -2308,7 +2287,10 @@ int hvm_set_cr3(unsigned long value, bool_t may_defer) if ( hvm_monitor_crX(CR3, value, old) ) { - /* The actual write will occur in hvm_do_resume(), if permitted. */ + /* + * The actual write will occur in arch_monitor_write_data(), if + * permitted. + */ v->arch.vm_event->write_data.do_write.cr3 = 1; v->arch.vm_event->write_data.cr3 = value; @@ -2388,7 +2370,10 @@ int hvm_set_cr4(unsigned long value, bool_t may_defer) if ( hvm_monitor_crX(CR4, value, old_cr) ) { - /* The actual write will occur in hvm_do_resume(), if permitted. */ + /* + * The actual write will occur in arch_monitor_write_data(), if + * permitted. 
+ */ v->arch.vm_event->write_data.do_write.cr4 = 1; v->arch.vm_event->write_data.cr4 = value; @@ -3767,7 +3752,10 @@ int hvm_msr_write_intercept(unsigned int msr, uint64_t msr_content, { ASSERT(v->arch.vm_event); - /* The actual write will occur in hvm_do_resume() (if permitted). */ + /* + * The actual write will occur in arch_monitor_write_data(), if + * permitted. + */ v->arch.vm_event->write_data.do_write.msr = 1; v->arch.vm_event->write_data.msr = msr; v->arch.vm_event->write_data.value = msr_content; diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c index 15c84c2..de04e6c 100644 --- a/xen/arch/x86/hvm/vmx/vmx.c +++ b/xen/arch/x86/hvm/vmx/vmx.c @@ -1442,11 +1442,6 @@ static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr) if ( !hvm_paging_enabled(v) && !vmx_unrestricted_guest(v) ) v->arch.hvm_vmx.exec_control |= cr3_ctls; - /* Trap CR3 updates if CR3 memory events are enabled. */ - if ( v->domain->arch.monitor.write_ctrlreg_enabled & - monitor_ctrlreg_bitmask(VM_EVENT_X86_CR3) ) - v->arch.hvm_vmx.exec_control |= CPU_BASED_CR3_LOAD_EXITING; - if ( old_ctls != v->arch.hvm_vmx.exec_control ) vmx_update_cpu_exec_control(v); } diff --git a/xen/arch/x86/monitor.c b/xen/arch/x86/monitor.c index a271161..90e4856 100644 --- a/xen/arch/x86/monitor.c +++ b/xen/arch/x86/monitor.c @@ -20,6 +20,9 @@ */ #include <asm/monitor.h> +#include <asm/paging.h> +#include <asm/hvm/vmx/vmx.h> +#include <asm/vm_event.h> #include <public/vm_event.h> int arch_monitor_init_domain(struct domain *d) @@ -41,6 +44,40 @@ void arch_monitor_cleanup_domain(struct domain *d) memset(&d->monitor, 0, sizeof(d->monitor)); } +void arch_monitor_write_data(struct vcpu *v) +{ + struct monitor_write_data *w; + + if ( likely(!v->arch.vm_event) ) + return; + + w = &v->arch.vm_event->write_data; + + if ( w->do_write.msr ) + { + hvm_msr_write_intercept(w->msr, w->value, 0); + w->do_write.msr = 0; + } + + if ( w->do_write.cr0 ) + { + hvm_set_cr0(w->cr0, 0); + w->do_write.cr0 = 0; + } + 
+ if ( w->do_write.cr4 ) + { + hvm_set_cr4(w->cr4, 0); + w->do_write.cr4 = 0; + } + + if ( w->do_write.cr3 ) + { + hvm_set_cr3(w->cr3, 0); + w->do_write.cr3 = 0; + } +} + static unsigned long *monitor_bitmap_for_msr(const struct domain *d, u32 *msr) { ASSERT(d->arch.monitor.msr_bitmap && msr); @@ -119,6 +156,55 @@ bool_t monitored_msr(const struct domain *d, u32 msr) return test_bit(msr, bitmap); } +static void write_ctrlreg_adjust_traps(struct domain *d) +{ + struct vcpu *v; + struct arch_vmx_struct *avmx; + unsigned int cr3_bitmask; + bool_t cr3_vmevent, cr3_ldexit; + + /* Adjust CR3 load-exiting. */ + + /* vmx only */ + ASSERT(cpu_has_vmx); + + /* non-hap domains trap CR3 writes unconditionally */ + if ( !paging_mode_hap(d) ) + { + for_each_vcpu ( d, v ) + ASSERT(v->arch.hvm_vmx.exec_control & CPU_BASED_CR3_LOAD_EXITING); + return; + } + + cr3_bitmask = monitor_ctrlreg_bitmask(VM_EVENT_X86_CR3); + cr3_vmevent = !!(d->arch.monitor.write_ctrlreg_enabled & cr3_bitmask); + + for_each_vcpu ( d, v ) + { + avmx = &v->arch.hvm_vmx; + cr3_ldexit = !!(avmx->exec_control & CPU_BASED_CR3_LOAD_EXITING); + + if ( cr3_vmevent == cr3_ldexit ) + continue; + + /* + * If CR0.PE=0, CR3 load exiting must remain enabled. + * See vmx_update_guest_cr code motion for cr = 0. + */ + if ( cr3_ldexit && !hvm_paging_enabled(v) && !vmx_unrestricted_guest(v) ) + continue; + + if ( cr3_vmevent ) + avmx->exec_control |= CPU_BASED_CR3_LOAD_EXITING; + else + avmx->exec_control &= ~CPU_BASED_CR3_LOAD_EXITING; + + vmx_vmcs_enter(v); + vmx_update_cpu_exec_control(v); + vmx_vmcs_exit(v); + } +} + int arch_monitor_domctl_event(struct domain *d, struct xen_domctl_monitor_op *mop) { @@ -159,13 +245,7 @@ int arch_monitor_domctl_event(struct domain *d, else ad->monitor.write_ctrlreg_enabled &= ~ctrlreg_bitmask; - if ( VM_EVENT_X86_CR3 == mop->u.mov_to_cr.index ) - { - struct vcpu *v; - /* Latches new CR3 mask through CR0 code. 
*/ - for_each_vcpu ( d, v ) - hvm_update_guest_cr(v, 0); - } + write_ctrlreg_adjust_traps(d); domain_unpause(d); diff --git a/xen/include/asm-x86/monitor.h b/xen/include/asm-x86/monitor.h index 94b6768..1068376 100644 --- a/xen/include/asm-x86/monitor.h +++ b/xen/include/asm-x86/monitor.h @@ -93,6 +93,8 @@ int arch_monitor_init_domain(struct domain *d); void arch_monitor_cleanup_domain(struct domain *d); +void arch_monitor_write_data(struct vcpu *v); + bool_t monitored_msr(const struct domain *d, u32 msr); #endif /* __ASM_X86_MONITOR_H__ */
For readability: * Add function arch_monitor_write_data (in x86/monitor.c) and separate handling of monitor_write_data there (previously done directly in hvm_do_resume). * Add function write_ctrlreg_adjust_traps (in x86/monitor.c) and relocate enabling/disabling of CPU_BASED_CR3_LOAD_EXITING for CR3 monitor vm-events there (previously done through the CR0 path of vmx_update_guest_cr(v, 0)). Signed-off-by: Corneliu ZUZU <czuzu@bitdefender.com> --- xen/arch/x86/hvm/hvm.c | 48 +++++++++------------- xen/arch/x86/hvm/vmx/vmx.c | 5 --- xen/arch/x86/monitor.c | 94 +++++++++++++++++++++++++++++++++++++++---- xen/include/asm-x86/monitor.h | 2 + 4 files changed, 107 insertions(+), 42 deletions(-)