Message ID | 4caa0f67589ae3c22c311ee0e6139496902f2edc.1658159083.git.maciej.szmigiero@oracle.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | KVM: nSVM: Pull CS.Base from actual VMCB12 for soft int/ex re-injection | expand |
On 7/18/22 17:47, Maciej S. Szmigiero wrote: > From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com> > > enter_svm_guest_mode() first calls nested_vmcb02_prepare_control() to copy > control fields from VMCB12 to the current VMCB, then > nested_vmcb02_prepare_save() to perform a similar copy of the save area. > > This means that nested_vmcb02_prepare_control() still runs with the > previous save area values in the current VMCB so it shouldn't take the L2 > guest CS.Base from this area. > > Explicitly pull CS.Base from the actual VMCB12 instead in > enter_svm_guest_mode(). > > Granted, having a non-zero CS.Base is a very rare thing (and even > impossible in 64-bit mode), having it change between nested VMRUNs is > probably even rarer, but if it happens it would create a really subtle bug > so it's better to fix it upfront. > > Fixes: 6ef88d6e36c2 ("KVM: SVM: Re-inject INT3/INTO instead of retrying the instruction") > Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com> > --- > arch/x86/kvm/svm/nested.c | 9 +++++---- > 1 file changed, 5 insertions(+), 4 deletions(-) Queued, thanks. Paolo > diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c > index adf4120b05d90..23252ab821941 100644 > --- a/arch/x86/kvm/svm/nested.c > +++ b/arch/x86/kvm/svm/nested.c > @@ -639,7 +639,8 @@ static bool is_evtinj_nmi(u32 evtinj) > } > > static void nested_vmcb02_prepare_control(struct vcpu_svm *svm, > - unsigned long vmcb12_rip) > + unsigned long vmcb12_rip, > + unsigned long vmcb12_csbase) > { > u32 int_ctl_vmcb01_bits = V_INTR_MASKING_MASK; > u32 int_ctl_vmcb12_bits = V_TPR_MASK | V_IRQ_INJECTION_BITS_MASK; > @@ -711,7 +712,7 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm, > svm->nmi_l1_to_l2 = is_evtinj_nmi(vmcb02->control.event_inj); > if (is_evtinj_soft(vmcb02->control.event_inj)) { > svm->soft_int_injected = true; > - svm->soft_int_csbase = svm->vmcb->save.cs.base; > + svm->soft_int_csbase = vmcb12_csbase; > svm->soft_int_old_rip = vmcb12_rip; > if (svm->nrips_enabled) > svm->soft_int_next_rip = svm->nested.ctl.next_rip; > @@ -800,7 +801,7 @@ int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb12_gpa, > nested_svm_copy_common_state(svm->vmcb01.ptr, svm->nested.vmcb02.ptr); > > svm_switch_vmcb(svm, &svm->nested.vmcb02); > - nested_vmcb02_prepare_control(svm, vmcb12->save.rip); > + nested_vmcb02_prepare_control(svm, vmcb12->save.rip, vmcb12->save.cs.base); > nested_vmcb02_prepare_save(svm, vmcb12); > > ret = nested_svm_load_cr3(&svm->vcpu, svm->nested.save.cr3, > @@ -1663,7 +1664,7 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu, > nested_copy_vmcb_control_to_cache(svm, ctl); > > svm_switch_vmcb(svm, &svm->nested.vmcb02); > - nested_vmcb02_prepare_control(svm, svm->vmcb->save.rip); > + nested_vmcb02_prepare_control(svm, svm->vmcb->save.rip, svm->vmcb->save.cs.base); > > /* > * While the nested guest CR3 is already checked and set by >
On Mon, 2022-07-18 at 17:47 +0200, Maciej S. Szmigiero wrote: > From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com> > > enter_svm_guest_mode() first calls nested_vmcb02_prepare_control() to copy > control fields from VMCB12 to the current VMCB, then > nested_vmcb02_prepare_save() to perform a similar copy of the save area. > > This means that nested_vmcb02_prepare_control() still runs with the > previous save area values in the current VMCB so it shouldn't take the L2 > guest CS.Base from this area. > > Explicitly pull CS.Base from the actual VMCB12 instead in > enter_svm_guest_mode(). > > Granted, having a non-zero CS.Base is a very rare thing (and even > impossible in 64-bit mode), having it change between nested VMRUNs is > probably even rarer, but if it happens it would create a really subtle bug > so it's better to fix it upfront. > > Fixes: 6ef88d6e36c2 ("KVM: SVM: Re-inject INT3/INTO instead of retrying the instruction") > Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com> > --- > arch/x86/kvm/svm/nested.c | 9 +++++---- > 1 file changed, 5 insertions(+), 4 deletions(-) > > diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c > index adf4120b05d90..23252ab821941 100644 > --- a/arch/x86/kvm/svm/nested.c > +++ b/arch/x86/kvm/svm/nested.c > @@ -639,7 +639,8 @@ static bool is_evtinj_nmi(u32 evtinj) > } > > static void nested_vmcb02_prepare_control(struct vcpu_svm *svm, > - unsigned long vmcb12_rip) > + unsigned long vmcb12_rip, > + unsigned long vmcb12_csbase) Honestly I don't like that nested_vmcb02_prepare_control starts to grow its parameter list, because it kind of defeats the purpose of vmcb12 cache we added back then. I think that it is better to add csbase/rip to vmcb_save_area_cached, but I am not 100% sure. What do you think? Best regards, Maxim Levitsky > { > u32 int_ctl_vmcb01_bits = V_INTR_MASKING_MASK; > u32 int_ctl_vmcb12_bits = V_TPR_MASK | V_IRQ_INJECTION_BITS_MASK; > @@ -711,7 +712,7 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm, > svm->nmi_l1_to_l2 = is_evtinj_nmi(vmcb02->control.event_inj); > if (is_evtinj_soft(vmcb02->control.event_inj)) { > svm->soft_int_injected = true; > - svm->soft_int_csbase = svm->vmcb->save.cs.base; > + svm->soft_int_csbase = vmcb12_csbase; > svm->soft_int_old_rip = vmcb12_rip; > if (svm->nrips_enabled) > svm->soft_int_next_rip = svm->nested.ctl.next_rip; > @@ -800,7 +801,7 @@ int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb12_gpa, > nested_svm_copy_common_state(svm->vmcb01.ptr, svm->nested.vmcb02.ptr); > > svm_switch_vmcb(svm, &svm->nested.vmcb02); > - nested_vmcb02_prepare_control(svm, vmcb12->save.rip); > + nested_vmcb02_prepare_control(svm, vmcb12->save.rip, vmcb12->save.cs.base); > nested_vmcb02_prepare_save(svm, vmcb12); > > ret = nested_svm_load_cr3(&svm->vcpu, svm->nested.save.cr3, > @@ -1663,7 +1664,7 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu, > nested_copy_vmcb_control_to_cache(svm, ctl); > > svm_switch_vmcb(svm, &svm->nested.vmcb02); > - nested_vmcb02_prepare_control(svm, svm->vmcb->save.rip); > + nested_vmcb02_prepare_control(svm, svm->vmcb->save.rip, svm->vmcb->save.cs.base); > > /* > * While the nested guest CR3 is already checked and set by >
On 20.07.2022 10:43, Maxim Levitsky wrote: > On Mon, 2022-07-18 at 17:47 +0200, Maciej S. Szmigiero wrote: >> From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com> >> >> enter_svm_guest_mode() first calls nested_vmcb02_prepare_control() to copy >> control fields from VMCB12 to the current VMCB, then >> nested_vmcb02_prepare_save() to perform a similar copy of the save area. >> >> This means that nested_vmcb02_prepare_control() still runs with the >> previous save area values in the current VMCB so it shouldn't take the L2 >> guest CS.Base from this area. >> >> Explicitly pull CS.Base from the actual VMCB12 instead in >> enter_svm_guest_mode(). >> >> Granted, having a non-zero CS.Base is a very rare thing (and even >> impossible in 64-bit mode), having it change between nested VMRUNs is >> probably even rarer, but if it happens it would create a really subtle bug >> so it's better to fix it upfront. >> >> Fixes: 6ef88d6e36c2 ("KVM: SVM: Re-inject INT3/INTO instead of retrying the instruction") >> Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com> >> --- >> arch/x86/kvm/svm/nested.c | 9 +++++---- >> 1 file changed, 5 insertions(+), 4 deletions(-) >> >> diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c >> index adf4120b05d90..23252ab821941 100644 >> --- a/arch/x86/kvm/svm/nested.c >> +++ b/arch/x86/kvm/svm/nested.c >> @@ -639,7 +639,8 @@ static bool is_evtinj_nmi(u32 evtinj) >> } >> >> static void nested_vmcb02_prepare_control(struct vcpu_svm *svm, >> - unsigned long vmcb12_rip) >> + unsigned long vmcb12_rip, >> + unsigned long vmcb12_csbase) > > Honestly I don't like that nested_vmcb02_prepare_control starts to grow its parameter list, > because it kind of defeats the purpose of vmcb12 cache we added back then. > > I think that it is better to add csbase/rip to vmcb_save_area_cached, > but I am not 100% sure. What do you think? This function has only 3 parameters now, so they fit well into registers without taking any extra memory (even assuming it won't get inlined). If in the future more parameters need to be added to this function (which may or may not happen) then they all can be moved to, for example, vmcb_ctrl_area_cached. > Best regards, > Maxim Levitsky > > Thanks, Maciej
On Wed, Jul 20, 2022, Maciej S. Szmigiero wrote: > On 20.07.2022 10:43, Maxim Levitsky wrote: > > On Mon, 2022-07-18 at 17:47 +0200, Maciej S. Szmigiero wrote: > > > Fixes: 6ef88d6e36c2 ("KVM: SVM: Re-inject INT3/INTO instead of retrying the instruction") > > > Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com> > > > --- > > > arch/x86/kvm/svm/nested.c | 9 +++++---- > > > 1 file changed, 5 insertions(+), 4 deletions(-) > > > > > > diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c > > > index adf4120b05d90..23252ab821941 100644 > > > --- a/arch/x86/kvm/svm/nested.c > > > +++ b/arch/x86/kvm/svm/nested.c > > > @@ -639,7 +639,8 @@ static bool is_evtinj_nmi(u32 evtinj) > > > } > > > static void nested_vmcb02_prepare_control(struct vcpu_svm *svm, > > > - unsigned long vmcb12_rip) > > > + unsigned long vmcb12_rip, > > > + unsigned long vmcb12_csbase) > > > > Honestly I don't like that nested_vmcb02_prepare_control starts to grow its parameter list, > > because it kind of defeats the purpose of vmcb12 cache we added back then. > > > > I think that it is better to add csbase/rip to vmcb_save_area_cached, > > but I am not 100% sure. What do you think? > > This function has only 3 parameters now, so they fit well into registers > without taking any extra memory (even assuming it won't get inlined). > > If in the future more parameters need to be added to this function > (which may or may not happen) then they all can be moved to, for example, > vmcb_ctrl_area_cached. I don't think Maxim is concerned about the size, rather that we have a dedicated struct for snapshotting select save state and aren't using it. IIRC, I deliberately avoided using the "cache" because the main/original purpose of the cache is to avoid TOCTOU issues. And because RIP and CS.base aren't checked, there's no need to throw them in the cache.
On Wed, 2022-07-20 at 21:34 +0000, Sean Christopherson wrote: > On Wed, Jul 20, 2022, Maciej S. Szmigiero wrote: > > On 20.07.2022 10:43, Maxim Levitsky wrote: > > > On Mon, 2022-07-18 at 17:47 +0200, Maciej S. Szmigiero wrote: > > > > Fixes: 6ef88d6e36c2 ("KVM: SVM: Re-inject INT3/INTO instead of retrying the instruction") > > > > Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com> > > > > --- > > > > arch/x86/kvm/svm/nested.c | 9 +++++---- > > > > 1 file changed, 5 insertions(+), 4 deletions(-) > > > > > > > > diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c > > > > index adf4120b05d90..23252ab821941 100644 > > > > --- a/arch/x86/kvm/svm/nested.c > > > > +++ b/arch/x86/kvm/svm/nested.c > > > > @@ -639,7 +639,8 @@ static bool is_evtinj_nmi(u32 evtinj) > > > > } > > > > static void nested_vmcb02_prepare_control(struct vcpu_svm *svm, > > > > - unsigned long vmcb12_rip) > > > > + unsigned long vmcb12_rip, > > > > + unsigned long vmcb12_csbase) > > > > > > Honestly I don't like that nested_vmcb02_prepare_control starts to grow its parameter list, > > > because it kind of defeats the purpose of vmcb12 cache we added back then. > > > > > > I think that it is better to add csbase/rip to vmcb_save_area_cached, > > > but I am not 100% sure. What do you think? > > > > This function has only 3 parameters now, so they fit well into registers > > without taking any extra memory (even assuming it won't get inlined). > > > > If in the future more parameters need to be added to this function > > (which may or may not happen) then they all can be moved to, for example, > > vmcb_ctrl_area_cached. > > I don't think Maxim is concerned about the size, rather that we have a dedicated > struct for snapshotting select save state and aren't using it. Yes my thoughts exactly. The thing is that passing these values as parameters to this function also works like a cache, but not going through the cache that we already have for this purpose. Anyway for now let it be, but we might need to rethink it in the future. Best regards, Maxim Levitsky > > IIRC, I deliberately avoided using the "cache" because the main/original purpose > of the cache is to avoid TOCTOU issues. And because RIP and CS.base aren't checked, > there's no need to throw them in the cache.
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index adf4120b05d90..23252ab821941 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -639,7 +639,8 @@ static bool is_evtinj_nmi(u32 evtinj) } static void nested_vmcb02_prepare_control(struct vcpu_svm *svm, - unsigned long vmcb12_rip) + unsigned long vmcb12_rip, + unsigned long vmcb12_csbase) { u32 int_ctl_vmcb01_bits = V_INTR_MASKING_MASK; u32 int_ctl_vmcb12_bits = V_TPR_MASK | V_IRQ_INJECTION_BITS_MASK; @@ -711,7 +712,7 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm, svm->nmi_l1_to_l2 = is_evtinj_nmi(vmcb02->control.event_inj); if (is_evtinj_soft(vmcb02->control.event_inj)) { svm->soft_int_injected = true; - svm->soft_int_csbase = svm->vmcb->save.cs.base; + svm->soft_int_csbase = vmcb12_csbase; svm->soft_int_old_rip = vmcb12_rip; if (svm->nrips_enabled) svm->soft_int_next_rip = svm->nested.ctl.next_rip; @@ -800,7 +801,7 @@ int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb12_gpa, nested_svm_copy_common_state(svm->vmcb01.ptr, svm->nested.vmcb02.ptr); svm_switch_vmcb(svm, &svm->nested.vmcb02); - nested_vmcb02_prepare_control(svm, vmcb12->save.rip); + nested_vmcb02_prepare_control(svm, vmcb12->save.rip, vmcb12->save.cs.base); nested_vmcb02_prepare_save(svm, vmcb12); ret = nested_svm_load_cr3(&svm->vcpu, svm->nested.save.cr3, @@ -1663,7 +1664,7 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu, nested_copy_vmcb_control_to_cache(svm, ctl); svm_switch_vmcb(svm, &svm->nested.vmcb02); - nested_vmcb02_prepare_control(svm, svm->vmcb->save.rip); + nested_vmcb02_prepare_control(svm, svm->vmcb->save.rip, svm->vmcb->save.cs.base); /* * While the nested guest CR3 is already checked and set by