Message ID | 20210813071211.1635310-4-david.edmondson@oracle.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | KVM: x86: Convey the exit reason, etc. to user-space on emulation failure | expand |
On Fri, Aug 13, 2021, David Edmondson wrote: > -static void prepare_emulation_failure_exit(struct kvm_vcpu *vcpu) > +static void prepare_emulation_failure_exit(struct kvm_vcpu *vcpu, u64 *data, > + u8 ndata, u8 *insn_bytes, u8 insn_size) > { > - struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt; > - u32 insn_size = ctxt->fetch.end - ctxt->fetch.data; > struct kvm_run *run = vcpu->run; > + u8 ndata_start; > + u64 info[5]; > + > + /* > + * Zero the whole array used to retrieve the exit info, casting to u32 > + * for select entries will leave some chunks uninitialized. > + */ > + memset(&info, 0, sizeof(info)); > + > + static_call(kvm_x86_get_exit_info)(vcpu, (u32 *)&info[0], &info[1], > + &info[2], (u32 *)&info[3], > + (u32 *)&info[4]); > > run->exit_reason = KVM_EXIT_INTERNAL_ERROR; > run->emulation_failure.suberror = KVM_INTERNAL_ERROR_EMULATION; > - run->emulation_failure.ndata = 0; > + > + /* > + * There's currently space for 13 entries, but 5 are used for the exit > + * reason and info. Restrict to 4 to reduce the maintenance burden > + * when expanding kvm_run.emulation_failure in the future. > + */ > + if (WARN_ON_ONCE(ndata > 4)) > + ndata = 4; > + > + /* Always include the flags as a 'data' entry. */ > + ndata_start = 1; > run->emulation_failure.flags = 0; > > if (insn_size) { > - run->emulation_failure.ndata = 3; > + ndata_start += (sizeof(run->emulation_failure.insn_size) + > + sizeof(run->emulation_failure.insn_bytes)) / > + sizeof(u64); Hrm, I like the intent, but the end result ends up being rather convoluted and unnecessarily scary, e.g. this would do the wrong thing if the combined size of the fields is not a multiple of 8. That's obviously is not true, but relying on insn_size/insn_bytes being carefully selected while simultaneously obscuring that dependency is a bit mean. What about a compile-time assertion with a more reader friendly literal for bumping the count? BUILD_BUG_ON((sizeof(run->emulation_failure.insn_size) + sizeof(run->emulation_failure.insn_bytes) != 16)); ndata_start += 2; > run->emulation_failure.flags |= > KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES; > run->emulation_failure.insn_size = insn_size; > memset(run->emulation_failure.insn_bytes, 0x90, > sizeof(run->emulation_failure.insn_bytes)); > - memcpy(run->emulation_failure.insn_bytes, > - ctxt->fetch.data, insn_size); > + memcpy(run->emulation_failure.insn_bytes, insn_bytes, insn_size); > } > + > + memcpy(&run->internal.data[ndata_start], info, sizeof(info)); Oof, coming back to this code after some time away, "ndata_start" is confusing. I believe past me thought that it would help convey that "info" is lumped into the arbitrary data, but for me at least it just ends up making the interaction with @data and @ndata more confusing. Sorry for the bad suggestion :-/ What about info_start? IMO, that makes the memcpy more readable. Another option would be to have the name describe the number of "ABI enries", but I can't come up with a variable name that's remotely readable. memcpy(&run->internal.data[info_start], info, sizeof(info)); memcpy(&run->internal.data[info_start + ARRAY_SIZE(info)], data, ndata * sizeof(data[0])); > + memcpy(&run->internal.data[ndata_start + ARRAY_SIZE(info)], data, > + ndata * sizeof(u64)); Not that it really matters, but it's probably better to use sizeof(data[0]) or sizeof(*data). E.g. if we do screw up the param in the future, we only botch the output formatting, as opposed to dumping kernel stack data to userspace. > + > + run->emulation_failure.ndata = ndata_start + ARRAY_SIZE(info) + ndata; > } > > +static void prepare_emulation_ctxt_failure_exit(struct kvm_vcpu *vcpu) > +{ > + struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt; > + > + prepare_emulation_failure_exit(vcpu, NULL, 0, ctxt->fetch.data, > + ctxt->fetch.end - ctxt->fetch.data); > +} > + > +void __kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu, u64 *data, > + u8 ndata) > +{ > + prepare_emulation_failure_exit(vcpu, data, ndata, NULL, 0); > +} > +EXPORT_SYMBOL_GPL(__kvm_prepare_emulation_failure_exit); > + > +void kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu) > +{ > + __kvm_prepare_emulation_failure_exit(vcpu, NULL, 0); > +} > +EXPORT_SYMBOL_GPL(kvm_prepare_emulation_failure_exit); > + > static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type) > { > struct kvm *kvm = vcpu->kvm; > @@ -7502,16 +7551,14 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type) > > if (kvm->arch.exit_on_emulation_error || > (emulation_type & EMULTYPE_SKIP)) { > - prepare_emulation_failure_exit(vcpu); > + prepare_emulation_ctxt_failure_exit(vcpu); > return 0; > } > > kvm_queue_exception(vcpu, UD_VECTOR); > > if (!is_guest_mode(vcpu) && static_call(kvm_x86_get_cpl)(vcpu) == 0) { > - vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; > - vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; > - vcpu->run->internal.ndata = 0; > + prepare_emulation_ctxt_failure_exit(vcpu); > return 0; > } > > @@ -12104,9 +12151,7 @@ int kvm_handle_memory_failure(struct kvm_vcpu *vcpu, int r, > * doesn't seem to be a real use-case behind such requests, just return > * KVM_EXIT_INTERNAL_ERROR for now. > */ > - vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; > - vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; > - vcpu->run->internal.ndata = 0; > + kvm_prepare_emulation_failure_exit(vcpu); > > return 0; > } > diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h > index 6c79c1ce3703..e86cc2de7b5c 100644 > --- a/include/uapi/linux/kvm.h > +++ b/include/uapi/linux/kvm.h > @@ -397,6 +397,12 @@ struct kvm_run { > * "ndata" is correct, that new fields are enumerated in "flags", > * and that each flag enumerates fields that are 64-bit aligned > * and sized (so that ndata+internal.data[] is valid/accurate). > + * > + * Space beyond the defined fields may be used to Please run these out to 80 chars. Even 80 is a soft limit, it's ok to run over a bit if the end result is (subjectively) prettier. > + * store arbitrary debug information relating to the > + * emulation failure. It is accounted for in "ndata" > + * but otherwise unspecified and is not represented in Explicitly state the format is unspecified? > + * "flags". And also explicitly stating the debug info isn't ABI, e.g. * Space beyond the defined fields may be used to store arbitrary * debug information relating to the emulation failure. It is * accounted for in "ndata" but the format is unspecified and * is not represented in "flags". Any such info is _not_ ABI! > */ > struct { > __u32 suberror; > @@ -408,6 +414,7 @@ struct kvm_run { > __u8 insn_bytes[15]; > }; > }; > + /* Arbitrary debug data may follow. */ > } emulation_failure; > /* KVM_EXIT_OSI */ > struct { > -- > 2.30.2 >
Sean Christopherson <seanjc@google.com> writes: > On Fri, Aug 13, 2021, David Edmondson wrote: >> -static void prepare_emulation_failure_exit(struct kvm_vcpu *vcpu) >> +static void prepare_emulation_failure_exit(struct kvm_vcpu *vcpu, u64 *data, >> + u8 ndata, u8 *insn_bytes, u8 insn_size) >> { >> - struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt; >> - u32 insn_size = ctxt->fetch.end - ctxt->fetch.data; >> struct kvm_run *run = vcpu->run; >> + u8 ndata_start; >> + u64 info[5]; >> + >> + /* >> + * Zero the whole array used to retrieve the exit info, casting to u32 >> + * for select entries will leave some chunks uninitialized. >> + */ >> + memset(&info, 0, sizeof(info)); >> + >> + static_call(kvm_x86_get_exit_info)(vcpu, (u32 *)&info[0], &info[1], >> + &info[2], (u32 *)&info[3], >> + (u32 *)&info[4]); >> >> run->exit_reason = KVM_EXIT_INTERNAL_ERROR; >> run->emulation_failure.suberror = KVM_INTERNAL_ERROR_EMULATION; >> - run->emulation_failure.ndata = 0; >> + >> + /* >> + * There's currently space for 13 entries, but 5 are used for the exit >> + * reason and info. Restrict to 4 to reduce the maintenance burden >> + * when expanding kvm_run.emulation_failure in the future. >> + */ >> + if (WARN_ON_ONCE(ndata > 4)) >> + ndata = 4; >> + >> + /* Always include the flags as a 'data' entry. */ >> + ndata_start = 1; >> run->emulation_failure.flags = 0; >> >> if (insn_size) { >> - run->emulation_failure.ndata = 3; >> + ndata_start += (sizeof(run->emulation_failure.insn_size) + >> + sizeof(run->emulation_failure.insn_bytes)) / >> + sizeof(u64); > > Hrm, I like the intent, but the end result ends up being rather convoluted and > unnecessarily scary, e.g. this would do the wrong thing if the combined size of > the fields is not a multiple of 8. That's obviously is not true, but relying on > insn_size/insn_bytes being carefully selected while simultaneously obscuring that > dependency is a bit mean. What about a compile-time assertion with a more reader > friendly literal for bumping the count? > > BUILD_BUG_ON((sizeof(run->emulation_failure.insn_size) + > sizeof(run->emulation_failure.insn_bytes) != 16)); > ndata_start += 2; Okay. >> run->emulation_failure.flags |= >> KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES; >> run->emulation_failure.insn_size = insn_size; >> memset(run->emulation_failure.insn_bytes, 0x90, >> sizeof(run->emulation_failure.insn_bytes)); >> - memcpy(run->emulation_failure.insn_bytes, >> - ctxt->fetch.data, insn_size); >> + memcpy(run->emulation_failure.insn_bytes, insn_bytes, insn_size); >> } >> + >> + memcpy(&run->internal.data[ndata_start], info, sizeof(info)); > > Oof, coming back to this code after some time away, "ndata_start" is confusing. > I believe past me thought that it would help convey that "info" is lumped into > the arbitrary data, but for me at least it just ends up making the interaction > with @data and @ndata more confusing. Sorry for the bad suggestion :-/ > > What about info_start? IMO, that makes the memcpy more readable. Another option > would be to have the name describe the number of "ABI enries", but I can't come > up with a variable name that's remotely readable. > > memcpy(&run->internal.data[info_start], info, sizeof(info)); > memcpy(&run->internal.data[info_start + ARRAY_SIZE(info)], data, > ndata * sizeof(data[0])); Okay. >> + memcpy(&run->internal.data[ndata_start + ARRAY_SIZE(info)], data, >> + ndata * sizeof(u64)); > > Not that it really matters, but it's probably better to use sizeof(data[0]) or > sizeof(*data). E.g. if we do screw up the param in the future, we only botch the > output formatting, as opposed to dumping kernel stack data to userspace. Agreed. >> + >> + run->emulation_failure.ndata = ndata_start + ARRAY_SIZE(info) + ndata; >> } >> >> +static void prepare_emulation_ctxt_failure_exit(struct kvm_vcpu *vcpu) >> +{ >> + struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt; >> + >> + prepare_emulation_failure_exit(vcpu, NULL, 0, ctxt->fetch.data, >> + ctxt->fetch.end - ctxt->fetch.data); >> +} >> + >> +void __kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu, u64 *data, >> + u8 ndata) >> +{ >> + prepare_emulation_failure_exit(vcpu, data, ndata, NULL, 0); >> +} >> +EXPORT_SYMBOL_GPL(__kvm_prepare_emulation_failure_exit); >> + >> +void kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu) >> +{ >> + __kvm_prepare_emulation_failure_exit(vcpu, NULL, 0); >> +} >> +EXPORT_SYMBOL_GPL(kvm_prepare_emulation_failure_exit); >> + >> static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type) >> { >> struct kvm *kvm = vcpu->kvm; >> @@ -7502,16 +7551,14 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type) >> >> if (kvm->arch.exit_on_emulation_error || >> (emulation_type & EMULTYPE_SKIP)) { >> - prepare_emulation_failure_exit(vcpu); >> + prepare_emulation_ctxt_failure_exit(vcpu); >> return 0; >> } >> >> kvm_queue_exception(vcpu, UD_VECTOR); >> >> if (!is_guest_mode(vcpu) && static_call(kvm_x86_get_cpl)(vcpu) == 0) { >> - vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; >> - vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; >> - vcpu->run->internal.ndata = 0; >> + prepare_emulation_ctxt_failure_exit(vcpu); >> return 0; >> } >> >> @@ -12104,9 +12151,7 @@ int kvm_handle_memory_failure(struct kvm_vcpu *vcpu, int r, >> * doesn't seem to be a real use-case behind such requests, just return >> * KVM_EXIT_INTERNAL_ERROR for now. >> */ >> - vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; >> - vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; >> - vcpu->run->internal.ndata = 0; >> + kvm_prepare_emulation_failure_exit(vcpu); >> >> return 0; >> } >> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h >> index 6c79c1ce3703..e86cc2de7b5c 100644 >> --- a/include/uapi/linux/kvm.h >> +++ b/include/uapi/linux/kvm.h >> @@ -397,6 +397,12 @@ struct kvm_run { >> * "ndata" is correct, that new fields are enumerated in "flags", >> * and that each flag enumerates fields that are 64-bit aligned >> * and sized (so that ndata+internal.data[] is valid/accurate). >> + * >> + * Space beyond the defined fields may be used to > > Please run these out to 80 chars. Even 80 is a soft limit, it's ok to run over > a bit if the end result is (subjectively) prettier. > >> + * store arbitrary debug information relating to the >> + * emulation failure. It is accounted for in "ndata" >> + * but otherwise unspecified and is not represented in > > Explicitly state the format is unspecified? > >> + * "flags". > > And also explicitly stating the debug info isn't ABI, e.g. > > * Space beyond the defined fields may be used to store arbitrary > * debug information relating to the emulation failure. It is > * accounted for in "ndata" but the format is unspecified and > * is not represented in "flags". Any such info is _not_ ABI! Okay. >> */ >> struct { >> __u32 suberror; >> @@ -408,6 +414,7 @@ struct kvm_run { >> __u8 insn_bytes[15]; >> }; >> }; >> + /* Arbitrary debug data may follow. */ >> } emulation_failure; >> /* KVM_EXIT_OSI */ >> struct { >> -- >> 2.30.2 >>
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index e3c0788bcdc2..da2d8f3a2019 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1630,6 +1630,9 @@ extern u64 kvm_mce_cap_supported; int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type); int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu, void *insn, int insn_len); +void __kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu, + u64 *data, u8 ndata); +void kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu); void kvm_enable_efer_bits(u64); bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer); diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 6e5706ecce0b..9d14f68651f1 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -5367,10 +5367,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) if (vmx->emulation_required && !vmx->rmode.vm86_active && vcpu->arch.exception.pending) { - vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; - vcpu->run->internal.suberror = - KVM_INTERNAL_ERROR_EMULATION; - vcpu->run->internal.ndata = 0; + kvm_prepare_emulation_failure_exit(vcpu); return 0; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e5d5c5ed7dd4..35639391de7b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7465,29 +7465,78 @@ void kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip) } EXPORT_SYMBOL_GPL(kvm_inject_realmode_interrupt); -static void prepare_emulation_failure_exit(struct kvm_vcpu *vcpu) +static void prepare_emulation_failure_exit(struct kvm_vcpu *vcpu, u64 *data, + u8 ndata, u8 *insn_bytes, u8 insn_size) { - struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt; - u32 insn_size = ctxt->fetch.end - ctxt->fetch.data; struct kvm_run *run = vcpu->run; + u8 ndata_start; + u64 info[5]; + + /* + * Zero the whole array used to retrieve the exit info, casting to u32 + * for select entries will leave some chunks uninitialized. + */ + memset(&info, 0, sizeof(info)); + + static_call(kvm_x86_get_exit_info)(vcpu, (u32 *)&info[0], &info[1], + &info[2], (u32 *)&info[3], + (u32 *)&info[4]); run->exit_reason = KVM_EXIT_INTERNAL_ERROR; run->emulation_failure.suberror = KVM_INTERNAL_ERROR_EMULATION; - run->emulation_failure.ndata = 0; + + /* + * There's currently space for 13 entries, but 5 are used for the exit + * reason and info. Restrict to 4 to reduce the maintenance burden + * when expanding kvm_run.emulation_failure in the future. + */ + if (WARN_ON_ONCE(ndata > 4)) + ndata = 4; + + /* Always include the flags as a 'data' entry. */ + ndata_start = 1; run->emulation_failure.flags = 0; if (insn_size) { - run->emulation_failure.ndata = 3; + ndata_start += (sizeof(run->emulation_failure.insn_size) + + sizeof(run->emulation_failure.insn_bytes)) / + sizeof(u64); run->emulation_failure.flags |= KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES; run->emulation_failure.insn_size = insn_size; memset(run->emulation_failure.insn_bytes, 0x90, sizeof(run->emulation_failure.insn_bytes)); - memcpy(run->emulation_failure.insn_bytes, - ctxt->fetch.data, insn_size); + memcpy(run->emulation_failure.insn_bytes, insn_bytes, insn_size); } + + memcpy(&run->internal.data[ndata_start], info, sizeof(info)); + memcpy(&run->internal.data[ndata_start + ARRAY_SIZE(info)], data, + ndata * sizeof(u64)); + + run->emulation_failure.ndata = ndata_start + ARRAY_SIZE(info) + ndata; } +static void prepare_emulation_ctxt_failure_exit(struct kvm_vcpu *vcpu) +{ + struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt; + + prepare_emulation_failure_exit(vcpu, NULL, 0, ctxt->fetch.data, + ctxt->fetch.end - ctxt->fetch.data); +} + +void __kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu, u64 *data, + u8 ndata) +{ + prepare_emulation_failure_exit(vcpu, data, ndata, NULL, 0); +} +EXPORT_SYMBOL_GPL(__kvm_prepare_emulation_failure_exit); + +void kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu) +{ + __kvm_prepare_emulation_failure_exit(vcpu, NULL, 0); +} +EXPORT_SYMBOL_GPL(kvm_prepare_emulation_failure_exit); + static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type) { struct kvm *kvm = vcpu->kvm; @@ -7502,16 +7551,14 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type) if (kvm->arch.exit_on_emulation_error || (emulation_type & EMULTYPE_SKIP)) { - prepare_emulation_failure_exit(vcpu); + prepare_emulation_ctxt_failure_exit(vcpu); return 0; } kvm_queue_exception(vcpu, UD_VECTOR); if (!is_guest_mode(vcpu) && static_call(kvm_x86_get_cpl)(vcpu) == 0) { - vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; - vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; - vcpu->run->internal.ndata = 0; + prepare_emulation_ctxt_failure_exit(vcpu); return 0; } @@ -12104,9 +12151,7 @@ int kvm_handle_memory_failure(struct kvm_vcpu *vcpu, int r, * doesn't seem to be a real use-case behind such requests, just return * KVM_EXIT_INTERNAL_ERROR for now. */ - vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; - vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; - vcpu->run->internal.ndata = 0; + kvm_prepare_emulation_failure_exit(vcpu); return 0; } diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 6c79c1ce3703..e86cc2de7b5c 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -397,6 +397,12 @@ struct kvm_run { * "ndata" is correct, that new fields are enumerated in "flags", * and that each flag enumerates fields that are 64-bit aligned * and sized (so that ndata+internal.data[] is valid/accurate). + * + * Space beyond the defined fields may be used to + * store arbitrary debug information relating to the + * emulation failure. It is accounted for in "ndata" + * but otherwise unspecified and is not represented in + * "flags". */ struct { __u32 suberror; @@ -408,6 +414,7 @@ struct kvm_run { __u8 insn_bytes[15]; }; }; + /* Arbitrary debug data may follow. */ } emulation_failure; /* KVM_EXIT_OSI */ struct {
Should instruction emulation fail, include the VM exit reason, etc. in the emulation_failure data passed to userspace, in order that the VMM can report it as a debugging aid when describing the failure. Suggested-by: Joao Martins <joao.m.martins@oracle.com> Signed-off-by: David Edmondson <david.edmondson@oracle.com> --- arch/x86/include/asm/kvm_host.h | 3 ++ arch/x86/kvm/vmx/vmx.c | 5 +-- arch/x86/kvm/x86.c | 73 ++++++++++++++++++++++++++------- include/uapi/linux/kvm.h | 7 ++++ 4 files changed, 70 insertions(+), 18 deletions(-)