diff mbox series

[2/2] KVM: x86: On emulation failure, convey the exit reason to userspace

Message ID 20210628173152.2062988-3-david.edmondson@oracle.com (mailing list archive)
State New, archived
Headers show
Series KVM: x86: Convey the exit reason to user-space on emulation failure | expand

Commit Message

David Edmondson June 28, 2021, 5:31 p.m. UTC
To aid in debugging.

Suggested-by: Joao Martins <joao.m.martins@oracle.com>
Signed-off-by: David Edmondson <david.edmondson@oracle.com>
---
 arch/x86/kvm/x86.c       | 23 +++++++++++++++++------
 include/uapi/linux/kvm.h |  2 ++
 2 files changed, 19 insertions(+), 6 deletions(-)

Comments

David Matlack June 30, 2021, 4:48 p.m. UTC | #1
On Mon, Jun 28, 2021 at 06:31:52PM +0100, David Edmondson wrote:
> To aid in debugging.

Please add more context to the commit message.

> 
> Suggested-by: Joao Martins <joao.m.martins@oracle.com>
> Signed-off-by: David Edmondson <david.edmondson@oracle.com>
> ---
>  arch/x86/kvm/x86.c       | 23 +++++++++++++++++------
>  include/uapi/linux/kvm.h |  2 ++
>  2 files changed, 19 insertions(+), 6 deletions(-)
> 
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 8166ad113fb2..48ef0dc68faf 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -7455,7 +7455,7 @@ void kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip)
>  }
>  EXPORT_SYMBOL_GPL(kvm_inject_realmode_interrupt);
>  
> -static void prepare_emulation_failure_exit(struct kvm_vcpu *vcpu)
> +static void prepare_emulation_failure_exit(struct kvm_vcpu *vcpu, uint64_t flags)
>  {
>  	struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
>  	u32 insn_size = ctxt->fetch.end - ctxt->fetch.data;
> @@ -7466,7 +7466,8 @@ static void prepare_emulation_failure_exit(struct kvm_vcpu *vcpu)
>  	run->emulation_failure.ndata = 0;
>  	run->emulation_failure.flags = 0;
>  
> -	if (insn_size) {
> +	if (insn_size &&
> +	    (flags & KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES)) {
>  		run->emulation_failure.ndata = 3;
>  		run->emulation_failure.flags |=
>  			KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES;
> @@ -7476,6 +7477,14 @@ static void prepare_emulation_failure_exit(struct kvm_vcpu *vcpu)
>  		memcpy(run->emulation_failure.insn_bytes,
>  		       ctxt->fetch.data, insn_size);
>  	}
> +
> +	if (flags & KVM_INTERNAL_ERROR_EMULATION_FLAG_EXIT_REASON) {

This flag is always passed so this check is superfluous. Perhaps change
`int flags` to `bool instruction_bytes` and have it control only whether
the instruction bytes are populated.

> +		run->emulation_failure.ndata = 4;
> +		run->emulation_failure.flags |=
> +			KVM_INTERNAL_ERROR_EMULATION_FLAG_EXIT_REASON;
> +		run->emulation_failure.exit_reason =
> +			static_call(kvm_x86_get_exit_reason)(vcpu);
> +	}
>  }
>  
>  static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type)
> @@ -7492,16 +7501,18 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type)
>  
>  	if (kvm->arch.exit_on_emulation_error ||
>  	    (emulation_type & EMULTYPE_SKIP)) {
> -		prepare_emulation_failure_exit(vcpu);
> +		prepare_emulation_failure_exit(
> +			vcpu,
> +			KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES |
> +			KVM_INTERNAL_ERROR_EMULATION_FLAG_EXIT_REASON);
>  		return 0;
>  	}
>  
>  	kvm_queue_exception(vcpu, UD_VECTOR);
>  
>  	if (!is_guest_mode(vcpu) && static_call(kvm_x86_get_cpl)(vcpu) == 0) {
> -		vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
> -		vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
> -		vcpu->run->internal.ndata = 0;
> +		prepare_emulation_failure_exit(
> +			vcpu, KVM_INTERNAL_ERROR_EMULATION_FLAG_EXIT_REASON);

Should kvm_task_switch and kvm_handle_memory_failure also be updated
like this?

>  		return 0;
>  	}
>  
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index 68c9e6d8bbda..3e4126652a67 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -282,6 +282,7 @@ struct kvm_xen_exit {
>  
>  /* Flags that describe what fields in emulation_failure hold valid data. */
>  #define KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES (1ULL << 0)
> +#define KVM_INTERNAL_ERROR_EMULATION_FLAG_EXIT_REASON       (1ULL << 1)
>  
>  /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */
>  struct kvm_run {
> @@ -404,6 +405,7 @@ struct kvm_run {
>  			__u64 flags;
>  			__u8  insn_size;
>  			__u8  insn_bytes[15];
> +			__u64 exit_reason;

Please document what this field contains, especially since its contents
depend on AMD versus Intel.

>  		} emulation_failure;
>  		/* KVM_EXIT_OSI */
>  		struct {
> -- 
> 2.30.2
>
David Edmondson July 2, 2021, 8:44 a.m. UTC | #2
On Wednesday, 2021-06-30 at 16:48:42 UTC, David Matlack wrote:

> On Mon, Jun 28, 2021 at 06:31:52PM +0100, David Edmondson wrote:
>> To aid in debugging.
>
> Please add more context to the commit message.

Okay.

>> 
>> Suggested-by: Joao Martins <joao.m.martins@oracle.com>
>> Signed-off-by: David Edmondson <david.edmondson@oracle.com>
>> ---
>>  arch/x86/kvm/x86.c       | 23 +++++++++++++++++------
>>  include/uapi/linux/kvm.h |  2 ++
>>  2 files changed, 19 insertions(+), 6 deletions(-)
>> 
>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>> index 8166ad113fb2..48ef0dc68faf 100644
>> --- a/arch/x86/kvm/x86.c
>> +++ b/arch/x86/kvm/x86.c
>> @@ -7455,7 +7455,7 @@ void kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip)
>>  }
>>  EXPORT_SYMBOL_GPL(kvm_inject_realmode_interrupt);
>>  
>> -static void prepare_emulation_failure_exit(struct kvm_vcpu *vcpu)
>> +static void prepare_emulation_failure_exit(struct kvm_vcpu *vcpu, uint64_t flags)
>>  {
>>  	struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
>>  	u32 insn_size = ctxt->fetch.end - ctxt->fetch.data;
>> @@ -7466,7 +7466,8 @@ static void prepare_emulation_failure_exit(struct kvm_vcpu *vcpu)
>>  	run->emulation_failure.ndata = 0;
>>  	run->emulation_failure.flags = 0;
>>  
>> -	if (insn_size) {
>> +	if (insn_size &&
>> +	    (flags & KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES)) {
>>  		run->emulation_failure.ndata = 3;
>>  		run->emulation_failure.flags |=
>>  			KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES;
>> @@ -7476,6 +7477,14 @@ static void prepare_emulation_failure_exit(struct kvm_vcpu *vcpu)
>>  		memcpy(run->emulation_failure.insn_bytes,
>>  		       ctxt->fetch.data, insn_size);
>>  	}
>> +
>> +	if (flags & KVM_INTERNAL_ERROR_EMULATION_FLAG_EXIT_REASON) {
>
> This flag is always passed so this check is superfluous. Perhaps change
> `int flags` to `bool instruction_bytes` and have it control only whether
> the instruction bytes are populated.

Okay.

>> +		run->emulation_failure.ndata = 4;
>> +		run->emulation_failure.flags |=
>> +			KVM_INTERNAL_ERROR_EMULATION_FLAG_EXIT_REASON;
>> +		run->emulation_failure.exit_reason =
>> +			static_call(kvm_x86_get_exit_reason)(vcpu);
>> +	}
>>  }
>>  
>>  static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type)
>> @@ -7492,16 +7501,18 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type)
>>  
>>  	if (kvm->arch.exit_on_emulation_error ||
>>  	    (emulation_type & EMULTYPE_SKIP)) {
>> -		prepare_emulation_failure_exit(vcpu);
>> +		prepare_emulation_failure_exit(
>> +			vcpu,
>> +			KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES |
>> +			KVM_INTERNAL_ERROR_EMULATION_FLAG_EXIT_REASON);
>>  		return 0;
>>  	}
>>  
>>  	kvm_queue_exception(vcpu, UD_VECTOR);
>>  
>>  	if (!is_guest_mode(vcpu) && static_call(kvm_x86_get_cpl)(vcpu) == 0) {
>> -		vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
>> -		vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
>> -		vcpu->run->internal.ndata = 0;
>> +		prepare_emulation_failure_exit(
>> +			vcpu, KVM_INTERNAL_ERROR_EMULATION_FLAG_EXIT_REASON);
>
> Should kvm_task_switch and kvm_handle_memory_failure also be updated
> like this?

Will do in v2.

sgx_handle_emulation_failure() seems like an existing user of
KVM_INTERNAL_ERROR_EMULATION that doesn't follow the new protocol (use
the emulation_failure part of the union).

Sean: If I add another flag for this case, what is the existing
user-level consumer?

>>  		return 0;
>>  	}
>>  
>> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
>> index 68c9e6d8bbda..3e4126652a67 100644
>> --- a/include/uapi/linux/kvm.h
>> +++ b/include/uapi/linux/kvm.h
>> @@ -282,6 +282,7 @@ struct kvm_xen_exit {
>>  
>>  /* Flags that describe what fields in emulation_failure hold valid data. */
>>  #define KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES (1ULL << 0)
>> +#define KVM_INTERNAL_ERROR_EMULATION_FLAG_EXIT_REASON       (1ULL << 1)
>>  
>>  /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */
>>  struct kvm_run {
>> @@ -404,6 +405,7 @@ struct kvm_run {
>>  			__u64 flags;
>>  			__u8  insn_size;
>>  			__u8  insn_bytes[15];
>> +			__u64 exit_reason;
>
> Please document what this field contains, especially since its contents
> depend on AMD versus Intel.

Okay.

>>  		} emulation_failure;
>>  		/* KVM_EXIT_OSI */
>>  		struct {
>> -- 
>> 2.30.2
>> 

dme.
Sean Christopherson July 9, 2021, 9:58 p.m. UTC | #3
On Fri, Jul 02, 2021, David Edmondson wrote:
> On Wednesday, 2021-06-30 at 16:48:42 UTC, David Matlack wrote:
> 
> > On Mon, Jun 28, 2021 at 06:31:52PM +0100, David Edmondson wrote:
> >>  	if (!is_guest_mode(vcpu) && static_call(kvm_x86_get_cpl)(vcpu) == 0) {
> >> -		vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
> >> -		vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
> >> -		vcpu->run->internal.ndata = 0;
> >> +		prepare_emulation_failure_exit(
> >> +			vcpu, KVM_INTERNAL_ERROR_EMULATION_FLAG_EXIT_REASON);
> >
> > Should kvm_task_switch and kvm_handle_memory_failure also be updated
> > like this?
> 
> Will do in v2.
> 
> sgx_handle_emulation_failure() seems like an existing user of
> KVM_INTERNAL_ERROR_EMULATION that doesn't follow the new protocol (use
> the emulation_failure part of the union).
> 
> Sean: If I add another flag for this case, what is the existing
> user-level consumer?

Doh, the SGX case should have been updated as part of commit c88339d88b0a ("kvm:
x86: Allow userspace to handle emulation errors").  The easiest fix for SGX would
be to zero out 'flags', bump ndata, and shift the existing field usage.  That
would resolve the existing problem of the address being misinterpreted as flags,
and would play nice _if_ additional flags are added.  I'll send a patch for that.

Regarding the consumer, there is no existing consumer per se.  SGX is simply
dumping the bad address that prevented emulation (the only SGX emulation failure
scenarios are bad/missing memslots/vmas).  The SGX case is very similar to
nested VMX instruction emulation, where failure is either due to a bad userspace
configuration (bad/missing memslot) or a busted L1 kernel (SGX instruction data
operand points at emulated MMIO).  A bad userspace configuration is almost always
going to be fatal, and I highly doubt any userspace VMM will bother emulating
SGX+MMIO.  In other words, the info dumped by SGX is purely for debug.

Which brings me back to adding another flag when dumping the exit reason.  Unless
there is a concrete use case for programmatically taking action in response to
failed emulation, e.g. attempting emulation in userspace using insn_bytes+insn_size,
I think we should not add a flag and instead dump info for debug/triage purposes
without committing to an ABI.  I.e. define the ABI such that KVM can dump
arbitrary info in the unused portions of data[].

Not having a true ABI will be a bit gross, but digging into these types of
failures is going to be painful no matter what; having to deduce the format of
the data is unlikely to shift the needle much.  And the code should be
straightforward, especially for userspace, e.g. dump all of data[] if emulation
in userspace failed.
David Edmondson July 29, 2021, 1:48 p.m. UTC | #4
On Friday, 2021-07-09 at 21:58:12 GMT, Sean Christopherson wrote:

> On Fri, Jul 02, 2021, David Edmondson wrote:
>> On Wednesday, 2021-06-30 at 16:48:42 UTC, David Matlack wrote:
>> 
>> > On Mon, Jun 28, 2021 at 06:31:52PM +0100, David Edmondson wrote:
>> >>  	if (!is_guest_mode(vcpu) && static_call(kvm_x86_get_cpl)(vcpu) == 0) {
>> >> -		vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
>> >> -		vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
>> >> -		vcpu->run->internal.ndata = 0;
>> >> +		prepare_emulation_failure_exit(
>> >> +			vcpu, KVM_INTERNAL_ERROR_EMULATION_FLAG_EXIT_REASON);
>> >
>> > Should kvm_task_switch and kvm_handle_memory_failure also be updated
>> > like this?
>> 
>> Will do in v2.
>> 
>> sgx_handle_emulation_failure() seems like an existing user of
>> KVM_INTERNAL_ERROR_EMULATION that doesn't follow the new protocol (use
>> the emulation_failure part of the union).
>> 
>> Sean: If I add another flag for this case, what is the existing
>> user-level consumer?
>
> Doh, the SGX case should have been updated as part of commit c88339d88b0a ("kvm:
> x86: Allow userspace to handle emulation errors").  The easiest fix for SGX would
> be to zero out 'flags', bump ndata, and shift the existing field usage.  That
> would resolve the existing problem of the address being misinterpreted as flags,
> and would play nice _if_ additional flags are added.  I'll send a patch for that.
>
> [...]
>
> Which brings me back to adding another flag when dumping the exit reason.  Unless
> there is a concrete use case for programmatically taking action in response to
> failed emulation, e.g. attempting emulation in userspace using insn_bytes+insn_size,
> I think we should not add a flag and instead dump info for debug/triage purposes
> without committing to an ABI.  I.e. define the ABI such that KVM can dump
> arbitrary info in the unused portions of data[].

https://lore.kernel.org/r/20210729133931.1129696-1-david.edmondson@oracle.com
includes both of these suggestions.
diff mbox series

Patch

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 8166ad113fb2..48ef0dc68faf 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7455,7 +7455,7 @@  void kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip)
 }
 EXPORT_SYMBOL_GPL(kvm_inject_realmode_interrupt);
 
-static void prepare_emulation_failure_exit(struct kvm_vcpu *vcpu)
+static void prepare_emulation_failure_exit(struct kvm_vcpu *vcpu, uint64_t flags)
 {
 	struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
 	u32 insn_size = ctxt->fetch.end - ctxt->fetch.data;
@@ -7466,7 +7466,8 @@  static void prepare_emulation_failure_exit(struct kvm_vcpu *vcpu)
 	run->emulation_failure.ndata = 0;
 	run->emulation_failure.flags = 0;
 
-	if (insn_size) {
+	if (insn_size &&
+	    (flags & KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES)) {
 		run->emulation_failure.ndata = 3;
 		run->emulation_failure.flags |=
 			KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES;
@@ -7476,6 +7477,14 @@  static void prepare_emulation_failure_exit(struct kvm_vcpu *vcpu)
 		memcpy(run->emulation_failure.insn_bytes,
 		       ctxt->fetch.data, insn_size);
 	}
+
+	if (flags & KVM_INTERNAL_ERROR_EMULATION_FLAG_EXIT_REASON) {
+		run->emulation_failure.ndata = 4;
+		run->emulation_failure.flags |=
+			KVM_INTERNAL_ERROR_EMULATION_FLAG_EXIT_REASON;
+		run->emulation_failure.exit_reason =
+			static_call(kvm_x86_get_exit_reason)(vcpu);
+	}
 }
 
 static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type)
@@ -7492,16 +7501,18 @@  static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type)
 
 	if (kvm->arch.exit_on_emulation_error ||
 	    (emulation_type & EMULTYPE_SKIP)) {
-		prepare_emulation_failure_exit(vcpu);
+		prepare_emulation_failure_exit(
+			vcpu,
+			KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES |
+			KVM_INTERNAL_ERROR_EMULATION_FLAG_EXIT_REASON);
 		return 0;
 	}
 
 	kvm_queue_exception(vcpu, UD_VECTOR);
 
 	if (!is_guest_mode(vcpu) && static_call(kvm_x86_get_cpl)(vcpu) == 0) {
-		vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
-		vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
-		vcpu->run->internal.ndata = 0;
+		prepare_emulation_failure_exit(
+			vcpu, KVM_INTERNAL_ERROR_EMULATION_FLAG_EXIT_REASON);
 		return 0;
 	}
 
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 68c9e6d8bbda..3e4126652a67 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -282,6 +282,7 @@  struct kvm_xen_exit {
 
 /* Flags that describe what fields in emulation_failure hold valid data. */
 #define KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES (1ULL << 0)
+#define KVM_INTERNAL_ERROR_EMULATION_FLAG_EXIT_REASON       (1ULL << 1)
 
 /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */
 struct kvm_run {
@@ -404,6 +405,7 @@  struct kvm_run {
 			__u64 flags;
 			__u8  insn_size;
 			__u8  insn_bytes[15];
+			__u64 exit_reason;
 		} emulation_failure;
 		/* KVM_EXIT_OSI */
 		struct {