
[Part2,RFC,v4,01/40] KVM: SVM: Add support to handle AP reset MSR protocol

Message ID 20210707183616.5620-2-brijesh.singh@amd.com (mailing list archive)
State New
Series Add AMD Secure Nested Paging (SEV-SNP) Hypervisor Support

Commit Message

Brijesh Singh July 7, 2021, 6:35 p.m. UTC
From: Tom Lendacky <thomas.lendacky@amd.com>

Add support for AP Reset Hold being invoked using the GHCB MSR protocol,
available in version 2 of the GHCB specification.

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
---
 arch/x86/include/asm/sev-common.h |  6 ++++
 arch/x86/kvm/svm/sev.c            | 56 ++++++++++++++++++++++++++-----
 arch/x86/kvm/svm/svm.h            |  1 +
 3 files changed, 55 insertions(+), 8 deletions(-)

Comments

Sean Christopherson July 14, 2021, 8:17 p.m. UTC | #1
On Wed, Jul 07, 2021, Brijesh Singh wrote:
> From: Tom Lendacky <thomas.lendacky@amd.com>
> 
> Add support for AP Reset Hold being invoked using the GHCB MSR protocol,
> available in version 2 of the GHCB specification.

Please provide a brief overview of the protocol, and why it's needed.  I assume
it's to allow AP wakeup without a shared GHCB?

> Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
> Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
> ---

...

>  static u8 sev_enc_bit;
>  static DECLARE_RWSEM(sev_deactivate_lock);
>  static DEFINE_MUTEX(sev_bitmap_lock);
> @@ -2199,6 +2203,9 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
>  
>  void sev_es_unmap_ghcb(struct vcpu_svm *svm)
>  {
> +	/* Clear any indication that the vCPU is in a type of AP Reset Hold */
> +	svm->ap_reset_hold_type = AP_RESET_HOLD_NONE;
> +
>  	if (!svm->ghcb)
>  		return;
>  
> @@ -2404,6 +2411,22 @@ static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm)
>  				  GHCB_MSR_INFO_POS);
>  		break;
>  	}
> +	case GHCB_MSR_AP_RESET_HOLD_REQ:
> +		svm->ap_reset_hold_type = AP_RESET_HOLD_MSR_PROTO;
> +		ret = kvm_emulate_ap_reset_hold(&svm->vcpu);

The hold type feels like it should be a param to kvm_emulate_ap_reset_hold().

> +
> +		/*
> +		 * Preset the result to a non-SIPI return and then only set
> +		 * the result to non-zero when delivering a SIPI.
> +		 */
> +		set_ghcb_msr_bits(svm, 0,
> +				  GHCB_MSR_AP_RESET_HOLD_RESULT_MASK,
> +				  GHCB_MSR_AP_RESET_HOLD_RESULT_POS);
> +
> +		set_ghcb_msr_bits(svm, GHCB_MSR_AP_RESET_HOLD_RESP,
> +				  GHCB_MSR_INFO_MASK,
> +				  GHCB_MSR_INFO_POS);

It looks like all uses set an arbitrary value and then the response.  I think
folding the response into the helper would improve both readability and robustness.
I also suspect the helper needs to do WRITE_ONCE() to guarantee the guest sees
what it's supposed to see, though memory ordering is not my strong suit.

Might even be able to squeeze in a build-time assertion.

Also, do the guest-provided contents actually need to be preserved?  That seems
somewhat odd.

E.g. can it be

static void set_ghcb_msr_response(struct vcpu_svm *svm, u64 response, u64 value,
				  u64 mask, unsigned int pos)
{
	u64 val = (response << GHCB_MSR_INFO_POS) | ((value & mask) << pos);

	WRITE_ONCE(svm->vmcb->control.ghcb_gpa, val);
}

and

		set_ghcb_msr_response(svm, GHCB_MSR_AP_RESET_HOLD_RESP, 0,
				      GHCB_MSR_AP_RESET_HOLD_RESULT_MASK,
				      GHCB_MSR_AP_RESET_HOLD_RESULT_POS);

> +		break;
>  	case GHCB_MSR_TERM_REQ: {
>  		u64 reason_set, reason_code;
>  
> @@ -2491,6 +2514,7 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
>  		ret = svm_invoke_exit_handler(vcpu, SVM_EXIT_IRET);
>  		break;
>  	case SVM_VMGEXIT_AP_HLT_LOOP:
> +		svm->ap_reset_hold_type = AP_RESET_HOLD_NAE_EVENT;
>  		ret = kvm_emulate_ap_reset_hold(vcpu);
>  		break;
>  	case SVM_VMGEXIT_AP_JUMP_TABLE: {
> @@ -2628,13 +2652,29 @@ void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
>  		return;
>  	}
>  
> -	/*
> -	 * Subsequent SIPI: Return from an AP Reset Hold VMGEXIT, where
> -	 * the guest will set the CS and RIP. Set SW_EXIT_INFO_2 to a
> -	 * non-zero value.
> -	 */
> -	if (!svm->ghcb)
> -		return;
> +	/* Subsequent SIPI */
> +	switch (svm->ap_reset_hold_type) {
> +	case AP_RESET_HOLD_NAE_EVENT:
> +		/*
> +		 * Return from an AP Reset Hold VMGEXIT, where the guest will
> +		 * set the CS and RIP. Set SW_EXIT_INFO_2 to a non-zero value.
> +		 */
> +		ghcb_set_sw_exit_info_2(svm->ghcb, 1);
> +		break;
> +	case AP_RESET_HOLD_MSR_PROTO:
> +		/*
> +		 * Return from an AP Reset Hold VMGEXIT, where the guest will
> +		 * set the CS and RIP. Set GHCB data field to a non-zero value.
> +		 */
> +		set_ghcb_msr_bits(svm, 1,
> +				  GHCB_MSR_AP_RESET_HOLD_RESULT_MASK,
> +				  GHCB_MSR_AP_RESET_HOLD_RESULT_POS);
>  
> -	ghcb_set_sw_exit_info_2(svm->ghcb, 1);
> +		set_ghcb_msr_bits(svm, GHCB_MSR_AP_RESET_HOLD_RESP,
> +				  GHCB_MSR_INFO_MASK,
> +				  GHCB_MSR_INFO_POS);
> +		break;
> +	default:
> +		break;
> +	}
>  }
> diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
> index 0b89aee51b74..ad12ca26b2d8 100644
> --- a/arch/x86/kvm/svm/svm.h
> +++ b/arch/x86/kvm/svm/svm.h
> @@ -174,6 +174,7 @@ struct vcpu_svm {
>  	struct ghcb *ghcb;
>  	struct kvm_host_map ghcb_map;
>  	bool received_first_sipi;
> +	unsigned int ap_reset_hold_type;

Can't this be a u8?

>  
>  	/* SEV-ES scratch area support */
>  	void *ghcb_sa;
> -- 
> 2.17.1
>
Joerg Roedel July 15, 2021, 7:39 a.m. UTC | #2
On Wed, Jul 14, 2021 at 08:17:29PM +0000, Sean Christopherson wrote:
> On Wed, Jul 07, 2021, Brijesh Singh wrote:
> > From: Tom Lendacky <thomas.lendacky@amd.com>
> > 
> > Add support for AP Reset Hold being invoked using the GHCB MSR protocol,
> > available in version 2 of the GHCB specification.
> 
> Please provide a brief overview of the protocol, and why it's needed.  I assume
> it's to allow AP wakeup without a shared GHCB?

Yes, this is needed for SEV-ES kexec support to park APs without the
need for memory that will be owned by the new kernel when APs are woken
up.

You can have a look into my SEV-ES kexec/kdump patch-set for details:

	https://lore.kernel.org/lkml/20210705082443.14721-1-joro@8bytes.org/

I also sent this patch separately earlier this week to enable GHCB
protocol version 2 support in KVM.

Regards,

	Joerg
Tom Lendacky July 15, 2021, 1:42 p.m. UTC | #3
On 7/14/21 3:17 PM, Sean Christopherson wrote:
> On Wed, Jul 07, 2021, Brijesh Singh wrote:
>> From: Tom Lendacky <thomas.lendacky@amd.com>
>>
>> Add support for AP Reset Hold being invoked using the GHCB MSR protocol,
>> available in version 2 of the GHCB specification.
> 
> Please provide a brief overview of the protocol, and why it's needed.  I assume
> it's to allow AP wakeup without a shared GHCB?

Right, mainly to allow an AP reset hold to be issued from a mode that cannot
access the GHCB as a shared page, e.g. 32-bit mode without paging enabled,
where reads/writes are always encrypted for an SEV guest.
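
For reference, the guest side of this MSR-protocol hold is only a few MSR
accesses and a VMGEXIT. A minimal sketch, assuming GHCB-MSR accessors along
the lines of the guest's sev_es_{wr,rd}_ghcb_msr() helpers and the VMGEXIT()
macro (illustrative only, not part of this patch):

/* Illustrative: park an AP via the GHCB v2 MSR protocol, no shared GHCB */
static void ap_reset_hold_msr_proto(void)
{
	u64 val;

	for (;;) {
		/* GHCBData[11:0] = 0x006: AP Reset Hold Request */
		sev_es_wr_ghcb_msr(GHCB_MSR_AP_RESET_HOLD_REQ);
		VMGEXIT();

		val = sev_es_rd_ghcb_msr();

		/* 0x007 response with a non-zero result means a SIPI arrived */
		if ((val & GHCB_MSR_INFO_MASK) == GHCB_MSR_AP_RESET_HOLD_RESP &&
		    ((val >> GHCB_MSR_AP_RESET_HOLD_RESULT_POS) &
		     GHCB_MSR_AP_RESET_HOLD_RESULT_MASK))
			break;
	}
}

Since the protocol lives entirely in the GHCB MSR, no shared page is ever
touched, which is what makes it usable from the restricted environments
described above.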

> 
>> Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
>> Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
>> ---
> 
> ...
> 
>>  static u8 sev_enc_bit;
>>  static DECLARE_RWSEM(sev_deactivate_lock);
>>  static DEFINE_MUTEX(sev_bitmap_lock);
>> @@ -2199,6 +2203,9 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
>>  
>>  void sev_es_unmap_ghcb(struct vcpu_svm *svm)
>>  {
>> +	/* Clear any indication that the vCPU is in a type of AP Reset Hold */
>> +	svm->ap_reset_hold_type = AP_RESET_HOLD_NONE;
>> +
>>  	if (!svm->ghcb)
>>  		return;
>>  
>> @@ -2404,6 +2411,22 @@ static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm)
>>  				  GHCB_MSR_INFO_POS);
>>  		break;
>>  	}
>> +	case GHCB_MSR_AP_RESET_HOLD_REQ:
>> +		svm->ap_reset_hold_type = AP_RESET_HOLD_MSR_PROTO;
>> +		ret = kvm_emulate_ap_reset_hold(&svm->vcpu);
> 
> The hold type feels like it should be a param to kvm_emulate_ap_reset_hold().

I suppose it could be, but then the type would have to be tracked in the
kvm_vcpu_arch struct instead of the vcpu_svm struct, so I opted for the
latter. Maybe a helper function, sev_ap_reset_hold(), that sets the type
and then calls kvm_emulate_ap_reset_hold(), but I'm not seeing a big need
for it.
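
For illustration, a sketch of that helper (not part of the posted patch)
would be:

static int sev_ap_reset_hold(struct vcpu_svm *svm, unsigned int type)
{
	/* Record which flavor of AP Reset Hold the vCPU is entering */
	svm->ap_reset_hold_type = type;

	return kvm_emulate_ap_reset_hold(&svm->vcpu);
}

The two VMGEXIT call sites would then read
"ret = sev_ap_reset_hold(svm, AP_RESET_HOLD_MSR_PROTO);" and
"ret = sev_ap_reset_hold(svm, AP_RESET_HOLD_NAE_EVENT);" respectively.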

> 
>> +
>> +		/*
>> +		 * Preset the result to a non-SIPI return and then only set
>> +		 * the result to non-zero when delivering a SIPI.
>> +		 */
>> +		set_ghcb_msr_bits(svm, 0,
>> +				  GHCB_MSR_AP_RESET_HOLD_RESULT_MASK,
>> +				  GHCB_MSR_AP_RESET_HOLD_RESULT_POS);
>> +
>> +		set_ghcb_msr_bits(svm, GHCB_MSR_AP_RESET_HOLD_RESP,
>> +				  GHCB_MSR_INFO_MASK,
>> +				  GHCB_MSR_INFO_POS);
> 
> It looks like all uses set an arbitrary value and then the response.  I think
> folding the response into the helper would improve both readability and robustness.

Joerg pulled this patch out and submitted it as part of a small, three
patch series, so it might be best to address this in general in the
SEV-SNP patches or as a follow-on series specifically for this re-work.

> I also suspect the helper needs to do WRITE_ONCE() to guarantee the guest sees
> what it's supposed to see, though memory ordering is not my strong suit.

This is writing to the VMCB that is then used to set the value of the
guest MSR. I don't see anything done in general for writes to the VMCB, so
I wouldn't think this should be any different.

> 
> Might even be able to squeeze in a build-time assertion.
> 
> Also, do the guest-provided contents actually need to be preserved?  That seems
> somewhat odd.

Hmmm... not sure I see where the guest contents are being preserved.

> 
> E.g. can it be
> 
> static void set_ghcb_msr_response(struct vcpu_svm *svm, u64 response, u64 value,
> 				  u64 mask, unsigned int pos)
> {
> 	u64 val = (response << GHCB_MSR_INFO_POS) | ((value & mask) << pos);
> 
> 	WRITE_ONCE(svm->vmcb->control.ghcb_gpa, val);
> }
> 
> and
> 
> 		set_ghcb_msr_response(svm, GHCB_MSR_AP_RESET_HOLD_RESP, 0,
> 				      GHCB_MSR_AP_RESET_HOLD_RESULT_MASK,
> 				      GHCB_MSR_AP_RESET_HOLD_RESULT_POS);
> 
>> +		break;
>>  	case GHCB_MSR_TERM_REQ: {
>>  		u64 reason_set, reason_code;
>>  
>> @@ -2491,6 +2514,7 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
>>  		ret = svm_invoke_exit_handler(vcpu, SVM_EXIT_IRET);
>>  		break;
>>  	case SVM_VMGEXIT_AP_HLT_LOOP:
>> +		svm->ap_reset_hold_type = AP_RESET_HOLD_NAE_EVENT;
>>  		ret = kvm_emulate_ap_reset_hold(vcpu);
>>  		break;
>>  	case SVM_VMGEXIT_AP_JUMP_TABLE: {
>> @@ -2628,13 +2652,29 @@ void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
>>  		return;
>>  	}
>>  
>> -	/*
>> -	 * Subsequent SIPI: Return from an AP Reset Hold VMGEXIT, where
>> -	 * the guest will set the CS and RIP. Set SW_EXIT_INFO_2 to a
>> -	 * non-zero value.
>> -	 */
>> -	if (!svm->ghcb)
>> -		return;
>> +	/* Subsequent SIPI */
>> +	switch (svm->ap_reset_hold_type) {
>> +	case AP_RESET_HOLD_NAE_EVENT:
>> +		/*
>> +		 * Return from an AP Reset Hold VMGEXIT, where the guest will
>> +		 * set the CS and RIP. Set SW_EXIT_INFO_2 to a non-zero value.
>> +		 */
>> +		ghcb_set_sw_exit_info_2(svm->ghcb, 1);
>> +		break;
>> +	case AP_RESET_HOLD_MSR_PROTO:
>> +		/*
>> +		 * Return from an AP Reset Hold VMGEXIT, where the guest will
>> +		 * set the CS and RIP. Set GHCB data field to a non-zero value.
>> +		 */
>> +		set_ghcb_msr_bits(svm, 1,
>> +				  GHCB_MSR_AP_RESET_HOLD_RESULT_MASK,
>> +				  GHCB_MSR_AP_RESET_HOLD_RESULT_POS);
>>  
>> -	ghcb_set_sw_exit_info_2(svm->ghcb, 1);
>> +		set_ghcb_msr_bits(svm, GHCB_MSR_AP_RESET_HOLD_RESP,
>> +				  GHCB_MSR_INFO_MASK,
>> +				  GHCB_MSR_INFO_POS);
>> +		break;
>> +	default:
>> +		break;
>> +	}
>>  }
>> diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
>> index 0b89aee51b74..ad12ca26b2d8 100644
>> --- a/arch/x86/kvm/svm/svm.h
>> +++ b/arch/x86/kvm/svm/svm.h
>> @@ -174,6 +174,7 @@ struct vcpu_svm {
>>  	struct ghcb *ghcb;
>>  	struct kvm_host_map ghcb_map;
>>  	bool received_first_sipi;
>> +	unsigned int ap_reset_hold_type;
> 
> Can't this be a u8?

Yes, it could be, maybe even an enum and let the compiler decide on the size.
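
For illustration, the enum variant (values matching the patch's #defines;
sketch only) could be:

enum ap_reset_hold_type {
	AP_RESET_HOLD_NONE,		/* 0: not in an AP Reset Hold */
	AP_RESET_HOLD_NAE_EVENT,	/* 1: hold requested via the NAE event */
	AP_RESET_HOLD_MSR_PROTO,	/* 2: hold requested via the MSR protocol */
};

with "enum ap_reset_hold_type ap_reset_hold_type;" replacing the unsigned int
field in struct vcpu_svm.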

Thanks,
Tom

> 
>>  
>>  	/* SEV-ES scratch area support */
>>  	void *ghcb_sa;
>> -- 
>> 2.17.1
>>
Sean Christopherson July 15, 2021, 3:45 p.m. UTC | #4
On Thu, Jul 15, 2021, Tom Lendacky wrote:
> On 7/14/21 3:17 PM, Sean Christopherson wrote:
> >> +	case GHCB_MSR_AP_RESET_HOLD_REQ:
> >> +		svm->ap_reset_hold_type = AP_RESET_HOLD_MSR_PROTO;
> >> +		ret = kvm_emulate_ap_reset_hold(&svm->vcpu);
> > 
> > The hold type feels like it should be a param to kvm_emulate_ap_reset_hold().
> 
> I suppose it could be, but then the type would have to be tracked in the
> kvm_vcpu_arch struct instead of the vcpu_svm struct, so I opted for the
> latter. Maybe a helper function, sev_ap_reset_hold(), that sets the type
> and then calls kvm_emulate_ap_reset_hold(), but I'm not seeing a big need
> for it.

Huh.  Why is kvm_emulate_ap_reset_hold() in x86.c?  That entire concept is very
much SEV specific.  And if anyone argues it's not SEV specific, then the hold type
should also be considered generic, i.e. put in kvm_vcpu_arch.

> >> +
> >> +		/*
> >> +		 * Preset the result to a non-SIPI return and then only set
> >> +		 * the result to non-zero when delivering a SIPI.
> >> +		 */
> >> +		set_ghcb_msr_bits(svm, 0,
> >> +				  GHCB_MSR_AP_RESET_HOLD_RESULT_MASK,
> >> +				  GHCB_MSR_AP_RESET_HOLD_RESULT_POS);
> >> +
> >> +		set_ghcb_msr_bits(svm, GHCB_MSR_AP_RESET_HOLD_RESP,
> >> +				  GHCB_MSR_INFO_MASK,
> >> +				  GHCB_MSR_INFO_POS);
> > 
> > It looks like all uses set an arbitrary value and then the response.  I think
> > folding the response into the helper would improve both readability and robustness.
> 
> Joerg pulled this patch out and submitted it as part of a small, three
> patch series, so it might be best to address this in general in the
> SEV-SNP patches or as a follow-on series specifically for this re-work.
> 
> > I also suspect the helper needs to do WRITE_ONCE() to guarantee the guest sees
> > what it's supposed to see, though memory ordering is not my strong suit.
> 
> This is writing to the VMCB that is then used to set the value of the
> guest MSR. I don't see anything done in general for writes to the VMCB, so
> I wouldn't think this should be any different.

Ooooh, right.  I was thinking this was writing memory that's shared with the
guest, but this is KVM's copy of the GHCB MSR, not the GHCB itself.  Thanks!

> > Might even be able to squeeze in a build-time assertion.
> > 
> > Also, do the guest-provided contents actually need to be preserved?  That seems
> > somewhat odd.
> 
> Hmmm... not sure I see where the guest contents are being preserved.

The fact that set_ghcb_msr_bits() is a RMW flow implies _something_ is being
preserved.  And unless KVM explicitly zeros/initializes control.ghcb_gpa, the
value being preserved is the value last written by the guest.  E.g. for CPUID
emulation, KVM reads the guest-requested function and register from ghcb_gpa,
then writes back the result.  But set_ghcb_msr_bits() is a RMW on a subset of
bits, and thus it's preserving the guest's value for the bits not being written.

Unless there is an explicit need to preserve the guest value, the whole RMW thing
is unnecessary and confusing.

	case GHCB_MSR_CPUID_REQ: {
		u64 cpuid_fn, cpuid_reg, cpuid_value;

		cpuid_fn = get_ghcb_msr_bits(svm,
					     GHCB_MSR_CPUID_FUNC_MASK,
					     GHCB_MSR_CPUID_FUNC_POS);

		/* Initialize the registers needed by the CPUID intercept */
		vcpu->arch.regs[VCPU_REGS_RAX] = cpuid_fn;
		vcpu->arch.regs[VCPU_REGS_RCX] = 0;

		ret = svm_invoke_exit_handler(vcpu, SVM_EXIT_CPUID);
		if (!ret) {
			ret = -EINVAL;
			break;
		}

		cpuid_reg = get_ghcb_msr_bits(svm,
					      GHCB_MSR_CPUID_REG_MASK,
					      GHCB_MSR_CPUID_REG_POS);
		if (cpuid_reg == 0)
			cpuid_value = vcpu->arch.regs[VCPU_REGS_RAX];
		else if (cpuid_reg == 1)
			cpuid_value = vcpu->arch.regs[VCPU_REGS_RBX];
		else if (cpuid_reg == 2)
			cpuid_value = vcpu->arch.regs[VCPU_REGS_RCX];
		else
			cpuid_value = vcpu->arch.regs[VCPU_REGS_RDX];

		set_ghcb_msr_bits(svm, cpuid_value,
				  GHCB_MSR_CPUID_VALUE_MASK,
				  GHCB_MSR_CPUID_VALUE_POS);

		set_ghcb_msr_bits(svm, GHCB_MSR_CPUID_RESP,
				  GHCB_MSR_INFO_MASK,
				  GHCB_MSR_INFO_POS);
		break;
	}
Tom Lendacky July 15, 2021, 5:05 p.m. UTC | #5
On 7/15/21 10:45 AM, Sean Christopherson wrote:
> On Thu, Jul 15, 2021, Tom Lendacky wrote:
>> On 7/14/21 3:17 PM, Sean Christopherson wrote:
>>>> +	case GHCB_MSR_AP_RESET_HOLD_REQ:
>>>> +		svm->ap_reset_hold_type = AP_RESET_HOLD_MSR_PROTO;
>>>> +		ret = kvm_emulate_ap_reset_hold(&svm->vcpu);
>>>
>>> The hold type feels like it should be a param to kvm_emulate_ap_reset_hold().
>>
>> I suppose it could be, but then the type would have to be tracked in the
>> kvm_vcpu_arch struct instead of the vcpu_svm struct, so I opted for the
>> latter. Maybe a helper function, sev_ap_reset_hold(), that sets the type
>> and then calls kvm_emulate_ap_reset_hold(), but I'm not seeing a big need
>> for it.
> 
> Huh.  Why is kvm_emulate_ap_reset_hold() in x86.c?  That entire concept is very
> much SEV specific.  And if anyone argues it's not SEV specific, then the hold type
> should also be considered generic, i.e. put in kvm_vcpu_arch.

That was based on review comments where it was desired that the halt be
identified as specifically from the AP reset hold vs a normal halt. So
kvm_emulate_ap_reset_hold() was created using KVM_MP_STATE_AP_RESET_HOLD
and KVM_EXIT_AP_RESET_HOLD instead of exporting a version of
kvm_vcpu_halt() with the state and reason as arguments.

If there's no objection, then I don't have any issues with moving the hold
type to kvm_vcpu_arch and adding a param to kvm_emulate_ap_reset_hold().
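
If that route were taken, a sketch of the common x86 side might be along
these lines (assumes the hold type moves into kvm_vcpu_arch and that
__kvm_vcpu_halt() keeps its current arguments; untested):

/* Sketch only: hold type owned by common x86 code, supplied by the caller */
int kvm_emulate_ap_reset_hold(struct kvm_vcpu *vcpu, unsigned int hold_type)
{
	vcpu->arch.ap_reset_hold_type = hold_type;

	return __kvm_vcpu_halt(vcpu, KVM_MP_STATE_AP_RESET_HOLD,
			       KVM_EXIT_AP_RESET_HOLD);
}
EXPORT_SYMBOL_GPL(kvm_emulate_ap_reset_hold);

SVM would then pass AP_RESET_HOLD_MSR_PROTO or AP_RESET_HOLD_NAE_EVENT
directly and drop the field from struct vcpu_svm.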

> 
>>>> +
>>>> +		/*
>>>> +		 * Preset the result to a non-SIPI return and then only set
>>>> +		 * the result to non-zero when delivering a SIPI.
>>>> +		 */
>>>> +		set_ghcb_msr_bits(svm, 0,
>>>> +				  GHCB_MSR_AP_RESET_HOLD_RESULT_MASK,
>>>> +				  GHCB_MSR_AP_RESET_HOLD_RESULT_POS);
>>>> +
>>>> +		set_ghcb_msr_bits(svm, GHCB_MSR_AP_RESET_HOLD_RESP,
>>>> +				  GHCB_MSR_INFO_MASK,
>>>> +				  GHCB_MSR_INFO_POS);
>>>
>>> It looks like all uses set an arbitrary value and then the response.  I think
>>> folding the response into the helper would improve both readability and robustness.
>>
>> Joerg pulled this patch out and submitted it as part of a small, three
>> patch series, so it might be best to address this in general in the
>> SEV-SNP patches or as a follow-on series specifically for this re-work.
>>
>>> I also suspect the helper needs to do WRITE_ONCE() to guarantee the guest sees
>>> what it's supposed to see, though memory ordering is not my strong suit.
>>
>> This is writing to the VMCB that is then used to set the value of the
>> guest MSR. I don't see anything done in general for writes to the VMCB, so
>> I wouldn't think this should be any different.
> 
> Ooooh, right.  I was thinking this was writing memory that's shared with the
> guest, but this is KVM's copy of the GHCB MSR, not the GHCB itself.  Thanks!
> 
>>> Might even be able to squeeze in a build-time assertion.
>>>
>>> Also, do the guest-provided contents actually need to be preserved?  That seems
>>> somewhat odd.
>>
>> Hmmm... not sure I see where the guest contents are being preserved.
> 
> The fact that set_ghcb_msr_bits() is a RMW flow implies _something_ is being
> preserved.  And unless KVM explicitly zeros/initializes control.ghcb_gpa, the
> value being preserved is the value last written by the guest.  E.g. for CPUID
> emulation, KVM reads the guest-requested function and register from ghcb_gpa,
> then writes back the result.  But set_ghcb_msr_bits() is a RMW on a subset of
> bits, and thus it's preserving the guest's value for the bits not being written.

Yes, set_ghcb_msr_bits() is a RMW helper, but the intent was to set every
bit. So for CPUID, I missed setting the reserved area to 0. There wouldn't
be an issue initializing the whole field to zero once everything has been
pulled out for the MSR protocol function being invoked.

> 
> Unless there is an explicit need to preserve the guest value, the whole RMW thing
> is unnecessary and confusing.

I guess it depends on who's reading the code. I don't find it confusing,
which is probably why I implemented it that way :) But, yes, it certainly
can be changed to create the result and then have a single function that
combines the result and response code and sets the ghcb_gpa, which would
have eliminated the missed setting of the reserved area.
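
For reference, such a combined helper could be something along these lines
(sketch only, untested; it writes KVM's copy of the GHCB MSR in
vmcb->control.ghcb_gpa and replaces the value rather than read-modify-writing
it):

static void set_ghcb_msr_response(struct vcpu_svm *svm, u64 response,
				  u64 value, u64 mask, unsigned int pos)
{
	/* Build the full GHCB MSR value: response code plus shifted data */
	u64 msr = (response & GHCB_MSR_INFO_MASK) << GHCB_MSR_INFO_POS;

	msr |= (value & mask) << pos;

	svm->vmcb->control.ghcb_gpa = msr;
}

The AP Reset Hold response would then collapse to a single call:

	set_ghcb_msr_response(svm, GHCB_MSR_AP_RESET_HOLD_RESP, 0,
			      GHCB_MSR_AP_RESET_HOLD_RESULT_MASK,
			      GHCB_MSR_AP_RESET_HOLD_RESULT_POS);

with the reserved bits implicitly cleared.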

Thanks,
Tom

Patch

diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h
index e14d24f0950c..466baa9cd0f5 100644
--- a/arch/x86/include/asm/sev-common.h
+++ b/arch/x86/include/asm/sev-common.h
@@ -45,6 +45,12 @@ 
 		(((unsigned long)reg & GHCB_MSR_CPUID_REG_MASK) << GHCB_MSR_CPUID_REG_POS) | \
 		(((unsigned long)fn) << GHCB_MSR_CPUID_FUNC_POS))
 
+/* AP Reset Hold */
+#define GHCB_MSR_AP_RESET_HOLD_REQ		0x006
+#define GHCB_MSR_AP_RESET_HOLD_RESP		0x007
+#define GHCB_MSR_AP_RESET_HOLD_RESULT_POS	12
+#define GHCB_MSR_AP_RESET_HOLD_RESULT_MASK	GENMASK_ULL(51, 0)
+
 /* GHCB GPA Register */
 #define GHCB_MSR_GPA_REG_REQ		0x012
 #define GHCB_MSR_GPA_REG_VALUE_POS	12
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index d93a1c368b61..7d0b98dbe523 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -57,6 +57,10 @@  module_param_named(sev_es, sev_es_enabled, bool, 0444);
 #define sev_es_enabled false
 #endif /* CONFIG_KVM_AMD_SEV */
 
+#define AP_RESET_HOLD_NONE		0
+#define AP_RESET_HOLD_NAE_EVENT		1
+#define AP_RESET_HOLD_MSR_PROTO		2
+
 static u8 sev_enc_bit;
 static DECLARE_RWSEM(sev_deactivate_lock);
 static DEFINE_MUTEX(sev_bitmap_lock);
@@ -2199,6 +2203,9 @@  static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
 
 void sev_es_unmap_ghcb(struct vcpu_svm *svm)
 {
+	/* Clear any indication that the vCPU is in a type of AP Reset Hold */
+	svm->ap_reset_hold_type = AP_RESET_HOLD_NONE;
+
 	if (!svm->ghcb)
 		return;
 
@@ -2404,6 +2411,22 @@  static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm)
 				  GHCB_MSR_INFO_POS);
 		break;
 	}
+	case GHCB_MSR_AP_RESET_HOLD_REQ:
+		svm->ap_reset_hold_type = AP_RESET_HOLD_MSR_PROTO;
+		ret = kvm_emulate_ap_reset_hold(&svm->vcpu);
+
+		/*
+		 * Preset the result to a non-SIPI return and then only set
+		 * the result to non-zero when delivering a SIPI.
+		 */
+		set_ghcb_msr_bits(svm, 0,
+				  GHCB_MSR_AP_RESET_HOLD_RESULT_MASK,
+				  GHCB_MSR_AP_RESET_HOLD_RESULT_POS);
+
+		set_ghcb_msr_bits(svm, GHCB_MSR_AP_RESET_HOLD_RESP,
+				  GHCB_MSR_INFO_MASK,
+				  GHCB_MSR_INFO_POS);
+		break;
 	case GHCB_MSR_TERM_REQ: {
 		u64 reason_set, reason_code;
 
@@ -2491,6 +2514,7 @@  int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
 		ret = svm_invoke_exit_handler(vcpu, SVM_EXIT_IRET);
 		break;
 	case SVM_VMGEXIT_AP_HLT_LOOP:
+		svm->ap_reset_hold_type = AP_RESET_HOLD_NAE_EVENT;
 		ret = kvm_emulate_ap_reset_hold(vcpu);
 		break;
 	case SVM_VMGEXIT_AP_JUMP_TABLE: {
@@ -2628,13 +2652,29 @@  void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
 		return;
 	}
 
-	/*
-	 * Subsequent SIPI: Return from an AP Reset Hold VMGEXIT, where
-	 * the guest will set the CS and RIP. Set SW_EXIT_INFO_2 to a
-	 * non-zero value.
-	 */
-	if (!svm->ghcb)
-		return;
+	/* Subsequent SIPI */
+	switch (svm->ap_reset_hold_type) {
+	case AP_RESET_HOLD_NAE_EVENT:
+		/*
+		 * Return from an AP Reset Hold VMGEXIT, where the guest will
+		 * set the CS and RIP. Set SW_EXIT_INFO_2 to a non-zero value.
+		 */
+		ghcb_set_sw_exit_info_2(svm->ghcb, 1);
+		break;
+	case AP_RESET_HOLD_MSR_PROTO:
+		/*
+		 * Return from an AP Reset Hold VMGEXIT, where the guest will
+		 * set the CS and RIP. Set GHCB data field to a non-zero value.
+		 */
+		set_ghcb_msr_bits(svm, 1,
+				  GHCB_MSR_AP_RESET_HOLD_RESULT_MASK,
+				  GHCB_MSR_AP_RESET_HOLD_RESULT_POS);
 
-	ghcb_set_sw_exit_info_2(svm->ghcb, 1);
+		set_ghcb_msr_bits(svm, GHCB_MSR_AP_RESET_HOLD_RESP,
+				  GHCB_MSR_INFO_MASK,
+				  GHCB_MSR_INFO_POS);
+		break;
+	default:
+		break;
+	}
 }
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 0b89aee51b74..ad12ca26b2d8 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -174,6 +174,7 @@  struct vcpu_svm {
 	struct ghcb *ghcb;
 	struct kvm_host_map ghcb_map;
 	bool received_first_sipi;
+	unsigned int ap_reset_hold_type;
 
 	/* SEV-ES scratch area support */
 	void *ghcb_sa;