diff mbox series

[RFC,v3,02/19] KVM: x86: inhibit APICv/AVIC when the guest and/or host changes apic id/base from the defaults.

Message ID 20220427200314.276673-3-mlevitsk@redhat.com (mailing list archive)
State New, archived
Headers show
Series RFC: nested AVIC | expand

Commit Message

Maxim Levitsky April 27, 2022, 8:02 p.m. UTC
Neither of these settings should be changed by the guest and it is
a burden to support it in the acceleration code, so just inhibit
it instead.

Also add a boolean 'apic_id_changed' to indicate if apic id ever changed.

Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  3 +++
 arch/x86/kvm/lapic.c            | 25 ++++++++++++++++++++++---
 arch/x86/kvm/lapic.h            |  8 ++++++++
 3 files changed, 33 insertions(+), 3 deletions(-)

Comments

Chao Gao May 18, 2022, 8:28 a.m. UTC | #1
On Wed, Apr 27, 2022 at 11:02:57PM +0300, Maxim Levitsky wrote:
>Neither of these settings should be changed by the guest and it is
>a burden to support it in the acceleration code, so just inhibit
>it instead.
>
>Also add a boolean 'apic_id_changed' to indicate if apic id ever changed.
>
>Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
>---
> arch/x86/include/asm/kvm_host.h |  3 +++
> arch/x86/kvm/lapic.c            | 25 ++++++++++++++++++++++---
> arch/x86/kvm/lapic.h            |  8 ++++++++
> 3 files changed, 33 insertions(+), 3 deletions(-)
>
>diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
>index 63eae00625bda..636df87542555 100644
>--- a/arch/x86/include/asm/kvm_host.h
>+++ b/arch/x86/include/asm/kvm_host.h
>@@ -1070,6 +1070,8 @@ enum kvm_apicv_inhibit {
> 	APICV_INHIBIT_REASON_ABSENT,
> 	/* AVIC is disabled because SEV doesn't support it */
> 	APICV_INHIBIT_REASON_SEV,
>+	/* APIC ID and/or APIC base was changed by the guest */
>+	APICV_INHIBIT_REASON_RO_SETTINGS,

You need to add it to check_apicv_inhibit_reasons as well.

> };
> 
> struct kvm_arch {
>@@ -1258,6 +1260,7 @@ struct kvm_arch {
> 	hpa_t	hv_root_tdp;
> 	spinlock_t hv_root_tdp_lock;
> #endif
>+	bool apic_id_changed;

What's the value of this boolean? No one reads it.

> };
> 
> struct kvm_vm_stat {
>diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
>index 66b0eb0bda94e..8996675b3ef4c 100644
>--- a/arch/x86/kvm/lapic.c
>+++ b/arch/x86/kvm/lapic.c
>@@ -2038,6 +2038,19 @@ static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
> 	}
> }
> 
>+static void kvm_lapic_check_initial_apic_id(struct kvm_lapic *apic)
>+{
>+	if (kvm_apic_has_initial_apic_id(apic))
>+		return;
>+
>+	pr_warn_once("APIC ID change is unsupported by KVM");

It is misleading because changing xAPIC ID is supported by KVM; it just
isn't compatible with APICv. Probably this pr_warn_once() should be
removed.

>+
>+	kvm_set_apicv_inhibit(apic->vcpu->kvm,
>+			APICV_INHIBIT_REASON_RO_SETTINGS);

The indentation here looks incorrect to me.
	kvm_set_apicv_inhibit(apic->vcpu->kvm,
			      APICV_INHIBIT_REASON_RO_SETTINGS);

>+
>+	apic->vcpu->kvm->arch.apic_id_changed = true;
>+}
>+
> static int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
> {
> 	int ret = 0;
>@@ -2046,9 +2059,11 @@ static int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
> 
> 	switch (reg) {
> 	case APIC_ID:		/* Local APIC ID */
>-		if (!apic_x2apic_mode(apic))
>+		if (!apic_x2apic_mode(apic)) {
>+
> 			kvm_apic_set_xapic_id(apic, val >> 24);
>-		else
>+			kvm_lapic_check_initial_apic_id(apic);
>+		} else
> 			ret = 1;
> 		break;
> 
>@@ -2335,8 +2350,11 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
> 			     MSR_IA32_APICBASE_BASE;
> 
> 	if ((value & MSR_IA32_APICBASE_ENABLE) &&
>-	     apic->base_address != APIC_DEFAULT_PHYS_BASE)
>+	     apic->base_address != APIC_DEFAULT_PHYS_BASE) {
>+		kvm_set_apicv_inhibit(apic->vcpu->kvm,
>+				APICV_INHIBIT_REASON_RO_SETTINGS);
> 		pr_warn_once("APIC base relocation is unsupported by KVM");
>+	}
> }
> 
> void kvm_apic_update_apicv(struct kvm_vcpu *vcpu)
>@@ -2649,6 +2667,7 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu,
> 		}
> 	}
> 
>+	kvm_lapic_check_initial_apic_id(vcpu->arch.apic);
> 	return 0;
> }
> 
>diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
>index 4e4f8a22754f9..b9c406d383080 100644
>--- a/arch/x86/kvm/lapic.h
>+++ b/arch/x86/kvm/lapic.h
>@@ -252,4 +252,12 @@ static inline u8 kvm_xapic_id(struct kvm_lapic *apic)
> 	return kvm_lapic_get_reg(apic, APIC_ID) >> 24;
> }
> 
>+static inline bool kvm_apic_has_initial_apic_id(struct kvm_lapic *apic)
>+{
>+	if (apic_x2apic_mode(apic))
>+		return true;

I suggest warning of x2apic mode:
	if (WARN_ON_ONCE(apic_x2apic_mode(apic)))

Because it is weird that callers care about initial apic id when apic is
in x2apic mode.
Maxim Levitsky May 18, 2022, 9:50 a.m. UTC | #2
On Wed, 2022-05-18 at 16:28 +0800, Chao Gao wrote:
> On Wed, Apr 27, 2022 at 11:02:57PM +0300, Maxim Levitsky wrote:
> > Neither of these settings should be changed by the guest and it is
> > a burden to support it in the acceleration code, so just inhibit
> > it instead.
> > 
> > Also add a boolean 'apic_id_changed' to indicate if apic id ever changed.
> > 
> > Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
> > ---
> > arch/x86/include/asm/kvm_host.h |  3 +++
> > arch/x86/kvm/lapic.c            | 25 ++++++++++++++++++++++---
> > arch/x86/kvm/lapic.h            |  8 ++++++++
> > 3 files changed, 33 insertions(+), 3 deletions(-)
> > 
> > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> > index 63eae00625bda..636df87542555 100644
> > --- a/arch/x86/include/asm/kvm_host.h
> > +++ b/arch/x86/include/asm/kvm_host.h
> > @@ -1070,6 +1070,8 @@ enum kvm_apicv_inhibit {
> > 	APICV_INHIBIT_REASON_ABSENT,
> > 	/* AVIC is disabled because SEV doesn't support it */
> > 	APICV_INHIBIT_REASON_SEV,
> > +	/* APIC ID and/or APIC base was changed by the guest */
> > +	APICV_INHIBIT_REASON_RO_SETTINGS,
> 
> You need to add it to check_apicv_inhibit_reasons as well.
True, forgot about it.

> 
> > };
> > 
> > struct kvm_arch {
> > @@ -1258,6 +1260,7 @@ struct kvm_arch {
> > 	hpa_t	hv_root_tdp;
> > 	spinlock_t hv_root_tdp_lock;
> > #endif
> > +	bool apic_id_changed;
> 
> What's the value of this boolean? No one reads it.

I use it in later patches to kill the guest during nested VM entry 
if it attempts to use nested AVIC after any vCPU changed APIC ID.

I mentioned this boolean in the commit description.

This boolean avoids the need to go over all vCPUs and checking
if they still have the initial apic id.

In the future maybe we can introduce a more generic 'taint'
bitmap with various flags like that, indicating that the guest
did something unexpected.

BTW, the other option in regard to the nested AVIC is just to ignore this issue completely.
The code itself always uses vcpu_id's, thus regardless of when/how often the guest changes
its apic ids, my code would just use the initial APIC ID values consistently.

In this case I won't need this boolean.

> 
> > };
> > 
> > struct kvm_vm_stat {
> > diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> > index 66b0eb0bda94e..8996675b3ef4c 100644
> > --- a/arch/x86/kvm/lapic.c
> > +++ b/arch/x86/kvm/lapic.c
> > @@ -2038,6 +2038,19 @@ static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
> > 	}
> > }
> > 
> > +static void kvm_lapic_check_initial_apic_id(struct kvm_lapic *apic)
> > +{
> > +	if (kvm_apic_has_initial_apic_id(apic))
> > +		return;
> > +
> > +	pr_warn_once("APIC ID change is unsupported by KVM");
> 
> It is misleading because changing xAPIC ID is supported by KVM; it just
> isn't compatible with APICv. Probably this pr_warn_once() should be
> removed.

Honestly since nobody uses this feature, I am not sure if to call this supported,
I am sure that KVM has more bugs in regard of using non standard APIC ID.
This warning might hopefuly make someone complain about it if this
feature is actually used somewhere.

> 
> > +
> > +	kvm_set_apicv_inhibit(apic->vcpu->kvm,
> > +			APICV_INHIBIT_REASON_RO_SETTINGS);
> 
> The indentation here looks incorrect to me.
> 	kvm_set_apicv_inhibit(apic->vcpu->kvm,
> 			      APICV_INHIBIT_REASON_RO_SETTINGS);

True, will fix.

> 
> > +
> > +	apic->vcpu->kvm->arch.apic_id_changed = true;
> > +}
> > +
> > static int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
> > {
> > 	int ret = 0;
> > @@ -2046,9 +2059,11 @@ static int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
> > 
> > 	switch (reg) {
> > 	case APIC_ID:		/* Local APIC ID */
> > -		if (!apic_x2apic_mode(apic))
> > +		if (!apic_x2apic_mode(apic)) {
> > +
> > 			kvm_apic_set_xapic_id(apic, val >> 24);
> > -		else
> > +			kvm_lapic_check_initial_apic_id(apic);
> > +		} else
> > 			ret = 1;
> > 		break;
> > 
> > @@ -2335,8 +2350,11 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
> > 			     MSR_IA32_APICBASE_BASE;
> > 
> > 	if ((value & MSR_IA32_APICBASE_ENABLE) &&
> > -	     apic->base_address != APIC_DEFAULT_PHYS_BASE)
> > +	     apic->base_address != APIC_DEFAULT_PHYS_BASE) {
> > +		kvm_set_apicv_inhibit(apic->vcpu->kvm,
> > +				APICV_INHIBIT_REASON_RO_SETTINGS);
> > 		pr_warn_once("APIC base relocation is unsupported by KVM");
> > +	}
> > }
> > 
> > void kvm_apic_update_apicv(struct kvm_vcpu *vcpu)
> > @@ -2649,6 +2667,7 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu,
> > 		}
> > 	}
> > 
> > +	kvm_lapic_check_initial_apic_id(vcpu->arch.apic);
> > 	return 0;
> > }
> > 
> > diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
> > index 4e4f8a22754f9..b9c406d383080 100644
> > --- a/arch/x86/kvm/lapic.h
> > +++ b/arch/x86/kvm/lapic.h
> > @@ -252,4 +252,12 @@ static inline u8 kvm_xapic_id(struct kvm_lapic *apic)
> > 	return kvm_lapic_get_reg(apic, APIC_ID) >> 24;
> > }
> > 
> > +static inline bool kvm_apic_has_initial_apic_id(struct kvm_lapic *apic)
> > +{
> > +	if (apic_x2apic_mode(apic))
> > +		return true;
> 
> I suggest warning of x2apic mode:
> 	if (WARN_ON_ONCE(apic_x2apic_mode(apic)))
> 
> Because it is weird that callers care about initial apic id when apic is
> in x2apic mode.

Yes but due to something I don't agree with, but also something that I gave up
on arguing upon, KVM userspace API kind of supports setting APIC ID != initial apic id,
even in x2apic mode, and disallowing it, is considered API breakage,
therefore this case is possible.

This case should still trigger a warning in kvm_lapic_check_initial_apic_id.

Best regards,
	Maxim Levitsky


>
Chao Gao May 18, 2022, 11:51 a.m. UTC | #3
On Wed, May 18, 2022 at 12:50:27PM +0300, Maxim Levitsky wrote:
>> > struct kvm_arch {
>> > @@ -1258,6 +1260,7 @@ struct kvm_arch {
>> > 	hpa_t	hv_root_tdp;
>> > 	spinlock_t hv_root_tdp_lock;
>> > #endif
>> > +	bool apic_id_changed;
>> 
>> What's the value of this boolean? No one reads it.
>
>I use it in later patches to kill the guest during nested VM entry 
>if it attempts to use nested AVIC after any vCPU changed APIC ID.
>
>I mentioned this boolean in the commit description.
>
>This boolean avoids the need to go over all vCPUs and checking
>if they still have the initial apic id.

Do you want to kill the guest if APIC base got changed? If yes,
you can check if APICV_INHIBIT_REASON_RO_SETTINGS is set and save
the boolean.

>
>In the future maybe we can introduce a more generic 'taint'
>bitmap with various flags like that, indicating that the guest
>did something unexpected.
>
>BTW, the other option in regard to the nested AVIC is just to ignore this issue completely.
>The code itself always uses vcpu_id's, thus regardless of when/how often the guest changes
>its apic ids, my code would just use the initial APIC ID values consistently.
>
>In this case I won't need this boolean.
>
>> 
>> > };
>> > 
>> > struct kvm_vm_stat {
>> > diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
>> > index 66b0eb0bda94e..8996675b3ef4c 100644
>> > --- a/arch/x86/kvm/lapic.c
>> > +++ b/arch/x86/kvm/lapic.c
>> > @@ -2038,6 +2038,19 @@ static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
>> > 	}
>> > }
>> > 
>> > +static void kvm_lapic_check_initial_apic_id(struct kvm_lapic *apic)
>> > +{
>> > +	if (kvm_apic_has_initial_apic_id(apic))
>> > +		return;
>> > +
>> > +	pr_warn_once("APIC ID change is unsupported by KVM");
>> 
>> It is misleading because changing xAPIC ID is supported by KVM; it just
>> isn't compatible with APICv. Probably this pr_warn_once() should be
>> removed.
>
>Honestly since nobody uses this feature, I am not sure if to call this supported,
>I am sure that KVM has more bugs in regard of using non standard APIC ID.
>This warning might hopefuly make someone complain about it if this
>feature is actually used somewhere.

Now I got you. It is fine to me.
Maxim Levitsky May 18, 2022, 12:36 p.m. UTC | #4
On Wed, 2022-05-18 at 19:51 +0800, Chao Gao wrote:
> On Wed, May 18, 2022 at 12:50:27PM +0300, Maxim Levitsky wrote:
> > > > struct kvm_arch {
> > > > @@ -1258,6 +1260,7 @@ struct kvm_arch {
> > > > 	hpa_t	hv_root_tdp;
> > > > 	spinlock_t hv_root_tdp_lock;
> > > > #endif
> > > > +	bool apic_id_changed;
> > > 
> > > What's the value of this boolean? No one reads it.
> > 
> > I use it in later patches to kill the guest during nested VM entry 
> > if it attempts to use nested AVIC after any vCPU changed APIC ID.
> > 
> > I mentioned this boolean in the commit description.
> > 
> > This boolean avoids the need to go over all vCPUs and checking
> > if they still have the initial apic id.
> 
> Do you want to kill the guest if APIC base got changed? If yes,
> you can check if APICV_INHIBIT_REASON_RO_SETTINGS is set and save
> the boolean.

Yep, I thrown in the apic base just because I can. It doesn't matter to 
my nested AVIC logic at all, but since it is also something that guests
don't change, I also don't care if this will lead to inhibit and
killing the guest if it attempts to use nested AVIC.

That boolean should have the same value as the APICV_INHIBIT_REASON_RO_SETTINGS
inhibit, so yes I can instead check if the inhibit is active.

I don't know if that is cleaner that this boolean though, individual
inhibit value is currently not something that anybody uses in logic.

Best regards,
	Maxim Levitsky


> 
> > In the future maybe we can introduce a more generic 'taint'
> > bitmap with various flags like that, indicating that the guest
> > did something unexpected.
> > 
> > BTW, the other option in regard to the nested AVIC is just to ignore this issue completely.
> > The code itself always uses vcpu_id's, thus regardless of when/how often the guest changes
> > its apic ids, my code would just use the initial APIC ID values consistently.
> > 
> > In this case I won't need this boolean.
> > 
> > > > };
> > > > 
> > > > struct kvm_vm_stat {
> > > > diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> > > > index 66b0eb0bda94e..8996675b3ef4c 100644
> > > > --- a/arch/x86/kvm/lapic.c
> > > > +++ b/arch/x86/kvm/lapic.c
> > > > @@ -2038,6 +2038,19 @@ static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
> > > > 	}
> > > > }
> > > > 
> > > > +static void kvm_lapic_check_initial_apic_id(struct kvm_lapic *apic)
> > > > +{
> > > > +	if (kvm_apic_has_initial_apic_id(apic))
> > > > +		return;
> > > > +
> > > > +	pr_warn_once("APIC ID change is unsupported by KVM");
> > > 
> > > It is misleading because changing xAPIC ID is supported by KVM; it just
> > > isn't compatible with APICv. Probably this pr_warn_once() should be
> > > removed.
> > 
> > Honestly since nobody uses this feature, I am not sure if to call this supported,
> > I am sure that KVM has more bugs in regard of using non standard APIC ID.
> > This warning might hopefuly make someone complain about it if this
> > feature is actually used somewhere.
> 
> Now I got you. It is fine to me.
>
Sean Christopherson May 18, 2022, 3:39 p.m. UTC | #5
On Wed, May 18, 2022, Maxim Levitsky wrote:
> On Wed, 2022-05-18 at 16:28 +0800, Chao Gao wrote:
> > > struct kvm_arch {
> > > @@ -1258,6 +1260,7 @@ struct kvm_arch {
> > > 	hpa_t	hv_root_tdp;
> > > 	spinlock_t hv_root_tdp_lock;
> > > #endif
> > > +	bool apic_id_changed;
> > 
> > What's the value of this boolean? No one reads it.
> 
> I use it in later patches to kill the guest during nested VM entry 
> if it attempts to use nested AVIC after any vCPU changed APIC ID.

Then the flag should be introduced in the later patch, because (a) it's dead code
if that patch is never merged and (b) it's impossible to review this patch for
correctness without seeing the usage, e.g. setting apic_id_changed isn't guarded
with a lock and so the usage may or may not be susceptible to races.

> > > +	apic->vcpu->kvm->arch.apic_id_changed = true;
> > > +}
> > > +
Maxim Levitsky May 18, 2022, 5:15 p.m. UTC | #6
On Wed, 2022-05-18 at 15:39 +0000, Sean Christopherson wrote:
> On Wed, May 18, 2022, Maxim Levitsky wrote:
> > On Wed, 2022-05-18 at 16:28 +0800, Chao Gao wrote:
> > > > struct kvm_arch {
> > > > @@ -1258,6 +1260,7 @@ struct kvm_arch {
> > > > 	hpa_t	hv_root_tdp;
> > > > 	spinlock_t hv_root_tdp_lock;
> > > > #endif
> > > > +	bool apic_id_changed;
> > > 
> > > What's the value of this boolean? No one reads it.
> > 
> > I use it in later patches to kill the guest during nested VM entry 
> > if it attempts to use nested AVIC after any vCPU changed APIC ID.
> 
> Then the flag should be introduced in the later patch, because (a) it's dead code
> if that patch is never merged and (b) it's impossible to review this patch for
> correctness without seeing the usage, e.g. setting apic_id_changed isn't guarded
> with a lock and so the usage may or may not be susceptible to races.

I can't disagree with you on this, this was just somewhat a hack I wasn't sure
(and not yet 100% sure I will move forward with) so I cut this corner.

Thanks for the review!

Best regards,
	Maxim Levitsky

> 
> > > > +	apic->vcpu->kvm->arch.apic_id_changed = true;
> > > > +}
> > > > +
Sean Christopherson May 19, 2022, 4:06 p.m. UTC | #7
On Wed, Apr 27, 2022, Maxim Levitsky wrote:
> Neither of these settings should be changed by the guest and it is
> a burden to support it in the acceleration code, so just inhibit
> it instead.
> 
> Also add a boolean 'apic_id_changed' to indicate if apic id ever changed.
> 
> Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
> ---
>  arch/x86/include/asm/kvm_host.h |  3 +++
>  arch/x86/kvm/lapic.c            | 25 ++++++++++++++++++++++---
>  arch/x86/kvm/lapic.h            |  8 ++++++++
>  3 files changed, 33 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 63eae00625bda..636df87542555 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1070,6 +1070,8 @@ enum kvm_apicv_inhibit {
>  	APICV_INHIBIT_REASON_ABSENT,
>  	/* AVIC is disabled because SEV doesn't support it */
>  	APICV_INHIBIT_REASON_SEV,
> +	/* APIC ID and/or APIC base was changed by the guest */

I don't see any reason to inhibit APICv if the APIC base is changed.  KVM has
never supported that, and disabling APICv won't "fix" anything.

Ignoring that is a minor simplification, but also allows for a more intuitive
name, e.g.

	APICV_INHIBIT_REASON_APIC_ID_MODIFIED,

The inhibit also needs to be added avic_check_apicv_inhibit_reasons() and
vmx_check_apicv_inhibit_reasons().

> +	APICV_INHIBIT_REASON_RO_SETTINGS,
>  };
>  
>  struct kvm_arch {
> @@ -1258,6 +1260,7 @@ struct kvm_arch {
>  	hpa_t	hv_root_tdp;
>  	spinlock_t hv_root_tdp_lock;
>  #endif
> +	bool apic_id_changed;
>  };
>  
>  struct kvm_vm_stat {
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index 66b0eb0bda94e..8996675b3ef4c 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -2038,6 +2038,19 @@ static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
>  	}
>  }
>  
> +static void kvm_lapic_check_initial_apic_id(struct kvm_lapic *apic)

The "check" part is misleading/confusing.  "check" helpers usually query and return
state.  I assume you avoided "changed" because the ID may or may not actually be
changing.  Maybe kvm_apic_id_updated()?  Ah, better idea.  What about
kvm_lapic_xapic_id_updated()?  See below for reasoning.

> +{
> +	if (kvm_apic_has_initial_apic_id(apic))

Rather than add a single-use helper, invoke the helper from kvm_apic_state_fixup()
in the !x2APIC path, then this can KVM_BUG_ON() x2APIC to help document that KVM
should never allow the ID to change for x2APIC.

> +		return;
> +
> +	pr_warn_once("APIC ID change is unsupported by KVM");

It's supported (modulo x2APIC shenanigans), otherwise KVM wouldn't need to disable
APICv.

> +	kvm_set_apicv_inhibit(apic->vcpu->kvm,
> +			APICV_INHIBIT_REASON_RO_SETTINGS);
> +
> +	apic->vcpu->kvm->arch.apic_id_changed = true;
> +}
> +
>  static int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
>  {
>  	int ret = 0;
> @@ -2046,9 +2059,11 @@ static int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
>  
>  	switch (reg) {
>  	case APIC_ID:		/* Local APIC ID */
> -		if (!apic_x2apic_mode(apic))
> +		if (!apic_x2apic_mode(apic)) {
> +

Spurious newline.

>  			kvm_apic_set_xapic_id(apic, val >> 24);
> -		else
> +			kvm_lapic_check_initial_apic_id(apic);
> +		} else

Needs curly braces for both paths.

>  			ret = 1;
>  		break;
>  

E.g.

---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/lapic.c            | 21 +++++++++++++++++++--
 arch/x86/kvm/svm/avic.c         |  3 ++-
 arch/x86/kvm/vmx/vmx.c          |  3 ++-
 4 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index d895d25c5b2f..d888fa1bae77 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1071,6 +1071,7 @@ enum kvm_apicv_inhibit {
 	APICV_INHIBIT_REASON_BLOCKIRQ,
 	APICV_INHIBIT_REASON_ABSENT,
 	APICV_INHIBIT_REASON_SEV,
+	APICV_INHIBIT_REASON_APIC_ID_MODIFIED,
 };

 struct kvm_arch {
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 5fd678c90288..6fe8f20f03d8 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -2039,6 +2039,19 @@ static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
 	}
 }

+static void kvm_lapic_xapic_id_updated(struct kvm_lapic *apic)
+{
+	struct kvm *kvm = apic->vcpu->kvm;
+
+	if (KVM_BUG_ON(apic_x2apic_mode(apic), kvm))
+		return;
+
+	if (kvm_xapic_id(apic) == apic->vcpu->vcpu_id)
+		return;
+
+	kvm_set_apicv_inhibit(kvm, APICV_INHIBIT_REASON_APIC_ID_MODIFIED);
+}
+
 static int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
 {
 	int ret = 0;
@@ -2047,10 +2060,12 @@ static int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)

 	switch (reg) {
 	case APIC_ID:		/* Local APIC ID */
-		if (!apic_x2apic_mode(apic))
+		if (!apic_x2apic_mode(apic)) {
 			kvm_apic_set_xapic_id(apic, val >> 24);
-		else
+			kvm_lapic_xapic_id_updated(apic);
+		} else {
 			ret = 1;
+		}
 		break;

 	case APIC_TASKPRI:
@@ -2665,6 +2680,8 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu,
 			icr = __kvm_lapic_get_reg64(s->regs, APIC_ICR);
 			__kvm_lapic_set_reg(s->regs, APIC_ICR2, icr >> 32);
 		}
+	} else {
+		kvm_lapic_xapic_id_updated(vcpu->arch.apic);
 	}

 	return 0;
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index 54fe03714f8a..239c3e8b1f3f 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -910,7 +910,8 @@ bool avic_check_apicv_inhibit_reasons(enum kvm_apicv_inhibit reason)
 			  BIT(APICV_INHIBIT_REASON_PIT_REINJ) |
 			  BIT(APICV_INHIBIT_REASON_X2APIC) |
 			  BIT(APICV_INHIBIT_REASON_BLOCKIRQ) |
-			  BIT(APICV_INHIBIT_REASON_SEV);
+			  BIT(APICV_INHIBIT_REASON_SEV) |
+			  BIT(APICV_INHIBIT_REASON_APIC_ID_MODIFIED);

 	return supported & BIT(reason);
 }
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index b06eafa5884d..941adade21ea 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7818,7 +7818,8 @@ static bool vmx_check_apicv_inhibit_reasons(enum kvm_apicv_inhibit reason)
 	ulong supported = BIT(APICV_INHIBIT_REASON_DISABLE) |
 			  BIT(APICV_INHIBIT_REASON_ABSENT) |
 			  BIT(APICV_INHIBIT_REASON_HYPERV) |
-			  BIT(APICV_INHIBIT_REASON_BLOCKIRQ);
+			  BIT(APICV_INHIBIT_REASON_BLOCKIRQ) |
+			  BIT(APICV_INHIBIT_REASON_APIC_ID_MODIFIED);

 	return supported & BIT(reason);
 }

base-commit: 6ab6e3842d18e4529fa524fb6c668ae8a8bf54f4
--
Maxim Levitsky May 22, 2022, 9:03 a.m. UTC | #8
On Thu, 2022-05-19 at 16:06 +0000, Sean Christopherson wrote:
> On Wed, Apr 27, 2022, Maxim Levitsky wrote:
> > Neither of these settings should be changed by the guest and it is
> > a burden to support it in the acceleration code, so just inhibit
> > it instead.
> > 
> > Also add a boolean 'apic_id_changed' to indicate if apic id ever changed.
> > 
> > Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
> > ---
> >  arch/x86/include/asm/kvm_host.h |  3 +++
> >  arch/x86/kvm/lapic.c            | 25 ++++++++++++++++++++++---
> >  arch/x86/kvm/lapic.h            |  8 ++++++++
> >  3 files changed, 33 insertions(+), 3 deletions(-)
> > 
> > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> > index 63eae00625bda..636df87542555 100644
> > --- a/arch/x86/include/asm/kvm_host.h
> > +++ b/arch/x86/include/asm/kvm_host.h
> > @@ -1070,6 +1070,8 @@ enum kvm_apicv_inhibit {
> >  	APICV_INHIBIT_REASON_ABSENT,
> >  	/* AVIC is disabled because SEV doesn't support it */
> >  	APICV_INHIBIT_REASON_SEV,
> > +	/* APIC ID and/or APIC base was changed by the guest */
> 
> I don't see any reason to inhibit APICv if the APIC base is changed.  KVM has
> never supported that, and disabling APICv won't "fix" anything.

I kind of tacked the APIC base on the thing just to be a good citezen.

In theory currently if the guest changes the APIC base, neither APICv
nor AVIC will even notice, so the guest will still be able to access the
default APIC base and the new APIC base, which is kind of wrong.

Inhibiting APICv/AVIC in this case makes it better and it is very cheap to do.

If you still think that it shouln't be done, I'll remove it.


> 
> Ignoring that is a minor simplification, but also allows for a more intuitive
> name, e.g.
> 
> 	APICV_INHIBIT_REASON_APIC_ID_MODIFIED,
> 
> The inhibit also needs to be added avic_check_apicv_inhibit_reasons() and
> vmx_check_apicv_inhibit_reasons().
> 
> > +	APICV_INHIBIT_REASON_RO_SETTINGS,

> >  };
> >  
> >  struct kvm_arch {
> > @@ -1258,6 +1260,7 @@ struct kvm_arch {
> >  	hpa_t	hv_root_tdp;
> >  	spinlock_t hv_root_tdp_lock;
> >  #endif
> > +	bool apic_id_changed;
> >  };
> >  
> >  struct kvm_vm_stat {
> > diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> > index 66b0eb0bda94e..8996675b3ef4c 100644
> > --- a/arch/x86/kvm/lapic.c
> > +++ b/arch/x86/kvm/lapic.c
> > @@ -2038,6 +2038,19 @@ static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
> >  	}
> >  }
> >  
> > +static void kvm_lapic_check_initial_apic_id(struct kvm_lapic *apic)
> 
> The "check" part is misleading/confusing.  "check" helpers usually query and return
> state.  I assume you avoided "changed" because the ID may or may not actually be
> changing.  Maybe kvm_apic_id_updated()?  Ah, better idea.  What about
> kvm_lapic_xapic_id_updated()?  See below for reasoning.

This is a very good idea!

> 
> > +{
> > +	if (kvm_apic_has_initial_apic_id(apic))
> 
> Rather than add a single-use helper, invoke the helper from kvm_apic_state_fixup()
> in the !x2APIC path, then this can KVM_BUG_ON() x2APIC to help document that KVM
> should never allow the ID to change for x2APIC.

yes, but we do allow non default x2apic id via userspace api - I wasn't able to convience
you to remove this :)

> 
> > +		return;
> > +
> > +	pr_warn_once("APIC ID change is unsupported by KVM");
> 
> It's supported (modulo x2APIC shenanigans), otherwise KVM wouldn't need to disable
> APICv.

Here, as I said, it would be nice to see that warning if someone complains.
Fact is that AVIC code was totally broken in this regard, and there are probably more,
so it would be nice to see if anybody complains.

If you insist, I'll remove this warning.

> 
> > +	kvm_set_apicv_inhibit(apic->vcpu->kvm,
> > +			APICV_INHIBIT_REASON_RO_SETTINGS);
> > +
> > +	apic->vcpu->kvm->arch.apic_id_changed = true;
> > +}
> > +
> >  static int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
> >  {
> >  	int ret = 0;
> > @@ -2046,9 +2059,11 @@ static int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
> >  
> >  	switch (reg) {
> >  	case APIC_ID:		/* Local APIC ID */
> > -		if (!apic_x2apic_mode(apic))
> > +		if (!apic_x2apic_mode(apic)) {
> > +
> 
> Spurious newline.
Will fix.
> 
> >  			kvm_apic_set_xapic_id(apic, val >> 24);
> > -		else
> > +			kvm_lapic_check_initial_apic_id(apic);
> > +		} else
> 
> Needs curly braces for both paths.
Will fix.
> 
> >  			ret = 1;
> >  		break;
> >  
> 
> E.g.
> 
> ---
>  arch/x86/include/asm/kvm_host.h |  1 +
>  arch/x86/kvm/lapic.c            | 21 +++++++++++++++++++--
>  arch/x86/kvm/svm/avic.c         |  3 ++-
>  arch/x86/kvm/vmx/vmx.c          |  3 ++-
>  4 files changed, 24 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index d895d25c5b2f..d888fa1bae77 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1071,6 +1071,7 @@ enum kvm_apicv_inhibit {
>  	APICV_INHIBIT_REASON_BLOCKIRQ,
>  	APICV_INHIBIT_REASON_ABSENT,
>  	APICV_INHIBIT_REASON_SEV,
> +	APICV_INHIBIT_REASON_APIC_ID_MODIFIED,
>  };
> 
>  struct kvm_arch {
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index 5fd678c90288..6fe8f20f03d8 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -2039,6 +2039,19 @@ static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
>  	}
>  }
> 
> +static void kvm_lapic_xapic_id_updated(struct kvm_lapic *apic)
> +{
> +	struct kvm *kvm = apic->vcpu->kvm;
> +
> +	if (KVM_BUG_ON(apic_x2apic_mode(apic), kvm))
> +		return;
> +
> +	if (kvm_xapic_id(apic) == apic->vcpu->vcpu_id)
> +		return;
> +
> +	kvm_set_apicv_inhibit(kvm, APICV_INHIBIT_REASON_APIC_ID_MODIFIED);
> +}
> +
>  static int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
>  {
>  	int ret = 0;
> @@ -2047,10 +2060,12 @@ static int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
> 
>  	switch (reg) {
>  	case APIC_ID:		/* Local APIC ID */
> -		if (!apic_x2apic_mode(apic))
> +		if (!apic_x2apic_mode(apic)) {
>  			kvm_apic_set_xapic_id(apic, val >> 24);
> -		else
> +			kvm_lapic_xapic_id_updated(apic);
> +		} else {
>  			ret = 1;
> +		}
>  		break;
> 
>  	case APIC_TASKPRI:
> @@ -2665,6 +2680,8 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu,
>  			icr = __kvm_lapic_get_reg64(s->regs, APIC_ICR);
>  			__kvm_lapic_set_reg(s->regs, APIC_ICR2, icr >> 32);
>  		}
> +	} else {
> +		kvm_lapic_xapic_id_updated(vcpu->arch.apic);
>  	}
> 
>  	return 0;
> diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
> index 54fe03714f8a..239c3e8b1f3f 100644
> --- a/arch/x86/kvm/svm/avic.c
> +++ b/arch/x86/kvm/svm/avic.c
> @@ -910,7 +910,8 @@ bool avic_check_apicv_inhibit_reasons(enum kvm_apicv_inhibit reason)
>  			  BIT(APICV_INHIBIT_REASON_PIT_REINJ) |
>  			  BIT(APICV_INHIBIT_REASON_X2APIC) |
>  			  BIT(APICV_INHIBIT_REASON_BLOCKIRQ) |
> -			  BIT(APICV_INHIBIT_REASON_SEV);
> +			  BIT(APICV_INHIBIT_REASON_SEV) |
> +			  BIT(APICV_INHIBIT_REASON_APIC_ID_MODIFIED);
> 
>  	return supported & BIT(reason);
>  }
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index b06eafa5884d..941adade21ea 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -7818,7 +7818,8 @@ static bool vmx_check_apicv_inhibit_reasons(enum kvm_apicv_inhibit reason)
>  	ulong supported = BIT(APICV_INHIBIT_REASON_DISABLE) |
>  			  BIT(APICV_INHIBIT_REASON_ABSENT) |
>  			  BIT(APICV_INHIBIT_REASON_HYPERV) |
> -			  BIT(APICV_INHIBIT_REASON_BLOCKIRQ);
> +			  BIT(APICV_INHIBIT_REASON_BLOCKIRQ) |
> +			  BIT(APICV_INHIBIT_REASON_APIC_ID_MODIFIED);
> 
>  	return supported & BIT(reason);
>  }
> 
> base-commit: 6ab6e3842d18e4529fa524fb6c668ae8a8bf54f4



Best regards,
	Thanks for the review,
		Maxim Levitsky
> --
>
Jim Mattson May 22, 2022, 2:47 p.m. UTC | #9
On Sun, May 22, 2022 at 2:03 AM Maxim Levitsky <mlevitsk@redhat.com> wrote:
>
> On Thu, 2022-05-19 at 16:06 +0000, Sean Christopherson wrote:
> > On Wed, Apr 27, 2022, Maxim Levitsky wrote:
> > > Neither of these settings should be changed by the guest and it is
> > > a burden to support it in the acceleration code, so just inhibit
> > > it instead.
> > >
> > > Also add a boolean 'apic_id_changed' to indicate if apic id ever changed.
> > >
> > > Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
> > > ---
> > > +           return;
> > > +
> > > +   pr_warn_once("APIC ID change is unsupported by KVM");
> >
> > It's supported (modulo x2APIC shenanigans), otherwise KVM wouldn't need to disable
> > APICv.
>
> Here, as I said, it would be nice to see that warning if someone complains.
> Fact is that AVIC code was totally broken in this regard, and there are probably more,
> so it would be nice to see if anybody complains.
>
> If you insist, I'll remove this warning.

This may be fine for a hobbyist, but it's a terrible API in an
enterprise environment. To be honest, I have no way of propagating
this warning from /var/log/messages on a particular host to a
potentially impacted customer. Worse, if they're not the first
impacted customer since the last host reboot, there's no warning to
propagate. I suppose I could just tell every later customer, "Your VM
was scheduled to run on a host that previously reported, 'APIC ID
change is unsupported by KVM.' If you notice any unusual behavior,
that might be the reason for it," but that isn't going to inspire
confidence. I could schedule a drain and reboot of the host, but that
defeats the whole point of the "_once" suffix.

I know that there's a long history of doing this in KVM, but I'd like
to ask that we:
a) stop piling on
b) start fixing the existing uses

If KVM cannot emulate a perfectly valid operation, an exit to
userspace with KVM_EXIT_INTERNAL_ERROR is warranted. Perhaps for
operations that we suspect KVM might get wrong, we should have a new
userspace exit: KVM_EXIT_WARNING?

I'm not saying that you should remove the warning. I'm just asking
that it be augmented with a direct signal to userspace that KVM may no
longer be reliable.
Maxim Levitsky May 23, 2022, 6:50 a.m. UTC | #10
On Sun, 2022-05-22 at 07:47 -0700, Jim Mattson wrote:
> On Sun, May 22, 2022 at 2:03 AM Maxim Levitsky <mlevitsk@redhat.com> wrote:
> > On Thu, 2022-05-19 at 16:06 +0000, Sean Christopherson wrote:
> > > On Wed, Apr 27, 2022, Maxim Levitsky wrote:
> > > > Neither of these settings should be changed by the guest and it is
> > > > a burden to support it in the acceleration code, so just inhibit
> > > > it instead.
> > > > 
> > > > Also add a boolean 'apic_id_changed' to indicate if apic id ever changed.
> > > > 
> > > > Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
> > > > ---
> > > > +           return;
> > > > +
> > > > +   pr_warn_once("APIC ID change is unsupported by KVM");
> > > 
> > > It's supported (modulo x2APIC shenanigans), otherwise KVM wouldn't need to disable
> > > APICv.
> > 
> > Here, as I said, it would be nice to see that warning if someone complains.
> > Fact is that AVIC code was totally broken in this regard, and there are probably more,
> > so it would be nice to see if anybody complains.
> > 
> > If you insist, I'll remove this warning.
> 
> This may be fine for a hobbyist, but it's a terrible API in an
> enterprise environment. To be honest, I have no way of propagating
> this warning from /var/log/messages on a particular host to a
> potentially impacted customer. Worse, if they're not the first
> impacted customer since the last host reboot, there's no warning to
> propagate. I suppose I could just tell every later customer, "Your VM
> was scheduled to run on a host that previously reported, 'APIC ID
> change is unsupported by KVM.' If you notice any unusual behavior,
> that might be the reason for it," but that isn't going to inspire
> confidence. I could schedule a drain and reboot of the host, but that
> defeats the whole point of the "_once" suffix.

Mostly agree, and I read alrady few discussions about exactly this,
those warnings are mostly useless, but they are used in the
cases where we don't have the courage to just exit with KVM_EXIT_INTERNAL_ERROR.

I do not thing though that the warning is completely useless, 
as we often have the kernel log of the target machine when things go wrong, 
so *we* can notice it.
In other words a kernel warning is mostly useless but better that nothing.

About KVM_EXIT_WARNING, this is IMHO a very good idea, probably combined
with some form of taint flag, which could be read by qemu and then shown
over hmp/qmp interfaces.

Best regards,
	Maxim levitsky


> 
> I know that there's a long history of doing this in KVM, but I'd like
> to ask that we:
> a) stop piling on
> b) start fixing the existing uses
> 
> If KVM cannot emulate a perfectly valid operation, an exit to
> userspace with KVM_EXIT_INTERNAL_ERROR is warranted. Perhaps for
> operations that we suspect KVM might get wrong, we should have a new
> userspace exit: KVM_EXIT_WARNING?
> 
> I'm not saying that you should remove the warning. I'm just asking
> that it be augmented with a direct signal to userspace that KVM may no
> longer be reliable.
>
Jim Mattson May 23, 2022, 5:22 p.m. UTC | #11
On Sun, May 22, 2022 at 11:50 PM Maxim Levitsky <mlevitsk@redhat.com> wrote:
>
> On Sun, 2022-05-22 at 07:47 -0700, Jim Mattson wrote:
> > On Sun, May 22, 2022 at 2:03 AM Maxim Levitsky <mlevitsk@redhat.com> wrote:
> > > On Thu, 2022-05-19 at 16:06 +0000, Sean Christopherson wrote:
> > > > On Wed, Apr 27, 2022, Maxim Levitsky wrote:
> > > > > Neither of these settings should be changed by the guest and it is
> > > > > a burden to support it in the acceleration code, so just inhibit
> > > > > it instead.
> > > > >
> > > > > Also add a boolean 'apic_id_changed' to indicate if apic id ever changed.
> > > > >
> > > > > Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
> > > > > ---
> > > > > +           return;
> > > > > +
> > > > > +   pr_warn_once("APIC ID change is unsupported by KVM");
> > > >
> > > > It's supported (modulo x2APIC shenanigans), otherwise KVM wouldn't need to disable
> > > > APICv.
> > >
> > > Here, as I said, it would be nice to see that warning if someone complains.
> > > Fact is that AVIC code was totally broken in this regard, and there are probably more,
> > > so it would be nice to see if anybody complains.
> > >
> > > If you insist, I'll remove this warning.
> >
> > This may be fine for a hobbyist, but it's a terrible API in an
> > enterprise environment. To be honest, I have no way of propagating
> > this warning from /var/log/messages on a particular host to a
> > potentially impacted customer. Worse, if they're not the first
> > impacted customer since the last host reboot, there's no warning to
> > propagate. I suppose I could just tell every later customer, "Your VM
> > was scheduled to run on a host that previously reported, 'APIC ID
> > change is unsupported by KVM.' If you notice any unusual behavior,
> > that might be the reason for it," but that isn't going to inspire
> > confidence. I could schedule a drain and reboot of the host, but that
> > defeats the whole point of the "_once" suffix.
>
> Mostly agree, and I read alrady few discussions about exactly this,
> those warnings are mostly useless, but they are used in the
> cases where we don't have the courage to just exit with KVM_EXIT_INTERNAL_ERROR.
>
> I do not thing though that the warning is completely useless,
> as we often have the kernel log of the target machine when things go wrong,
> so *we* can notice it.
> In other words a kernel warning is mostly useless but better that nothing.

I don't know how this works for you, but *we* are rarely involved when
things go wrong. :-(

> About KVM_EXIT_WARNING, this is IMHO a very good idea, probably combined
> with some form of taint flag, which could be read by qemu and then shown
> over hmp/qmp interfaces.
>
> Best regards,
>         Maxim levitsky
>
>
> >
> > I know that there's a long history of doing this in KVM, but I'd like
> > to ask that we:
> > a) stop piling on
> > b) start fixing the existing uses
> >
> > If KVM cannot emulate a perfectly valid operation, an exit to
> > userspace with KVM_EXIT_INTERNAL_ERROR is warranted. Perhaps for
> > operations that we suspect KVM might get wrong, we should have a new
> > userspace exit: KVM_EXIT_WARNING?
> >
> > I'm not saying that you should remove the warning. I'm just asking
> > that it be augmented with a direct signal to userspace that KVM may no
> > longer be reliable.
> >
>
>
Sean Christopherson May 23, 2022, 5:31 p.m. UTC | #12
On Mon, May 23, 2022, Maxim Levitsky wrote:
> On Sun, 2022-05-22 at 07:47 -0700, Jim Mattson wrote:
> > On Sun, May 22, 2022 at 2:03 AM Maxim Levitsky <mlevitsk@redhat.com> wrote:
> > > On Thu, 2022-05-19 at 16:06 +0000, Sean Christopherson wrote:
> > > > On Wed, Apr 27, 2022, Maxim Levitsky wrote:
> > > > > Neither of these settings should be changed by the guest and it is
> > > > > a burden to support it in the acceleration code, so just inhibit
> > > > > it instead.
> > > > > 
> > > > > Also add a boolean 'apic_id_changed' to indicate if apic id ever changed.
> > > > > 
> > > > > Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
> > > > > ---
> > > > > +           return;
> > > > > +
> > > > > +   pr_warn_once("APIC ID change is unsupported by KVM");
> > > > 
> > > > It's supported (modulo x2APIC shenanigans), otherwise KVM wouldn't need to disable
> > > > APICv.
> > > 
> > > Here, as I said, it would be nice to see that warning if someone complains.
> > > Fact is that AVIC code was totally broken in this regard, and there are probably more,
> > > so it would be nice to see if anybody complains.
> > > 
> > > If you insist, I'll remove this warning.
> > 
> > This may be fine for a hobbyist, but it's a terrible API in an
> > enterprise environment. To be honest, I have no way of propagating
> > this warning from /var/log/messages on a particular host to a
> > potentially impacted customer. Worse, if they're not the first
> > impacted customer since the last host reboot, there's no warning to
> > propagate. I suppose I could just tell every later customer, "Your VM
> > was scheduled to run on a host that previously reported, 'APIC ID
> > change is unsupported by KVM.' If you notice any unusual behavior,
> > that might be the reason for it," but that isn't going to inspire
> > confidence. I could schedule a drain and reboot of the host, but that
> > defeats the whole point of the "_once" suffix.
> 
> Mostly agree, and I read alrady few discussions about exactly this,
> those warnings are mostly useless, but they are used in the
> cases where we don't have the courage to just exit with KVM_EXIT_INTERNAL_ERROR.
> 
> I do not thing though that the warning is completely useless, 
> as we often have the kernel log of the target machine when things go wrong, 
> so *we* can notice it.
> In other words a kernel warning is mostly useless but better that nothing.

IMO, it's worse than doing nothing.  Us developers become desensitized to the
kernel message due to running tests, the existence of these message propagates
the notion that they are a good thing (and we keep rehashing these discussions...),
users may not realize it's a _once() printk and so think they _aren't_ affected
when re-running a workload, etc...

And in this case, "APIC ID change is unsupported by KVM" is partly wrong.  KVM
fully models Intel's behavior where the ID change isn't carried across x2APIC
enabling, the only unsupported behavior is that the guest will lose APICv
acceleration.

> About KVM_EXIT_WARNING, this is IMHO a very good idea, probably combined
> with some form of taint flag, which could be read by qemu and then shown
> over hmp/qmp interfaces.
Maxim Levitsky June 23, 2022, 9:44 a.m. UTC | #13
On Thu, 2022-05-19 at 16:06 +0000, Sean Christopherson wrote:
> On Wed, Apr 27, 2022, Maxim Levitsky wrote:
> > Neither of these settings should be changed by the guest and it is
> > a burden to support it in the acceleration code, so just inhibit
> > it instead.
> > 
> > Also add a boolean 'apic_id_changed' to indicate if apic id ever changed.
> > 
> > Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
> > ---
> >  arch/x86/include/asm/kvm_host.h |  3 +++
> >  arch/x86/kvm/lapic.c            | 25 ++++++++++++++++++++++---
> >  arch/x86/kvm/lapic.h            |  8 ++++++++
> >  3 files changed, 33 insertions(+), 3 deletions(-)
> > 
> > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> > index 63eae00625bda..636df87542555 100644
> > --- a/arch/x86/include/asm/kvm_host.h
> > +++ b/arch/x86/include/asm/kvm_host.h
> > @@ -1070,6 +1070,8 @@ enum kvm_apicv_inhibit {
> >  	APICV_INHIBIT_REASON_ABSENT,
> >  	/* AVIC is disabled because SEV doesn't support it */
> >  	APICV_INHIBIT_REASON_SEV,
> > +	/* APIC ID and/or APIC base was changed by the guest */
> 
> I don't see any reason to inhibit APICv if the APIC base is changed.  KVM has
> never supported that, and disabling APICv won't "fix" anything.
> 
> Ignoring that is a minor simplification, but also allows for a more intuitive
> name, e.g.
> 
> 	APICV_INHIBIT_REASON_APIC_ID_MODIFIED,
> 
> The inhibit also needs to be added avic_check_apicv_inhibit_reasons() and
> vmx_check_apicv_inhibit_reasons().
> 
> > +	APICV_INHIBIT_REASON_RO_SETTINGS,
> >  };
> >  
> >  struct kvm_arch {
> > @@ -1258,6 +1260,7 @@ struct kvm_arch {
> >  	hpa_t	hv_root_tdp;
> >  	spinlock_t hv_root_tdp_lock;
> >  #endif
> > +	bool apic_id_changed;
> >  };
> >  
> >  struct kvm_vm_stat {
> > diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> > index 66b0eb0bda94e..8996675b3ef4c 100644
> > --- a/arch/x86/kvm/lapic.c
> > +++ b/arch/x86/kvm/lapic.c
> > @@ -2038,6 +2038,19 @@ static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
> >  	}
> >  }
> >  
> > +static void kvm_lapic_check_initial_apic_id(struct kvm_lapic *apic)
> 
> The "check" part is misleading/confusing.  "check" helpers usually query and return
> state.  I assume you avoided "changed" because the ID may or may not actually be
> changing.  Maybe kvm_apic_id_updated()?  Ah, better idea.  What about
> kvm_lapic_xapic_id_updated()?  See below for reasoning.
> 
> > +{
> > +	if (kvm_apic_has_initial_apic_id(apic))
> 
> Rather than add a single-use helper, invoke the helper from kvm_apic_state_fixup()
> in the !x2APIC path, then this can KVM_BUG_ON() x2APIC to help document that KVM
> should never allow the ID to change for x2APIC.
> 
> > +		return;
> > +
> > +	pr_warn_once("APIC ID change is unsupported by KVM");
> 
> It's supported (modulo x2APIC shenanigans), otherwise KVM wouldn't need to disable
> APICv.
> 
> > +	kvm_set_apicv_inhibit(apic->vcpu->kvm,
> > +			APICV_INHIBIT_REASON_RO_SETTINGS);
> > +
> > +	apic->vcpu->kvm->arch.apic_id_changed = true;
> > +}
> > +
> >  static int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
> >  {
> >  	int ret = 0;
> > @@ -2046,9 +2059,11 @@ static int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
> >  
> >  	switch (reg) {
> >  	case APIC_ID:		/* Local APIC ID */
> > -		if (!apic_x2apic_mode(apic))
> > +		if (!apic_x2apic_mode(apic)) {
> > +
> 
> Spurious newline.
> 
> >  			kvm_apic_set_xapic_id(apic, val >> 24);
> > -		else
> > +			kvm_lapic_check_initial_apic_id(apic);
> > +		} else
> 
> Needs curly braces for both paths.
> 
> >  			ret = 1;
> >  		break;
> >  
> 
> E.g.
> 
> ---
>  arch/x86/include/asm/kvm_host.h |  1 +
>  arch/x86/kvm/lapic.c            | 21 +++++++++++++++++++--
>  arch/x86/kvm/svm/avic.c         |  3 ++-
>  arch/x86/kvm/vmx/vmx.c          |  3 ++-
>  4 files changed, 24 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index d895d25c5b2f..d888fa1bae77 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1071,6 +1071,7 @@ enum kvm_apicv_inhibit {
>  	APICV_INHIBIT_REASON_BLOCKIRQ,
>  	APICV_INHIBIT_REASON_ABSENT,
>  	APICV_INHIBIT_REASON_SEV,
> +	APICV_INHIBIT_REASON_APIC_ID_MODIFIED,
>  };
> 
>  struct kvm_arch {
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index 5fd678c90288..6fe8f20f03d8 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -2039,6 +2039,19 @@ static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
>  	}
>  }
> 
> +static void kvm_lapic_xapic_id_updated(struct kvm_lapic *apic)
> +{
> +	struct kvm *kvm = apic->vcpu->kvm;
> +
> +	if (KVM_BUG_ON(apic_x2apic_mode(apic), kvm))
> +		return;
> +
> +	if (kvm_xapic_id(apic) == apic->vcpu->vcpu_id)
> +		return;
> +
> +	kvm_set_apicv_inhibit(kvm, APICV_INHIBIT_REASON_APIC_ID_MODIFIED);
> +}
> +
>  static int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
>  {
>  	int ret = 0;
> @@ -2047,10 +2060,12 @@ static int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
> 
>  	switch (reg) {
>  	case APIC_ID:		/* Local APIC ID */
> -		if (!apic_x2apic_mode(apic))
> +		if (!apic_x2apic_mode(apic)) {
>  			kvm_apic_set_xapic_id(apic, val >> 24);
> -		else
> +			kvm_lapic_xapic_id_updated(apic);
> +		} else {
>  			ret = 1;
> +		}
>  		break;
> 
>  	case APIC_TASKPRI:
> @@ -2665,6 +2680,8 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu,
>  			icr = __kvm_lapic_get_reg64(s->regs, APIC_ICR);
>  			__kvm_lapic_set_reg(s->regs, APIC_ICR2, icr >> 32);
>  		}
> +	} else {
> +		kvm_lapic_xapic_id_updated(vcpu->arch.apic);
>  	}
> 
>  	return 0;
> diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
> index 54fe03714f8a..239c3e8b1f3f 100644
> --- a/arch/x86/kvm/svm/avic.c
> +++ b/arch/x86/kvm/svm/avic.c
> @@ -910,7 +910,8 @@ bool avic_check_apicv_inhibit_reasons(enum kvm_apicv_inhibit reason)
>  			  BIT(APICV_INHIBIT_REASON_PIT_REINJ) |
>  			  BIT(APICV_INHIBIT_REASON_X2APIC) |
>  			  BIT(APICV_INHIBIT_REASON_BLOCKIRQ) |
> -			  BIT(APICV_INHIBIT_REASON_SEV);
> +			  BIT(APICV_INHIBIT_REASON_SEV) |
> +			  BIT(APICV_INHIBIT_REASON_APIC_ID_MODIFIED);
> 
>  	return supported & BIT(reason);
>  }
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index b06eafa5884d..941adade21ea 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -7818,7 +7818,8 @@ static bool vmx_check_apicv_inhibit_reasons(enum kvm_apicv_inhibit reason)
>  	ulong supported = BIT(APICV_INHIBIT_REASON_DISABLE) |
>  			  BIT(APICV_INHIBIT_REASON_ABSENT) |
>  			  BIT(APICV_INHIBIT_REASON_HYPERV) |
> -			  BIT(APICV_INHIBIT_REASON_BLOCKIRQ);
> +			  BIT(APICV_INHIBIT_REASON_BLOCKIRQ) |
> +			  BIT(APICV_INHIBIT_REASON_APIC_ID_MODIFIED);
> 
>  	return supported & BIT(reason);
>  }
> 
> base-commit: 6ab6e3842d18e4529fa524fb6c668ae8a8bf54f4
> --
> 


Hi Sean!

So, I decided to stop beeing lazy and to understand how KVM actually treats the whole thing:


- kvm_apic_set_xapic_id - called when apic id changes either by guest write,
  cpu reset or x2apic beeing disabled due to write to apic base msr.
  apic register is updated, and apic map is recalculated


- kvm_apic_set_x2apic_id - called only when apic base write (guest or userspace),
  enables x2apic. caller uses vcpu->vcpu_id explicity


- kvm_apic_state_fixup - when apic state is uploaded by userspace, has check
  that check for x2apic api. Also triggers apic map update


- kvm_recalculate_apic_map
  this updates the apic map that we use in IPI emulation.
  - xapic id (aka APIC_ID >> 24) is only used for APICs which are not in xapic mode.
  - x2apic ids (aka vcpu->vcpu_id) are used for all APICs which are in x2apic mode,
    and also (as a hack, when an apic has vcpu_id > 255, even if not in x2apic mode,
    its x2apic id is still put in the map)


Conclusions:

- Practically speaking, when an apic is in x2apic mode, even if userspace uploaded
non standard APIC_ID, it is ignored, and just read back (garbage in - garbage out)

- Non standard APIC ID is lost when switching to x2apic mode.




Best regards,
	Maxim Levitsky



PS: sending this so this info is not lost.

Thankfully my APICv inhibit patch got accepted upstream,
so one issue less to deal with.
diff mbox series

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 63eae00625bda..636df87542555 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1070,6 +1070,8 @@  enum kvm_apicv_inhibit {
 	APICV_INHIBIT_REASON_ABSENT,
 	/* AVIC is disabled because SEV doesn't support it */
 	APICV_INHIBIT_REASON_SEV,
+	/* APIC ID and/or APIC base was changed by the guest */
+	APICV_INHIBIT_REASON_RO_SETTINGS,
 };
 
 struct kvm_arch {
@@ -1258,6 +1260,7 @@  struct kvm_arch {
 	hpa_t	hv_root_tdp;
 	spinlock_t hv_root_tdp_lock;
 #endif
+	bool apic_id_changed;
 };
 
 struct kvm_vm_stat {
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 66b0eb0bda94e..8996675b3ef4c 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -2038,6 +2038,19 @@  static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
 	}
 }
 
+static void kvm_lapic_check_initial_apic_id(struct kvm_lapic *apic)
+{
+	if (kvm_apic_has_initial_apic_id(apic))
+		return;
+
+	pr_warn_once("APIC ID change is unsupported by KVM");
+
+	kvm_set_apicv_inhibit(apic->vcpu->kvm,
+			APICV_INHIBIT_REASON_RO_SETTINGS);
+
+	apic->vcpu->kvm->arch.apic_id_changed = true;
+}
+
 static int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
 {
 	int ret = 0;
@@ -2046,9 +2059,11 @@  static int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
 
 	switch (reg) {
 	case APIC_ID:		/* Local APIC ID */
-		if (!apic_x2apic_mode(apic))
+		if (!apic_x2apic_mode(apic)) {
+
 			kvm_apic_set_xapic_id(apic, val >> 24);
-		else
+			kvm_lapic_check_initial_apic_id(apic);
+		} else
 			ret = 1;
 		break;
 
@@ -2335,8 +2350,11 @@  void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
 			     MSR_IA32_APICBASE_BASE;
 
 	if ((value & MSR_IA32_APICBASE_ENABLE) &&
-	     apic->base_address != APIC_DEFAULT_PHYS_BASE)
+	     apic->base_address != APIC_DEFAULT_PHYS_BASE) {
+		kvm_set_apicv_inhibit(apic->vcpu->kvm,
+				APICV_INHIBIT_REASON_RO_SETTINGS);
 		pr_warn_once("APIC base relocation is unsupported by KVM");
+	}
 }
 
 void kvm_apic_update_apicv(struct kvm_vcpu *vcpu)
@@ -2649,6 +2667,7 @@  static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu,
 		}
 	}
 
+	kvm_lapic_check_initial_apic_id(vcpu->arch.apic);
 	return 0;
 }
 
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 4e4f8a22754f9..b9c406d383080 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -252,4 +252,12 @@  static inline u8 kvm_xapic_id(struct kvm_lapic *apic)
 	return kvm_lapic_get_reg(apic, APIC_ID) >> 24;
 }
 
+static inline bool kvm_apic_has_initial_apic_id(struct kvm_lapic *apic)
+{
+	if (apic_x2apic_mode(apic))
+		return true;
+
+	return kvm_xapic_id(apic) == apic->vcpu->vcpu_id;
+}
+
 #endif