diff mbox

[RFC,7/9] x86/SVM: Add vcpu scheduling support for AVIC

Message ID 1474264368-4104-8-git-send-email-suravee.suthikulpanit@amd.com (mailing list archive)
State New, archived
Headers show

Commit Message

Suravee Suthikulpanit Sept. 19, 2016, 5:52 a.m. UTC
Add hooks to manage AVIC data structure during vcpu scheduling.

Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
---
 xen/arch/x86/hvm/svm/avic.c | 82 +++++++++++++++++++++++++++++++++++++++++++++
 xen/arch/x86/hvm/svm/svm.c  | 10 ++++++
 2 files changed, 92 insertions(+)

Comments

Konrad Rzeszutek Wilk Oct. 14, 2016, 3:31 p.m. UTC | #1
On Mon, Sep 19, 2016 at 12:52:46AM -0500, Suravee Suthikulpanit wrote:
> Add hooks to manage AVIC data structure during vcpu scheduling.
> 
> Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
> ---
>  xen/arch/x86/hvm/svm/avic.c | 82 +++++++++++++++++++++++++++++++++++++++++++++
>  xen/arch/x86/hvm/svm/svm.c  | 10 ++++++
>  2 files changed, 92 insertions(+)
> 
> diff --git a/xen/arch/x86/hvm/svm/avic.c b/xen/arch/x86/hvm/svm/avic.c
> index 90df181..cd8a9d4 100644
> --- a/xen/arch/x86/hvm/svm/avic.c
> +++ b/xen/arch/x86/hvm/svm/avic.c
> @@ -45,6 +45,83 @@ avic_get_phy_ait_entry(struct vcpu *v, int index)
>  }
>  
>  /***************************************************************
> + * AVIC VCPU SCHEDULING
> + */
> +static void avic_vcpu_load(struct vcpu *v)
> +{
> +    struct arch_svm_struct *s = &v->arch.hvm_svm;
> +    int h_phy_apic_id;
> +    struct svm_avic_phy_ait_entry entry;
> +
> +    if ( !svm_avic || !s->avic_phy_id_cache )
> +        return;
> +
> +    if ( test_bit(_VPF_blocked, &v->pause_flags) )
> +        return;
> +
> +    /* Note: APIC ID = 0xff is used for broadcast.

Please put the "Note: ..." on a new line.

So you have it like so:

/*
 * Note: ...


> +     *       APIC ID > 0xff is reserved.
> +     */
> +    h_phy_apic_id = cpu_data[v->processor].apicid;
> +    if ( h_phy_apic_id >= AVIC_PHY_APIC_ID_MAX )

What does that mean to the guest? I presume it means it will always
get a VMEXIT b/c the is_running is not set?

Meaning whatever guest ends up unhappily on a physical CPU with
the APIC ID of 255 is screwed :-(?

Perhaps at bootup time when SVM AVIC is initialized we have a check
for APIC ID of 255 and put a blurb saying:

"You have CPU%u with APIC ID 255. That value for SVM AVIC is reserved
which has the unfortunate consequence that AVIC is disabled on CPU%u."

So that the admin can perhaps schedule/pin dom0 on that CPU?


> +        return;
> +
> +    entry = *(s->avic_phy_id_cache);

Perhaps instead of '_cache' say '_last' ?
Like 'avic_last_phy_id'?

> +    smp_rmb();
> +    entry.host_phy_apic_id = h_phy_apic_id;
> +    entry.is_running = 1;
> +    *(s->avic_phy_id_cache) = entry;
> +    smp_wmb();
> +}
> +
> +static void avic_vcpu_put(struct vcpu *v)
> +{
> +    struct arch_svm_struct *s = &v->arch.hvm_svm;
> +    struct svm_avic_phy_ait_entry entry;
> +
> +    if ( !svm_avic || !s->avic_phy_id_cache )
> +        return;
> +
> +    entry = *(s->avic_phy_id_cache);
> +    smp_rmb();
> +    entry.is_running = 0;
> +    *(s->avic_phy_id_cache) = entry;
> +    smp_wmb();
> +}
> +
> +static void avic_vcpu_resume(struct vcpu *v)
> +{
> +    struct svm_avic_phy_ait_entry entry;
> +    struct arch_svm_struct *s = &v->arch.hvm_svm;
> +
> +    if ( !svm_avic_vcpu_enabled(v) || !s->avic_phy_id_cache )
> +        return;
> +
> +    ASSERT(!test_bit(_VPF_blocked, &v->pause_flags));
> +
> +    entry = *(s->avic_phy_id_cache);
> +    smp_rmb();
> +    entry.is_running = 1;
> +    *(s->avic_phy_id_cache)= entry;
                              ^
Please add a space here.

> +    smp_wmb();
> +}
> +
> +static void avic_vcpu_block(struct vcpu *v)
> +{
> +    struct svm_avic_phy_ait_entry entry;
> +    struct arch_svm_struct *s = &v->arch.hvm_svm;
> +
> +    if ( !svm_avic_vcpu_enabled(v) || !s->avic_phy_id_cache )
> +        return;
> +

Should there be a corresponding ASSERT? Or is that
done after these hooks are called?

> +    entry = *(s->avic_phy_id_cache);
> +    smp_rmb();
> +    entry.is_running = 0;
> +    *(s->avic_phy_id_cache) = entry;
> +    smp_wmb();
> +}
> +
> +/***************************************************************
>   * AVIC APIs
>   */
>  int svm_avic_dom_init(struct domain *d)
> @@ -97,6 +174,11 @@ int svm_avic_dom_init(struct domain *d)
>      clear_domain_page(_mfn(mfn));
>      d->arch.hvm_domain.svm.avic_phy_ait_mfn = mfn;
>  
> +    d->arch.hvm_domain.pi_ops.pi_switch_to = avic_vcpu_load;
> +    d->arch.hvm_domain.pi_ops.pi_switch_from = avic_vcpu_put;
> +    d->arch.hvm_domain.pi_ops.vcpu_block = avic_vcpu_block;
> +    d->arch.hvm_domain.pi_ops.pi_do_resume = avic_vcpu_resume;
> +
>      return ret;
>  err_out:
>      svm_avic_dom_destroy(d);
> diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
> index 409096a..aafbfa1 100644
> --- a/xen/arch/x86/hvm/svm/svm.c
> +++ b/xen/arch/x86/hvm/svm/svm.c
> @@ -1011,6 +1011,10 @@ static void svm_ctxt_switch_from(struct vcpu *v)
>      svm_tsc_ratio_save(v);
>  
>      svm_sync_vmcb(v);
> +
> +    if ( v->domain->arch.hvm_domain.pi_ops.pi_switch_from )
> +        v->domain->arch.hvm_domain.pi_ops.pi_switch_from(v);
> +
>      svm_vmload(per_cpu(root_vmcb, cpu));
>  
>      /* Resume use of ISTs now that the host TR is reinstated. */
> @@ -1050,6 +1054,9 @@ static void svm_ctxt_switch_to(struct vcpu *v)
>      svm_lwp_load(v);
>      svm_tsc_ratio_load(v);
>  
> +    if ( v->domain->arch.hvm_domain.pi_ops.pi_switch_to )
> +        v->domain->arch.hvm_domain.pi_ops.pi_switch_to(v);
> +
>      if ( cpu_has_rdtscp )
>          wrmsrl(MSR_TSC_AUX, hvm_msr_tsc_aux(v));
>  }
> @@ -1095,6 +1102,9 @@ static void noreturn svm_do_resume(struct vcpu *v)
>          vmcb_set_vintr(vmcb, intr);
>      }
>  
> +    if ( v->domain->arch.hvm_domain.pi_ops.pi_do_resume )
> +        v->domain->arch.hvm_domain.pi_ops.pi_do_resume(v);
> +
>      hvm_do_resume(v);
>  
>      reset_stack_and_jump(svm_asm_do_resume);
> -- 
> 1.9.1
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> https://lists.xen.org/xen-devel
Jan Beulich Oct. 24, 2016, 11:19 a.m. UTC | #2
>>> On 14.10.16 at 17:31, <konrad.wilk@oracle.com> wrote:
> On Mon, Sep 19, 2016 at 12:52:46AM -0500, Suravee Suthikulpanit wrote:
>> +     *       APIC ID > 0xff is reserved.
>> +     */
>> +    h_phy_apic_id = cpu_data[v->processor].apicid;
>> +    if ( h_phy_apic_id >= AVIC_PHY_APIC_ID_MAX )
> 
> What does that mean to the guest? I presume it means it will always
> get a VMEXIT b/c the is_running is not set?
> 
> Meaning whatever guest ends up unhappily on a physical CPU with
> the APIC ID of 255 is screwed :-(?

Such a CPU would be unusable - 0xFF is the "broadcast" destination
specifier.

Jan
Jan Beulich Dec. 22, 2016, 11:32 a.m. UTC | #3
>>> On 19.09.16 at 07:52, <suravee.suthikulpanit@amd.com> wrote:
> --- a/xen/arch/x86/hvm/svm/avic.c
> +++ b/xen/arch/x86/hvm/svm/avic.c
> @@ -45,6 +45,83 @@ avic_get_phy_ait_entry(struct vcpu *v, int index)
>  }
>  
>  /***************************************************************
> + * AVIC VCPU SCHEDULING
> + */
> +static void avic_vcpu_load(struct vcpu *v)
> +{
> +    struct arch_svm_struct *s = &v->arch.hvm_svm;
> +    int h_phy_apic_id;
> +    struct svm_avic_phy_ait_entry entry;
> +
> +    if ( !svm_avic || !s->avic_phy_id_cache )

Instead of adding checks of svm_avic inside the functions, please don't
install them in the first place when !svm_avic.

> +        return;
> +
> +    if ( test_bit(_VPF_blocked, &v->pause_flags) )
> +        return;
> +
> +    /* Note: APIC ID = 0xff is used for broadcast.
> +     *       APIC ID > 0xff is reserved.
> +     */
> +    h_phy_apic_id = cpu_data[v->processor].apicid;
> +    if ( h_phy_apic_id >= AVIC_PHY_APIC_ID_MAX )
> +        return;

Wouldn't such better be an ASSERT(), if at all? Without x2APIC
support I can't see how such a processor could be in use.

> +    entry = *(s->avic_phy_id_cache);
> +    smp_rmb();
> +    entry.host_phy_apic_id = h_phy_apic_id;
> +    entry.is_running = 1;
> +    *(s->avic_phy_id_cache) = entry;
> +    smp_wmb();
> +}
> +
> +static void avic_vcpu_put(struct vcpu *v)

May I recommend giving this and its counterpart a consistent pair of
names (put not really being the opposite of load)?

Jan
diff mbox

Patch

diff --git a/xen/arch/x86/hvm/svm/avic.c b/xen/arch/x86/hvm/svm/avic.c
index 90df181..cd8a9d4 100644
--- a/xen/arch/x86/hvm/svm/avic.c
+++ b/xen/arch/x86/hvm/svm/avic.c
@@ -45,6 +45,83 @@  avic_get_phy_ait_entry(struct vcpu *v, int index)
 }
 
 /***************************************************************
+ * AVIC VCPU SCHEDULING
+ */
+static void avic_vcpu_load(struct vcpu *v)
+{
+    struct arch_svm_struct *s = &v->arch.hvm_svm;
+    int h_phy_apic_id;
+    struct svm_avic_phy_ait_entry entry;
+
+    if ( !svm_avic || !s->avic_phy_id_cache )
+        return;
+
+    if ( test_bit(_VPF_blocked, &v->pause_flags) )
+        return;
+
+    /* Note: APIC ID = 0xff is used for broadcast.
+     *       APIC ID > 0xff is reserved.
+     */
+    h_phy_apic_id = cpu_data[v->processor].apicid;
+    if ( h_phy_apic_id >= AVIC_PHY_APIC_ID_MAX )
+        return;
+
+    entry = *(s->avic_phy_id_cache);
+    smp_rmb();
+    entry.host_phy_apic_id = h_phy_apic_id;
+    entry.is_running = 1;
+    *(s->avic_phy_id_cache) = entry;
+    smp_wmb();
+}
+
+static void avic_vcpu_put(struct vcpu *v)
+{
+    struct arch_svm_struct *s = &v->arch.hvm_svm;
+    struct svm_avic_phy_ait_entry entry;
+
+    if ( !svm_avic || !s->avic_phy_id_cache )
+        return;
+
+    entry = *(s->avic_phy_id_cache);
+    smp_rmb();
+    entry.is_running = 0;
+    *(s->avic_phy_id_cache) = entry;
+    smp_wmb();
+}
+
+static void avic_vcpu_resume(struct vcpu *v)
+{
+    struct svm_avic_phy_ait_entry entry;
+    struct arch_svm_struct *s = &v->arch.hvm_svm;
+
+    if ( !svm_avic_vcpu_enabled(v) || !s->avic_phy_id_cache )
+        return;
+
+    ASSERT(!test_bit(_VPF_blocked, &v->pause_flags));
+
+    entry = *(s->avic_phy_id_cache);
+    smp_rmb();
+    entry.is_running = 1;
+    *(s->avic_phy_id_cache)= entry;
+    smp_wmb();
+}
+
+static void avic_vcpu_block(struct vcpu *v)
+{
+    struct svm_avic_phy_ait_entry entry;
+    struct arch_svm_struct *s = &v->arch.hvm_svm;
+
+    if ( !svm_avic_vcpu_enabled(v) || !s->avic_phy_id_cache )
+        return;
+
+    entry = *(s->avic_phy_id_cache);
+    smp_rmb();
+    entry.is_running = 0;
+    *(s->avic_phy_id_cache) = entry;
+    smp_wmb();
+}
+
+/***************************************************************
  * AVIC APIs
  */
 int svm_avic_dom_init(struct domain *d)
@@ -97,6 +174,11 @@  int svm_avic_dom_init(struct domain *d)
     clear_domain_page(_mfn(mfn));
     d->arch.hvm_domain.svm.avic_phy_ait_mfn = mfn;
 
+    d->arch.hvm_domain.pi_ops.pi_switch_to = avic_vcpu_load;
+    d->arch.hvm_domain.pi_ops.pi_switch_from = avic_vcpu_put;
+    d->arch.hvm_domain.pi_ops.vcpu_block = avic_vcpu_block;
+    d->arch.hvm_domain.pi_ops.pi_do_resume = avic_vcpu_resume;
+
     return ret;
 err_out:
     svm_avic_dom_destroy(d);
diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
index 409096a..aafbfa1 100644
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -1011,6 +1011,10 @@  static void svm_ctxt_switch_from(struct vcpu *v)
     svm_tsc_ratio_save(v);
 
     svm_sync_vmcb(v);
+
+    if ( v->domain->arch.hvm_domain.pi_ops.pi_switch_from )
+        v->domain->arch.hvm_domain.pi_ops.pi_switch_from(v);
+
     svm_vmload(per_cpu(root_vmcb, cpu));
 
     /* Resume use of ISTs now that the host TR is reinstated. */
@@ -1050,6 +1054,9 @@  static void svm_ctxt_switch_to(struct vcpu *v)
     svm_lwp_load(v);
     svm_tsc_ratio_load(v);
 
+    if ( v->domain->arch.hvm_domain.pi_ops.pi_switch_to )
+        v->domain->arch.hvm_domain.pi_ops.pi_switch_to(v);
+
     if ( cpu_has_rdtscp )
         wrmsrl(MSR_TSC_AUX, hvm_msr_tsc_aux(v));
 }
@@ -1095,6 +1102,9 @@  static void noreturn svm_do_resume(struct vcpu *v)
         vmcb_set_vintr(vmcb, intr);
     }
 
+    if ( v->domain->arch.hvm_domain.pi_ops.pi_do_resume )
+        v->domain->arch.hvm_domain.pi_ops.pi_do_resume(v);
+
     hvm_do_resume(v);
 
     reset_stack_and_jump(svm_asm_do_resume);