
[linux,v3,3/9] xen: introduce xen_vcpu_id mapping

Message ID 1469536228-29932-4-git-send-email-vkuznets@redhat.com (mailing list archive)
State New, archived

Commit Message

Vitaly Kuznetsov July 26, 2016, 12:30 p.m. UTC
It may happen that Xen's and Linux's ideas of the vCPU id diverge. In
particular, when we crash on a secondary vCPU we may want to do kdump,
and unlike plain kexec, where we do migrate_to_reboot_cpu(), we try
booting on the vCPU which crashed. This doesn't work very well for
PVHVM guests, as we have a number of hypercalls where we pass the vCPU
id as a parameter. These hypercalls either fail or do something
unexpected. To solve the issue, introduce a percpu xen_vcpu_id mapping.
ARM and PV guests get a direct mapping for now. The boot CPU of a PVHVM
guest gets its id from CPUID. With secondary CPUs it is a bit trickier:
currently, we initialize IPI vectors before these CPUs boot, so we
can't use CPUID. Use ACPI ids from MADT instead.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
---
Changes since v2:
- Use uint32_t for xen_vcpu_id mapping [Julien Grall]

Changes since v1:
- Introduce xen_vcpu_nr() helper [David Vrabel]
- Use ACPI ids instead of vLAPIC ids /2 [Andrew Cooper, Jan Beulich]
---
 arch/arm/xen/enlighten.c | 10 ++++++++++
 arch/x86/xen/enlighten.c | 23 ++++++++++++++++++++++-
 include/xen/xen-ops.h    |  6 ++++++
 3 files changed, 38 insertions(+), 1 deletion(-)
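
Later patches in the series (not shown here) convert the affected
hypercall call sites to go through the new helper. A minimal sketch of
what such a conversion looks like, with VCPUOP_register_vcpu_info as an
illustrative example of an affected hypercall:

	/* Before: the Linux CPU number is passed to Xen directly. */
	err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, cpu, &info);

	/* After: translate the Linux CPU number into the id Xen knows
	 * the vCPU by, using the new per-cpu mapping. */
	err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info,
				 xen_vcpu_nr(cpu), &info);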

Comments

Julien Grall Sept. 2, 2016, 3:29 p.m. UTC | #1
Hi Vitaly,

On 26/07/16 13:30, Vitaly Kuznetsov wrote:
> It may happen that Xen's and Linux's ideas of the vCPU id diverge. In
> particular, when we crash on a secondary vCPU we may want to do kdump,
> and unlike plain kexec, where we do migrate_to_reboot_cpu(), we try
> booting on the vCPU which crashed. This doesn't work very well for
> PVHVM guests, as we have a number of hypercalls where we pass the vCPU
> id as a parameter. These hypercalls either fail or do something
> unexpected. To solve the issue, introduce a percpu xen_vcpu_id mapping.
> ARM and PV guests get a direct mapping for now. The boot CPU of a PVHVM
> guest gets its id from CPUID. With secondary CPUs it is a bit trickier:
> currently, we initialize IPI vectors before these CPUs boot, so we
> can't use CPUID. Use ACPI ids from MADT instead.
>
> Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
> ---
> Changes since v2:
> - Use uint32_t for xen_vcpu_id mapping [Julien Grall]
>
> Changes since v1:
> - Introduce xen_vcpu_nr() helper [David Vrabel]
> - Use ACPI ids instead of vLAPIC ids /2 [Andrew Cooper, Jan Beulich]
> ---
>  arch/arm/xen/enlighten.c | 10 ++++++++++
>  arch/x86/xen/enlighten.c | 23 ++++++++++++++++++++++-
>  include/xen/xen-ops.h    |  6 ++++++
>  3 files changed, 38 insertions(+), 1 deletion(-)
>
> diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c
> index 75cd734..fe32267 100644
> --- a/arch/arm/xen/enlighten.c
> +++ b/arch/arm/xen/enlighten.c
> @@ -46,6 +46,10 @@ struct shared_info *HYPERVISOR_shared_info = (void *)&xen_dummy_shared_info;
>  DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu);
>  static struct vcpu_info __percpu *xen_vcpu_info;
>
> +/* Linux <-> Xen vCPU id mapping */
> +DEFINE_PER_CPU(uint32_t, xen_vcpu_id) = U32_MAX;
> +EXPORT_PER_CPU_SYMBOL(xen_vcpu_id);
> +
>  /* These are unused until we support booting "pre-ballooned" */
>  unsigned long xen_released_pages;
>  struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata;
> @@ -179,6 +183,9 @@ static void xen_percpu_init(void)
>  	pr_info("Xen: initializing cpu%d\n", cpu);
>  	vcpup = per_cpu_ptr(xen_vcpu_info, cpu);
>
> +	/* Direct vCPU id mapping for ARM guests. */
> +	per_cpu(xen_vcpu_id, cpu) = cpu;
> +

We did some internal testing on ARM64 with the latest Linux kernel
(4.8-rc4) and noticed that this patch breaks SMP support. Sorry for
noticing the issue this late.

This function is called on the secondary CPU itself once it is running,
whilst some code (e.g. init_control_block() in
drivers/xen/events/events_fifo.c) is executed earlier, on the boot CPU,
whilst preparing that secondary CPU.

So xen_vcpu_nr(cpu) will always return 0 in this case, and
init_control_block() will fail.

I am not sure how to fix this. I guess we could set up
per_cpu(xen_vcpu_id, *) in xen_guest_init(). Any opinions?
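
A minimal sketch of that suggestion (untested; it assumes the direct
mapping can simply be established for every possible CPU while still on
the boot CPU, e.g. from xen_guest_init(), so that CPU_UP_PREPARE-time
code such as init_control_block() already sees a valid
xen_vcpu_nr(cpu)):

	int cpu;

	/* Direct vCPU id mapping for ARM guests, set up once on the
	 * boot CPU for every possible CPU instead of lazily in
	 * xen_percpu_init() on each secondary CPU. */
	for_each_possible_cpu(cpu)
		per_cpu(xen_vcpu_id, cpu) = cpu;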

[...]

> diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
> index 0f87db2..c833912 100644
> --- a/arch/x86/xen/enlighten.c
> +++ b/arch/x86/xen/enlighten.c
> @@ -1795,6 +1806,12 @@ static void __init init_hvm_pv_info(void)
>
>  	xen_setup_features();
>
> +	cpuid(base + 4, &eax, &ebx, &ecx, &edx);
> +	if (eax & XEN_HVM_CPUID_VCPU_ID_PRESENT)
> +		this_cpu_write(xen_vcpu_id, ebx);
> +	else
> +		this_cpu_write(xen_vcpu_id, smp_processor_id());
> +
>  	pv_info.name = "Xen HVM";
>
>  	xen_domain_type = XEN_HVM_DOMAIN;
> @@ -1806,6 +1823,10 @@ static int xen_hvm_cpu_notify(struct notifier_block *self, unsigned long action,
>  	int cpu = (long)hcpu;
>  	switch (action) {
>  	case CPU_UP_PREPARE:
> +		if (cpu_acpi_id(cpu) != U32_MAX)
> +			per_cpu(xen_vcpu_id, cpu) = cpu_acpi_id(cpu);
> +		else
> +			per_cpu(xen_vcpu_id, cpu) = cpu;

I have not tried it myself, but looking at the code, the notifiers
xen_hvm_cpu_notifier and evtchn_fifo_cpu_notifier have the same
priority. So what prevents the code above from being executed after the
event channel callback?

>  		xen_vcpu_setup(cpu);
>  		if (xen_have_vector_callback) {
>  			if (xen_feature(XENFEAT_hvm_safe_pvclock))
> diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h
> index 86abe07..648ce814 100644
> --- a/include/xen/xen-ops.h
> +++ b/include/xen/xen-ops.h
> @@ -9,6 +9,12 @@
>
>  DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu);
>
> +DECLARE_PER_CPU(uint32_t, xen_vcpu_id);
> +static inline int xen_vcpu_nr(int cpu)
> +{
> +	return per_cpu(xen_vcpu_id, cpu);
> +}
> +
>  void xen_arch_pre_suspend(void);
>  void xen_arch_post_suspend(int suspend_cancelled);

Regards,

Julien

Boris Ostrovsky Sept. 4, 2016, 9:12 p.m. UTC | #2
On 09/02/2016 11:29 AM, Julien Grall wrote:
>
>> diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
>> index 0f87db2..c833912 100644
>> --- a/arch/x86/xen/enlighten.c
>> +++ b/arch/x86/xen/enlighten.c
>> @@ -1795,6 +1806,12 @@ static void __init init_hvm_pv_info(void)
>>
>>      xen_setup_features();
>>
>> +    cpuid(base + 4, &eax, &ebx, &ecx, &edx);
>> +    if (eax & XEN_HVM_CPUID_VCPU_ID_PRESENT)
>> +        this_cpu_write(xen_vcpu_id, ebx);
>> +    else
>> +        this_cpu_write(xen_vcpu_id, smp_processor_id());
>> +
>>      pv_info.name = "Xen HVM";
>>
>>      xen_domain_type = XEN_HVM_DOMAIN;
>> @@ -1806,6 +1823,10 @@ static int xen_hvm_cpu_notify(struct
>> notifier_block *self, unsigned long action,
>>      int cpu = (long)hcpu;
>>      switch (action) {
>>      case CPU_UP_PREPARE:
>> +        if (cpu_acpi_id(cpu) != U32_MAX)
>> +            per_cpu(xen_vcpu_id, cpu) = cpu_acpi_id(cpu);
>> +        else
>> +            per_cpu(xen_vcpu_id, cpu) = cpu;
>
> I have not tried it myself, but looking at the code, the notifiers
> xen_hvm_cpu_notifier and evtchn_fifo_cpu_notifier have the same
> priority. So what prevents the code above from being executed after
> the event channel callback?


We will be converting to the new hotplug state machine, where this
order will be guaranteed:

 https://lists.xenproject.org/archives/html/xen-devel/2016-08/msg01914.html
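
For reference, a rough sketch of what that conversion could look like
(the state and callback names below are assumptions, not taken from
this patch or the linked series; the point is only that cpuhp states
run in the order they appear in enum cpuhp_state, so the Xen prepare
step can be guaranteed to run before the event channel one):

	#include <linux/cpuhotplug.h>

	static int xen_cpu_up_prepare(unsigned int cpu)
	{
		/* Same logic as the CPU_UP_PREPARE case above, now as a
		 * dedicated cpuhp callback. */
		if (cpu_acpi_id(cpu) != U32_MAX)
			per_cpu(xen_vcpu_id, cpu) = cpu_acpi_id(cpu);
		else
			per_cpu(xen_vcpu_id, cpu) = cpu;

		xen_vcpu_setup(cpu);
		return 0;
	}

	/* Registered once from the guest init path; a CPUHP_XEN_PREPARE
	 * state placed before the event channel prepare state in
	 * enum cpuhp_state makes the ordering explicit. */
	cpuhp_setup_state_nocalls(CPUHP_XEN_PREPARE,
				  "x86/xen/hvm_guest:prepare",
				  xen_cpu_up_prepare, NULL);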


-boris

Patch

diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c
index 75cd734..fe32267 100644
--- a/arch/arm/xen/enlighten.c
+++ b/arch/arm/xen/enlighten.c
@@ -46,6 +46,10 @@  struct shared_info *HYPERVISOR_shared_info = (void *)&xen_dummy_shared_info;
 DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu);
 static struct vcpu_info __percpu *xen_vcpu_info;
 
+/* Linux <-> Xen vCPU id mapping */
+DEFINE_PER_CPU(uint32_t, xen_vcpu_id) = U32_MAX;
+EXPORT_PER_CPU_SYMBOL(xen_vcpu_id);
+
 /* These are unused until we support booting "pre-ballooned" */
 unsigned long xen_released_pages;
 struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata;
@@ -179,6 +183,9 @@  static void xen_percpu_init(void)
 	pr_info("Xen: initializing cpu%d\n", cpu);
 	vcpup = per_cpu_ptr(xen_vcpu_info, cpu);
 
+	/* Direct vCPU id mapping for ARM guests. */
+	per_cpu(xen_vcpu_id, cpu) = cpu;
+
 	info.mfn = virt_to_gfn(vcpup);
 	info.offset = xen_offset_in_page(vcpup);
 
@@ -328,6 +335,9 @@  static int __init xen_guest_init(void)
 	if (xen_vcpu_info == NULL)
 		return -ENOMEM;
 
+	/* Direct vCPU id mapping for ARM guests. */
+	per_cpu(xen_vcpu_id, 0) = 0;
+
 	if (gnttab_setup_auto_xlat_frames(grant_frames)) {
 		free_percpu(xen_vcpu_info);
 		return -ENOMEM;
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 0f87db2..c833912 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -59,6 +59,7 @@ 
 #include <asm/xen/pci.h>
 #include <asm/xen/hypercall.h>
 #include <asm/xen/hypervisor.h>
+#include <asm/xen/cpuid.h>
 #include <asm/fixmap.h>
 #include <asm/processor.h>
 #include <asm/proto.h>
@@ -118,6 +119,10 @@  DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu);
  */
 DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info);
 
+/* Linux <-> Xen vCPU id mapping */
+DEFINE_PER_CPU(uint32_t, xen_vcpu_id) = U32_MAX;
+EXPORT_PER_CPU_SYMBOL(xen_vcpu_id);
+
 enum xen_domain_type xen_domain_type = XEN_NATIVE;
 EXPORT_SYMBOL_GPL(xen_domain_type);
 
@@ -1135,8 +1140,11 @@  void xen_setup_vcpu_info_placement(void)
 {
 	int cpu;
 
-	for_each_possible_cpu(cpu)
+	for_each_possible_cpu(cpu) {
+		/* Set up direct vCPU id mapping for PV guests. */
+		per_cpu(xen_vcpu_id, cpu) = cpu;
 		xen_vcpu_setup(cpu);
+	}
 
 	/* xen_vcpu_setup managed to place the vcpu_info within the
 	 * percpu area for all cpus, so make use of it. Note that for
@@ -1727,6 +1735,9 @@  asmlinkage __visible void __init xen_start_kernel(void)
 #endif
 	xen_raw_console_write("about to get started...\n");
 
+	/* Let's presume PV guests always boot on vCPU with id 0. */
+	per_cpu(xen_vcpu_id, 0) = 0;
+
 	xen_setup_runstate_info(0);
 
 	xen_efi_init();
@@ -1795,6 +1806,12 @@  static void __init init_hvm_pv_info(void)
 
 	xen_setup_features();
 
+	cpuid(base + 4, &eax, &ebx, &ecx, &edx);
+	if (eax & XEN_HVM_CPUID_VCPU_ID_PRESENT)
+		this_cpu_write(xen_vcpu_id, ebx);
+	else
+		this_cpu_write(xen_vcpu_id, smp_processor_id());
+
 	pv_info.name = "Xen HVM";
 
 	xen_domain_type = XEN_HVM_DOMAIN;
@@ -1806,6 +1823,10 @@  static int xen_hvm_cpu_notify(struct notifier_block *self, unsigned long action,
 	int cpu = (long)hcpu;
 	switch (action) {
 	case CPU_UP_PREPARE:
+		if (cpu_acpi_id(cpu) != U32_MAX)
+			per_cpu(xen_vcpu_id, cpu) = cpu_acpi_id(cpu);
+		else
+			per_cpu(xen_vcpu_id, cpu) = cpu;
 		xen_vcpu_setup(cpu);
 		if (xen_have_vector_callback) {
 			if (xen_feature(XENFEAT_hvm_safe_pvclock))
diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h
index 86abe07..648ce814 100644
--- a/include/xen/xen-ops.h
+++ b/include/xen/xen-ops.h
@@ -9,6 +9,12 @@ 
 
 DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu);
 
+DECLARE_PER_CPU(uint32_t, xen_vcpu_id);
+static inline int xen_vcpu_nr(int cpu)
+{
+	return per_cpu(xen_vcpu_id, cpu);
+}
+
 void xen_arch_pre_suspend(void);
 void xen_arch_post_suspend(int suspend_cancelled);