Message ID | 15b9b94e1acb70ba713391de5bae42a622c29747.1726746877.git.oleksii.kurochko@gmail.com (mailing list archive)
---|---
State | Superseded
Series | Move percpu code to common
Hi, On 19/09/2024 17:59, Oleksii Kurochko wrote: > Centralize per-cpu area management to reduce code duplication and > enhance maintainability across architectures. > > The per-cpu area management code, which is largely common among > architectures, is moved to a shared implementation in > xen/common/percpu.c. This change includes: > * Add arch_percpu_area_init_status() and arch_cpu_percpu_callback() > functions to address architecture-specific variations. > * Remove percpu.c from the Arm architecture. > * For x86, percpu.c now only defines arch_percpu_area_init_status() > and arch_cpu_percpu_callback(), and INVALID_PERCPU_AREA. > * Drop the declaration of __per_cpu_offset[] from stubs.c in > PPC and RISC-V to facilitate the build of the common per-cpu code. > > No functional changes. > > Signed-off-by: Oleksii Kurochko <oleksii.kurochko@gmail.com> > --- > xen/arch/arm/Makefile | 1 - > xen/arch/arm/percpu.c | 85 -------------------- > xen/arch/ppc/stubs.c | 1 - > xen/arch/riscv/stubs.c | 1 - > xen/arch/x86/include/asm/Makefile | 1 - > xen/arch/x86/include/asm/percpu.h | 18 +++++ > xen/arch/x86/percpu.c | 92 ++-------------------- > xen/common/Makefile | 1 + > xen/common/percpu.c | 127 ++++++++++++++++++++++++++++++ > xen/include/asm-generic/percpu.h | 8 ++ > 10 files changed, 159 insertions(+), 176 deletions(-) > delete mode 100644 xen/arch/arm/percpu.c > create mode 100644 xen/arch/x86/include/asm/percpu.h > create mode 100644 xen/common/percpu.c > > diff --git a/xen/arch/arm/Makefile b/xen/arch/arm/Makefile > index 7792bff597..e4ad1ce851 100644 > --- a/xen/arch/arm/Makefile > +++ b/xen/arch/arm/Makefile > @@ -39,7 +39,6 @@ obj-$(CONFIG_MEM_ACCESS) += mem_access.o > obj-y += mm.o > obj-y += monitor.o > obj-y += p2m.o > -obj-y += percpu.o > obj-y += platform.o > obj-y += platform_hypercall.o > obj-y += physdev.o > diff --git a/xen/arch/arm/percpu.c b/xen/arch/arm/percpu.c > deleted file mode 100644 > index 87fe960330..0000000000 > --- a/xen/arch/arm/percpu.c > +++ /dev/null > @@ -1,85 +0,0 @@ > -/* SPDX-License-Identifier: GPL-2.0 */ > -#include <xen/percpu.h> > -#include <xen/cpu.h> > -#include <xen/init.h> > -#include <xen/mm.h> > -#include <xen/rcupdate.h> > - > -unsigned long __per_cpu_offset[NR_CPUS]; > -#define INVALID_PERCPU_AREA (-(long)__per_cpu_start) > -#define PERCPU_ORDER (get_order_from_bytes(__per_cpu_data_end-__per_cpu_start)) > - > -void __init percpu_init_areas(void) > -{ > - unsigned int cpu; > - for ( cpu = 1; cpu < NR_CPUS; cpu++ ) > - __per_cpu_offset[cpu] = INVALID_PERCPU_AREA; > -} > - > -static int init_percpu_area(unsigned int cpu) > -{ > - char *p; > - if ( __per_cpu_offset[cpu] != INVALID_PERCPU_AREA ) > - return -EBUSY; > - if ( (p = alloc_xenheap_pages(PERCPU_ORDER, 0)) == NULL ) > - return -ENOMEM; > - memset(p, 0, __per_cpu_data_end - __per_cpu_start); > - __per_cpu_offset[cpu] = p - __per_cpu_start; > - return 0; > -} > - > -struct free_info { > - unsigned int cpu; > - struct rcu_head rcu; > -}; > -static DEFINE_PER_CPU(struct free_info, free_info); > - > -static void _free_percpu_area(struct rcu_head *head) > -{ > - struct free_info *info = container_of(head, struct free_info, rcu); > - unsigned int cpu = info->cpu; > - char *p = __per_cpu_start + __per_cpu_offset[cpu]; > - free_xenheap_pages(p, PERCPU_ORDER); > - __per_cpu_offset[cpu] = INVALID_PERCPU_AREA; > -} > - > -static void free_percpu_area(unsigned int cpu) > -{ > - struct free_info *info = &per_cpu(free_info, cpu); > - info->cpu = cpu; > - call_rcu(&info->rcu, _free_percpu_area); > -} > - > 
-static int cpu_percpu_callback( > - struct notifier_block *nfb, unsigned long action, void *hcpu) > -{ > - unsigned int cpu = (unsigned long)hcpu; > - int rc = 0; > - > - switch ( action ) > - { > - case CPU_UP_PREPARE: > - rc = init_percpu_area(cpu); > - break; > - case CPU_UP_CANCELED: > - case CPU_DEAD: > - free_percpu_area(cpu); > - break; > - default: > - break; > - } > - > - return notifier_from_errno(rc); > -} > - > -static struct notifier_block cpu_percpu_nfb = { > - .notifier_call = cpu_percpu_callback, > - .priority = 100 /* highest priority */ > -}; > - > -static int __init percpu_presmp_init(void) > -{ > - register_cpu_notifier(&cpu_percpu_nfb); > - return 0; > -} > -presmp_initcall(percpu_presmp_init); > diff --git a/xen/arch/ppc/stubs.c b/xen/arch/ppc/stubs.c > index bdb5f8c66d..fff82f5cf3 100644 > --- a/xen/arch/ppc/stubs.c > +++ b/xen/arch/ppc/stubs.c > @@ -141,7 +141,6 @@ void smp_send_state_dump(unsigned int cpu) > /* domain.c */ > > DEFINE_PER_CPU(struct vcpu *, curr_vcpu); > -unsigned long __per_cpu_offset[NR_CPUS]; > > void context_switch(struct vcpu *prev, struct vcpu *next) > { > diff --git a/xen/arch/riscv/stubs.c b/xen/arch/riscv/stubs.c > index 2aa245f272..5951b0ce91 100644 > --- a/xen/arch/riscv/stubs.c > +++ b/xen/arch/riscv/stubs.c > @@ -133,7 +133,6 @@ void smp_send_state_dump(unsigned int cpu) > /* domain.c */ > > DEFINE_PER_CPU(struct vcpu *, curr_vcpu); > -unsigned long __per_cpu_offset[NR_CPUS]; > > void context_switch(struct vcpu *prev, struct vcpu *next) > { > diff --git a/xen/arch/x86/include/asm/Makefile b/xen/arch/x86/include/asm/Makefile > index daab34ff0a..2c27787d31 100644 > --- a/xen/arch/x86/include/asm/Makefile > +++ b/xen/arch/x86/include/asm/Makefile > @@ -1,3 +1,2 @@ > # SPDX-License-Identifier: GPL-2.0-only > generic-y += div64.h > -generic-y += percpu.h > diff --git a/xen/arch/x86/include/asm/percpu.h b/xen/arch/x86/include/asm/percpu.h > new file mode 100644 > index 0000000000..0f9efba27a > --- /dev/null > +++ b/xen/arch/x86/include/asm/percpu.h > @@ -0,0 +1,18 @@ > +#ifndef __X86_PERCPU_H__ > +#define __X86_PERCPU_H__ > + > +#include <asm-generic/percpu.h> > + > +/* > + * Force uses of per_cpu() with an invalid area to attempt to access the > + * middle of the non-canonical address space resulting in a #GP, rather than a > + * possible #PF at (NULL + a little) which has security implications in the > + * context of PV guests. > + */ > +#define INVALID_PERCPU_AREA (0x8000000000000000UL - (unsigned long)__per_cpu_start) > + > +#define ARCH_PERCPU_AREA_CHECK > + > +#define ARCH_CPU_PERCPU_CALLBACK > + > +#endif /* __X86_PERCPU_H__ */ > diff --git a/xen/arch/x86/percpu.c b/xen/arch/x86/percpu.c > index 3205eacea6..33bded8cac 100644 > --- a/xen/arch/x86/percpu.c > +++ b/xen/arch/x86/percpu.c > @@ -1,79 +1,19 @@ > -#include <xen/percpu.h> > #include <xen/cpu.h> > -#include <xen/init.h> > -#include <xen/mm.h> > -#include <xen/rcupdate.h> > - > -unsigned long __per_cpu_offset[NR_CPUS]; > - > -/* > - * Force uses of per_cpu() with an invalid area to attempt to access the > - * middle of the non-canonical address space resulting in a #GP, rather than a > - * possible #PF at (NULL + a little) which has security implications in the > - * context of PV guests. 
> - */ > -#define INVALID_PERCPU_AREA (0x8000000000000000UL - (unsigned long)__per_cpu_start) > -#define PERCPU_ORDER get_order_from_bytes(__per_cpu_data_end - __per_cpu_start) > - > -void __init percpu_init_areas(void) > -{ > - unsigned int cpu; > - > - for ( cpu = 1; cpu < NR_CPUS; cpu++ ) > - __per_cpu_offset[cpu] = INVALID_PERCPU_AREA; > -} > +#include <xen/percpu.h> > +#include <xen/smp.h> > > -static int init_percpu_area(unsigned int cpu) > +int arch_percpu_area_init_status(void) I understand that Arm and x86 are returning a different value today. But now that we are provising a common implementation, I think we need to explain the difference. This is probably a question for the x86 folks. > { > - char *p; > - > - if ( __per_cpu_offset[cpu] != INVALID_PERCPU_AREA ) > - return 0; > - > - if ( (p = alloc_xenheap_pages(PERCPU_ORDER, 0)) == NULL ) > - return -ENOMEM; > - > - memset(p, 0, __per_cpu_data_end - __per_cpu_start); > - __per_cpu_offset[cpu] = p - __per_cpu_start; > - > return 0; > } > > -struct free_info { > - unsigned int cpu; > - struct rcu_head rcu; > -}; > -static DEFINE_PER_CPU(struct free_info, free_info); > - > -static void cf_check _free_percpu_area(struct rcu_head *head) > -{ > - struct free_info *info = container_of(head, struct free_info, rcu); > - unsigned int cpu = info->cpu; > - char *p = __per_cpu_start + __per_cpu_offset[cpu]; > - > - free_xenheap_pages(p, PERCPU_ORDER); > - __per_cpu_offset[cpu] = INVALID_PERCPU_AREA; > -} > - > -static void free_percpu_area(unsigned int cpu) > -{ > - struct free_info *info = &per_cpu(free_info, cpu); > - > - info->cpu = cpu; > - call_rcu(&info->rcu, _free_percpu_area); > -} > - > -static int cf_check cpu_percpu_callback( > - struct notifier_block *nfb, unsigned long action, void *hcpu) > +int arch_cpu_percpu_callback(struct notifier_block *nfb, > + unsigned long action, void *hcpu) > { > unsigned int cpu = (unsigned long)hcpu; > - int rc = 0; > > switch ( action ) > { > - case CPU_UP_PREPARE: > - rc = init_percpu_area(cpu); > - break; > case CPU_UP_CANCELED: > case CPU_DEAD: > case CPU_RESUME_FAILED: > @@ -86,27 +26,5 @@ static int cf_check cpu_percpu_callback( > break; > } > > - return notifier_from_errno(rc); > -} > - > -static struct notifier_block cpu_percpu_nfb = { > - .notifier_call = cpu_percpu_callback, > - .priority = 100 /* highest priority */ > -}; > - > -static int __init cf_check percpu_presmp_init(void) > -{ > - register_cpu_notifier(&cpu_percpu_nfb); > - > return 0; > } > -presmp_initcall(percpu_presmp_init); > - > -/* > - * Local variables: > - * mode: C > - * c-file-style: "BSD" > - * c-basic-offset: 4 > - * indent-tabs-mode: nil > - * End: > - */ This needs to stay. 
> diff --git a/xen/common/Makefile b/xen/common/Makefile > index fc52e0857d..f90bb00d23 100644 > --- a/xen/common/Makefile > +++ b/xen/common/Makefile > @@ -31,6 +31,7 @@ obj-y += notifier.o > obj-$(CONFIG_NUMA) += numa.o > obj-y += page_alloc.o > obj-y += pdx.o > +obj-y += percpu.o > obj-$(CONFIG_PERF_COUNTERS) += perfc.o > obj-bin-$(CONFIG_HAS_PMAP) += pmap.init.o > obj-y += preempt.o > diff --git a/xen/common/percpu.c b/xen/common/percpu.c > new file mode 100644 > index 0000000000..3837ef5714 > --- /dev/null > +++ b/xen/common/percpu.c > @@ -0,0 +1,127 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > +#include <xen/percpu.h> > +#include <xen/cpu.h> > +#include <xen/init.h> > +#include <xen/mm.h> > +#include <xen/rcupdate.h> > + > +unsigned long __per_cpu_offset[NR_CPUS]; > + > +#ifndef INVALID_PERCPU_AREA > +#define INVALID_PERCPU_AREA (-(long)__per_cpu_start) > +#endif > + > +#ifndef ARCH_PERCPU_AREA_CHECK > +inline int arch_percpu_area_init_status(void) This wants to be static. No need for inline as the compiler is free to ignore it... > +{ > + return -EBUSY; See my question above about the difference between x86 and the rest. > +} > +#endif> + > +#ifndef ARCH_CPU_PERCPU_CALLBACK > +inline int arch_cpu_percpu_callback(struct notifier_block *nfb, > + unsigned long action, void *hcpu) I am not entirely sure we should introduce arch_cpu_percpu_callback. It seems there are some redundant work. Looking at the x86 implementation the differences are: * The additional checks * Extra actions (e.g CPU_RESUME_FAILED/CPU_REMOVE). I think the extra actions also make sense for other architectures. For the additional checks, the parking feature is implemented in common/*. So is there any way we could avoid introduce arch_cpu_percpu_callback()? > +{ > + unsigned int cpu = (unsigned long)hcpu; > + > + switch ( action ) > + { > + case CPU_UP_CANCELED: > + case CPU_DEAD: > + free_percpu_area(cpu); > + break; > + } > + > + return 0; > +} > +#endif > + > +#define PERCPU_ORDER get_order_from_bytes(__per_cpu_data_end - __per_cpu_start) > + > +void __init percpu_init_areas(void) > +{ > + unsigned int cpu; > + > + for ( cpu = 1; cpu < NR_CPUS; cpu++ ) > + __per_cpu_offset[cpu] = INVALID_PERCPU_AREA; > +} > + > +static int init_percpu_area(unsigned int cpu) > +{ > + char *p; > + > + if ( __per_cpu_offset[cpu] != INVALID_PERCPU_AREA ) > + return arch_percpu_area_init_status(); > + > + if ( (p = alloc_xenheap_pages(PERCPU_ORDER, 0)) == NULL ) > + return -ENOMEM; > + > + memset(p, 0, __per_cpu_data_end - __per_cpu_start); > + __per_cpu_offset[cpu] = p - __per_cpu_start; > + > + return 0; > +} > + > +struct free_info { > + unsigned int cpu; > + struct rcu_head rcu; > +}; > +static DEFINE_PER_CPU(struct free_info, free_info); > + > +static void cf_check _free_percpu_area(struct rcu_head *head) > +{ > + struct free_info *info = container_of(head, struct free_info, rcu); > + unsigned int cpu = info->cpu; > + char *p = __per_cpu_start + __per_cpu_offset[cpu]; > + > + free_xenheap_pages(p, PERCPU_ORDER); > + __per_cpu_offset[cpu] = INVALID_PERCPU_AREA; > +} > + > +void free_percpu_area(unsigned int cpu) > +{ > + struct free_info *info = &per_cpu(free_info, cpu); > + > + info->cpu = cpu; > + call_rcu(&info->rcu, _free_percpu_area); > +} > + > +static int cf_check cpu_percpu_callback( > + struct notifier_block *nfb, unsigned long action, void *hcpu) > +{ > + unsigned int cpu = (unsigned long)hcpu; > + int rc = 0; > + > + switch ( action ) > + { > + case CPU_UP_PREPARE: > + rc = init_percpu_area(cpu); > + break; > + 
default: > + arch_cpu_percpu_callback(nfb, action, hcpu); > + } > + > + return notifier_from_errno(rc); > +} > + > +static struct notifier_block cpu_percpu_nfb = { > + .notifier_call = cpu_percpu_callback, > + .priority = 100 /* highest priority */ > +}; > + > +static int __init cf_check percpu_presmp_init(void) > +{ > + register_cpu_notifier(&cpu_percpu_nfb); > + > + return 0; > +} > +presmp_initcall(percpu_presmp_init); > + > +/* > + * Local variables: > + * mode: C > + * c-file-style: "BSD" > + * c-basic-offset: 4 > + * indent-tabs-mode: nil > + * End: > + */ > diff --git a/xen/include/asm-generic/percpu.h b/xen/include/asm-generic/percpu.h > index 60af4f9ff9..c0373b1ad9 100644 > --- a/xen/include/asm-generic/percpu.h > +++ b/xen/include/asm-generic/percpu.h > @@ -12,6 +12,14 @@ extern const char __per_cpu_data_end[]; > extern unsigned long __per_cpu_offset[NR_CPUS]; > void percpu_init_areas(void); > > +void free_percpu_area(unsigned int cpu); > + > +int arch_percpu_area_init_status(void); > + > +struct notifier_block; > +int arch_cpu_percpu_callback(struct notifier_block *nfb, > + unsigned long action, void *hcpu); > + I believe this belong to include/xen/percpu.h. asm-generic is more for common definition that can be overriden by arch specific header. Cheers,
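A small sketch of the header placement Julien asks for at the end, assuming only the prototypes move; this is an illustrative excerpt, not part of the patch:

```c
/* xen/include/xen/percpu.h -- always-common declarations, per Julien's
 * suggestion; asm-generic/percpu.h stays reserved for definitions an
 * architecture may override. Prototypes copied from the hunk above. */
void free_percpu_area(unsigned int cpu);

int arch_percpu_area_init_status(void);

struct notifier_block;
int arch_cpu_percpu_callback(struct notifier_block *nfb,
                             unsigned long action, void *hcpu);
```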
On 22/09/2024 10:23 am, Julien Grall wrote: > On 19/09/2024 17:59, Oleksii Kurochko wrote: >> diff --git a/xen/arch/x86/percpu.c b/xen/arch/x86/percpu.c >> index 3205eacea6..33bded8cac 100644 >> --- a/xen/arch/x86/percpu.c >> +++ b/xen/arch/x86/percpu.c >> @@ -1,79 +1,19 @@ >> -#include <xen/percpu.h> >> #include <xen/cpu.h> >> -#include <xen/init.h> >> -#include <xen/mm.h> >> -#include <xen/rcupdate.h> >> - >> -unsigned long __per_cpu_offset[NR_CPUS]; >> - >> -/* >> - * Force uses of per_cpu() with an invalid area to attempt to access >> the >> - * middle of the non-canonical address space resulting in a #GP, >> rather than a >> - * possible #PF at (NULL + a little) which has security implications >> in the >> - * context of PV guests. >> - */ >> -#define INVALID_PERCPU_AREA (0x8000000000000000UL - (unsigned >> long)__per_cpu_start) >> -#define PERCPU_ORDER get_order_from_bytes(__per_cpu_data_end - >> __per_cpu_start) >> - >> -void __init percpu_init_areas(void) >> -{ >> - unsigned int cpu; >> - >> - for ( cpu = 1; cpu < NR_CPUS; cpu++ ) >> - __per_cpu_offset[cpu] = INVALID_PERCPU_AREA; >> -} >> +#include <xen/percpu.h> >> +#include <xen/smp.h> >> -static int init_percpu_area(unsigned int cpu) >> +int arch_percpu_area_init_status(void) > > I understand that Arm and x86 are returning a different value today. > But now that we are provising a common implementation, I think we need > to explain the difference. This is probably a question for the x86 folks. The declarations for CPU Parking (variable, or compile time false) wants to be in the new common header, at which point arch_percpu_area_init_status() doesn't need to exist. That also makes it very clear that there's a difference in return value based on CPU Parking (and the comment beside the variable explains this is about not quite offlining CPUs), which is far clearer than some arch function. >> diff --git a/xen/common/percpu.c b/xen/common/percpu.c >> new file mode 100644 >> index 0000000000..3837ef5714 >> --- /dev/null >> +++ b/xen/common/percpu.c >> @@ -0,0 +1,127 @@ >> +/* SPDX-License-Identifier: GPL-2.0 */ >> +#include <xen/percpu.h> >> +#include <xen/cpu.h> >> +#include <xen/init.h> >> +#include <xen/mm.h> >> +#include <xen/rcupdate.h> >> + >> +unsigned long __per_cpu_offset[NR_CPUS]; unsigned long __per_cpu_offset[NR_CPUS] = { [0 ... NR_CPUS - 1] = INVALID_PERCPU_AREA, }; should work, removing the need for percpu_init_areas() and avoids a window during boot where all CPUs "share" a percpu area. ~Andrew
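A minimal sketch of the "variable, or compile time false" shape Andrew describes for the common header; the guard symbol is hypothetical and only stands in for whatever condition enables CPU parking on x86:

```c
/* Hypothetical common-header excerpt -- not the actual patch. */
#ifdef CONFIG_CPU_PARKING       /* illustrative guard only */
extern bool park_offline_cpus;  /* see the comment beside its definition */
#else
#define park_offline_cpus false
#endif
```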
On Sun, 2024-09-22 at 10:43 +0200, Andrew Cooper wrote: > On 22/09/2024 10:23 am, Julien Grall wrote: > > On 19/09/2024 17:59, Oleksii Kurochko wrote: > > > diff --git a/xen/arch/x86/percpu.c b/xen/arch/x86/percpu.c > > > index 3205eacea6..33bded8cac 100644 > > > --- a/xen/arch/x86/percpu.c > > > +++ b/xen/arch/x86/percpu.c > > > @@ -1,79 +1,19 @@ > > > -#include <xen/percpu.h> > > > #include <xen/cpu.h> > > > -#include <xen/init.h> > > > -#include <xen/mm.h> > > > -#include <xen/rcupdate.h> > > > - > > > -unsigned long __per_cpu_offset[NR_CPUS]; > > > - > > > -/* > > > - * Force uses of per_cpu() with an invalid area to attempt to > > > access > > > the > > > - * middle of the non-canonical address space resulting in a #GP, > > > rather than a > > > - * possible #PF at (NULL + a little) which has security > > > implications > > > in the > > > - * context of PV guests. > > > - */ > > > -#define INVALID_PERCPU_AREA (0x8000000000000000UL - (unsigned > > > long)__per_cpu_start) > > > -#define PERCPU_ORDER get_order_from_bytes(__per_cpu_data_end - > > > __per_cpu_start) > > > - > > > -void __init percpu_init_areas(void) > > > -{ > > > - unsigned int cpu; > > > - > > > - for ( cpu = 1; cpu < NR_CPUS; cpu++ ) > > > - __per_cpu_offset[cpu] = INVALID_PERCPU_AREA; > > > -} > > > +#include <xen/percpu.h> > > > +#include <xen/smp.h> > > > -static int init_percpu_area(unsigned int cpu) > > > +int arch_percpu_area_init_status(void) > > > > I understand that Arm and x86 are returning a different value > > today. > > But now that we are provising a common implementation, I think we > > need > > to explain the difference. This is probably a question for the x86 > > folks. > > The declarations for CPU Parking (variable, or compile time false) > wants > to be in the new common header, at which point > arch_percpu_area_init_status() doesn't need to exist. > > That also makes it very clear that there's a difference in return > value > based on CPU Parking (and the comment beside the variable explains > this > is about not quite offlining CPUs), which is far clearer than some > arch > function. Thanks for suggestion. It would be better, I had also concerns about arch_percpu_area_init_status but couldn't come up with better thing. Just to make sure I understand correctly—are you saying that I can use park_offline_cpus instead of arch_percpu_area_init_status()? diff --git a/xen/common/percpu.c b/xen/common/percpu.c index 3837ef5714..f997418586 100644 --- a/xen/common/percpu.c +++ b/xen/common/percpu.c @@ -51,7 +51,7 @@ static int init_percpu_area(unsigned int cpu) char *p; if ( __per_cpu_offset[cpu] != INVALID_PERCPU_AREA ) - return arch_percpu_area_init_status(); + return park_offline_cpus ? 0 : -EBUSY; if ( (p = alloc_xenheap_pages(PERCPU_ORDER, 0)) == NULL ) return -ENOMEM; ~ Oleksii
Hi Julien,

On Sun, 2024-09-22 at 10:23 +0200, Julien Grall wrote:
> > +}
> > +#endif
> > +
> > +#ifndef ARCH_CPU_PERCPU_CALLBACK
> > +inline int arch_cpu_percpu_callback(struct notifier_block *nfb,
> > +                                    unsigned long action, void
> > *hcpu)
>
> I am not entirely sure we should introduce arch_cpu_percpu_callback.
> It seems there are some redundant work. Looking at the x86
> implementation the differences are:
> * The additional checks
> * Extra actions (e.g CPU_RESUME_FAILED/CPU_REMOVE).
>
> I think the extra actions also make sense for other architectures.
> For the additional checks, the parking feature is implemented in
> common/*.
>
> So is there any way we could avoid introduce
> arch_cpu_percpu_callback()?

Initially, I did not want to introduce arch_cpu_percpu_callback(). If it
were only the park_offline_cpus check, it would be safe enough: other
architectures would continue to function as before, with
park_offline_cpus = false for them (and CPU parking is a common feature,
so it can be expected to work on different architectures).

I then looked into why the system_state != SYS_STATE_suspend check is
necessary, and according to the commit message:

```
xen: don't free percpu areas during suspend

Instead of freeing percpu areas during suspend and allocating them
again when resuming keep them. Only free an area in case a cpu didn't
come up again when resuming.

It should be noted that there is a potential change in behaviour as
the percpu areas are no longer zeroed out during suspend/resume. While
I have checked the called cpu notifier hooks to cope with that there
might be some well hidden dependency on the previous behaviour. OTOH a
component not registering itself for cpu down/up and expecting to see
a zeroed percpu variable after suspend/resume is kind of broken
already. And the opposite case, where a component is not registered to
be called for cpu down/up and is not expecting a percpu variable
suddenly to be zero due to suspend/resume is much more probable,
especially as the suspend/resume functionality seems not to be tested
that often.
```

It is mentioned that "there is a potential change in behaviour as the
percpu areas are no longer zeroed out during suspend/resume", and I was
unsure whether this change might break something for Arm.

I can attempt to move everything to the common percpu.c and at least
verify that there are no issues in CI. If that suffices to confirm that
everything is okay, I can create a new patch series version. Does this
make sense?

~ Oleksii
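A minimal sketch of the fully common notifier being proposed here, folding in the x86-only checks; it assumes park_offline_cpus is visible to common code (compile-time false where CPU parking is unsupported) and is illustrative rather than the actual patch:

```c
static int cf_check cpu_percpu_callback(
    struct notifier_block *nfb, unsigned long action, void *hcpu)
{
    unsigned int cpu = (unsigned long)hcpu;
    int rc = 0;

    switch ( action )
    {
    case CPU_UP_PREPARE:
        rc = init_percpu_area(cpu);
        break;

    case CPU_UP_CANCELED:
    case CPU_DEAD:
    case CPU_RESUME_FAILED:
        /* Keep the area for parked CPUs, and keep all areas across suspend. */
        if ( !park_offline_cpus && system_state != SYS_STATE_suspend )
            free_percpu_area(cpu);
        break;

    case CPU_REMOVE:
        /* A parked CPU releases its area only once it is fully removed. */
        if ( park_offline_cpus )
            free_percpu_area(cpu);
        break;
    }

    return notifier_from_errno(rc);
}
```

With this shape, neither arch_percpu_area_init_status() nor arch_cpu_percpu_callback() would be needed, which is the direction Julien's question above points at.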
On 23/09/2024 10:26 am, oleksii.kurochko@gmail.com wrote: > On Sun, 2024-09-22 at 10:43 +0200, Andrew Cooper wrote: >> On 22/09/2024 10:23 am, Julien Grall wrote: >>> On 19/09/2024 17:59, Oleksii Kurochko wrote: >>>> diff --git a/xen/arch/x86/percpu.c b/xen/arch/x86/percpu.c >>>> index 3205eacea6..33bded8cac 100644 >>>> --- a/xen/arch/x86/percpu.c >>>> +++ b/xen/arch/x86/percpu.c >>>> @@ -1,79 +1,19 @@ >>>> -#include <xen/percpu.h> >>>> #include <xen/cpu.h> >>>> -#include <xen/init.h> >>>> -#include <xen/mm.h> >>>> -#include <xen/rcupdate.h> >>>> - >>>> -unsigned long __per_cpu_offset[NR_CPUS]; >>>> - >>>> -/* >>>> - * Force uses of per_cpu() with an invalid area to attempt to >>>> access >>>> the >>>> - * middle of the non-canonical address space resulting in a #GP, >>>> rather than a >>>> - * possible #PF at (NULL + a little) which has security >>>> implications >>>> in the >>>> - * context of PV guests. >>>> - */ >>>> -#define INVALID_PERCPU_AREA (0x8000000000000000UL - (unsigned >>>> long)__per_cpu_start) >>>> -#define PERCPU_ORDER get_order_from_bytes(__per_cpu_data_end - >>>> __per_cpu_start) >>>> - >>>> -void __init percpu_init_areas(void) >>>> -{ >>>> - unsigned int cpu; >>>> - >>>> - for ( cpu = 1; cpu < NR_CPUS; cpu++ ) >>>> - __per_cpu_offset[cpu] = INVALID_PERCPU_AREA; >>>> -} >>>> +#include <xen/percpu.h> >>>> +#include <xen/smp.h> >>>> -static int init_percpu_area(unsigned int cpu) >>>> +int arch_percpu_area_init_status(void) >>> I understand that Arm and x86 are returning a different value >>> today. >>> But now that we are provising a common implementation, I think we >>> need >>> to explain the difference. This is probably a question for the x86 >>> folks. >> The declarations for CPU Parking (variable, or compile time false) >> wants >> to be in the new common header, at which point >> arch_percpu_area_init_status() doesn't need to exist. >> >> That also makes it very clear that there's a difference in return >> value >> based on CPU Parking (and the comment beside the variable explains >> this >> is about not quite offlining CPUs), which is far clearer than some >> arch >> function. > Thanks for suggestion. It would be better, I had also concerns about > arch_percpu_area_init_status but couldn't come up with better thing. > > Just to make sure I understand correctly—are you saying that I can use > park_offline_cpus instead of arch_percpu_area_init_status()? > diff --git a/xen/common/percpu.c b/xen/common/percpu.c > index 3837ef5714..f997418586 100644 > --- a/xen/common/percpu.c > +++ b/xen/common/percpu.c > @@ -51,7 +51,7 @@ static int init_percpu_area(unsigned int cpu) > char *p; > > if ( __per_cpu_offset[cpu] != INVALID_PERCPU_AREA ) > - return arch_percpu_area_init_status(); > + return park_offline_cpus ? 0 : -EBUSY; > > if ( (p = alloc_xenheap_pages(PERCPU_ORDER, 0)) == NULL ) > return -ENOMEM; Yes, that's exactly what I had in mind. ~Andrew
On Sun, 2024-09-22 at 10:43 +0200, Andrew Cooper wrote:
> > > diff --git a/xen/common/percpu.c b/xen/common/percpu.c
> > > new file mode 100644
> > > index 0000000000..3837ef5714
> > > --- /dev/null
> > > +++ b/xen/common/percpu.c
> > > @@ -0,0 +1,127 @@
> > > +/* SPDX-License-Identifier: GPL-2.0 */
> > > +#include <xen/percpu.h>
> > > +#include <xen/cpu.h>
> > > +#include <xen/init.h>
> > > +#include <xen/mm.h>
> > > +#include <xen/rcupdate.h>
> > > +
> > > +unsigned long __per_cpu_offset[NR_CPUS];
>
> unsigned long __per_cpu_offset[NR_CPUS] = {
>     [0 ... NR_CPUS - 1] = INVALID_PERCPU_AREA,
> };
>
> should work, removing the need for percpu_init_areas() and avoids a
> window during boot where all CPUs "share" a percpu area.

If it is defined this way, there is a compilation error:

  ./arch/x86/include/asm/percpu.h:14:29: error: initializer element is not constant
     14 | #define INVALID_PERCPU_AREA (0x8000000000000000UL - (unsigned long)__per_cpu_start)

I think that is the reason why the separate function was introduced.

~ Oleksii
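The failure can be reproduced outside Xen: a static initializer may generally reference a symbol's address (possibly plus a constant), but a constant minus a symbol address is not something the toolchain can normally emit as a relocation, so GCC rejects it. A minimal standalone illustration, not Xen code, with an arbitrary array size:

```c
/* gcc -c repro.c  ->  "error: initializer element is not constant" */
extern const char __per_cpu_start[];

#define INVALID_PERCPU_AREA (0x8000000000000000UL - (unsigned long)__per_cpu_start)

unsigned long __per_cpu_offset[4] = {
    [0 ... 3] = INVALID_PERCPU_AREA,   /* value depends on a link-time address */
};
```

Computing the value at run time, as percpu_init_areas() does, sidesteps the problem.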
diff --git a/xen/arch/arm/Makefile b/xen/arch/arm/Makefile index 7792bff597..e4ad1ce851 100644 --- a/xen/arch/arm/Makefile +++ b/xen/arch/arm/Makefile @@ -39,7 +39,6 @@ obj-$(CONFIG_MEM_ACCESS) += mem_access.o obj-y += mm.o obj-y += monitor.o obj-y += p2m.o -obj-y += percpu.o obj-y += platform.o obj-y += platform_hypercall.o obj-y += physdev.o diff --git a/xen/arch/arm/percpu.c b/xen/arch/arm/percpu.c deleted file mode 100644 index 87fe960330..0000000000 --- a/xen/arch/arm/percpu.c +++ /dev/null @@ -1,85 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include <xen/percpu.h> -#include <xen/cpu.h> -#include <xen/init.h> -#include <xen/mm.h> -#include <xen/rcupdate.h> - -unsigned long __per_cpu_offset[NR_CPUS]; -#define INVALID_PERCPU_AREA (-(long)__per_cpu_start) -#define PERCPU_ORDER (get_order_from_bytes(__per_cpu_data_end-__per_cpu_start)) - -void __init percpu_init_areas(void) -{ - unsigned int cpu; - for ( cpu = 1; cpu < NR_CPUS; cpu++ ) - __per_cpu_offset[cpu] = INVALID_PERCPU_AREA; -} - -static int init_percpu_area(unsigned int cpu) -{ - char *p; - if ( __per_cpu_offset[cpu] != INVALID_PERCPU_AREA ) - return -EBUSY; - if ( (p = alloc_xenheap_pages(PERCPU_ORDER, 0)) == NULL ) - return -ENOMEM; - memset(p, 0, __per_cpu_data_end - __per_cpu_start); - __per_cpu_offset[cpu] = p - __per_cpu_start; - return 0; -} - -struct free_info { - unsigned int cpu; - struct rcu_head rcu; -}; -static DEFINE_PER_CPU(struct free_info, free_info); - -static void _free_percpu_area(struct rcu_head *head) -{ - struct free_info *info = container_of(head, struct free_info, rcu); - unsigned int cpu = info->cpu; - char *p = __per_cpu_start + __per_cpu_offset[cpu]; - free_xenheap_pages(p, PERCPU_ORDER); - __per_cpu_offset[cpu] = INVALID_PERCPU_AREA; -} - -static void free_percpu_area(unsigned int cpu) -{ - struct free_info *info = &per_cpu(free_info, cpu); - info->cpu = cpu; - call_rcu(&info->rcu, _free_percpu_area); -} - -static int cpu_percpu_callback( - struct notifier_block *nfb, unsigned long action, void *hcpu) -{ - unsigned int cpu = (unsigned long)hcpu; - int rc = 0; - - switch ( action ) - { - case CPU_UP_PREPARE: - rc = init_percpu_area(cpu); - break; - case CPU_UP_CANCELED: - case CPU_DEAD: - free_percpu_area(cpu); - break; - default: - break; - } - - return notifier_from_errno(rc); -} - -static struct notifier_block cpu_percpu_nfb = { - .notifier_call = cpu_percpu_callback, - .priority = 100 /* highest priority */ -}; - -static int __init percpu_presmp_init(void) -{ - register_cpu_notifier(&cpu_percpu_nfb); - return 0; -} -presmp_initcall(percpu_presmp_init); diff --git a/xen/arch/ppc/stubs.c b/xen/arch/ppc/stubs.c index bdb5f8c66d..fff82f5cf3 100644 --- a/xen/arch/ppc/stubs.c +++ b/xen/arch/ppc/stubs.c @@ -141,7 +141,6 @@ void smp_send_state_dump(unsigned int cpu) /* domain.c */ DEFINE_PER_CPU(struct vcpu *, curr_vcpu); -unsigned long __per_cpu_offset[NR_CPUS]; void context_switch(struct vcpu *prev, struct vcpu *next) { diff --git a/xen/arch/riscv/stubs.c b/xen/arch/riscv/stubs.c index 2aa245f272..5951b0ce91 100644 --- a/xen/arch/riscv/stubs.c +++ b/xen/arch/riscv/stubs.c @@ -133,7 +133,6 @@ void smp_send_state_dump(unsigned int cpu) /* domain.c */ DEFINE_PER_CPU(struct vcpu *, curr_vcpu); -unsigned long __per_cpu_offset[NR_CPUS]; void context_switch(struct vcpu *prev, struct vcpu *next) { diff --git a/xen/arch/x86/include/asm/Makefile b/xen/arch/x86/include/asm/Makefile index daab34ff0a..2c27787d31 100644 --- a/xen/arch/x86/include/asm/Makefile +++ b/xen/arch/x86/include/asm/Makefile @@ -1,3 +1,2 
@@ # SPDX-License-Identifier: GPL-2.0-only generic-y += div64.h -generic-y += percpu.h diff --git a/xen/arch/x86/include/asm/percpu.h b/xen/arch/x86/include/asm/percpu.h new file mode 100644 index 0000000000..0f9efba27a --- /dev/null +++ b/xen/arch/x86/include/asm/percpu.h @@ -0,0 +1,18 @@ +#ifndef __X86_PERCPU_H__ +#define __X86_PERCPU_H__ + +#include <asm-generic/percpu.h> + +/* + * Force uses of per_cpu() with an invalid area to attempt to access the + * middle of the non-canonical address space resulting in a #GP, rather than a + * possible #PF at (NULL + a little) which has security implications in the + * context of PV guests. + */ +#define INVALID_PERCPU_AREA (0x8000000000000000UL - (unsigned long)__per_cpu_start) + +#define ARCH_PERCPU_AREA_CHECK + +#define ARCH_CPU_PERCPU_CALLBACK + +#endif /* __X86_PERCPU_H__ */ diff --git a/xen/arch/x86/percpu.c b/xen/arch/x86/percpu.c index 3205eacea6..33bded8cac 100644 --- a/xen/arch/x86/percpu.c +++ b/xen/arch/x86/percpu.c @@ -1,79 +1,19 @@ -#include <xen/percpu.h> #include <xen/cpu.h> -#include <xen/init.h> -#include <xen/mm.h> -#include <xen/rcupdate.h> - -unsigned long __per_cpu_offset[NR_CPUS]; - -/* - * Force uses of per_cpu() with an invalid area to attempt to access the - * middle of the non-canonical address space resulting in a #GP, rather than a - * possible #PF at (NULL + a little) which has security implications in the - * context of PV guests. - */ -#define INVALID_PERCPU_AREA (0x8000000000000000UL - (unsigned long)__per_cpu_start) -#define PERCPU_ORDER get_order_from_bytes(__per_cpu_data_end - __per_cpu_start) - -void __init percpu_init_areas(void) -{ - unsigned int cpu; - - for ( cpu = 1; cpu < NR_CPUS; cpu++ ) - __per_cpu_offset[cpu] = INVALID_PERCPU_AREA; -} +#include <xen/percpu.h> +#include <xen/smp.h> -static int init_percpu_area(unsigned int cpu) +int arch_percpu_area_init_status(void) { - char *p; - - if ( __per_cpu_offset[cpu] != INVALID_PERCPU_AREA ) - return 0; - - if ( (p = alloc_xenheap_pages(PERCPU_ORDER, 0)) == NULL ) - return -ENOMEM; - - memset(p, 0, __per_cpu_data_end - __per_cpu_start); - __per_cpu_offset[cpu] = p - __per_cpu_start; - return 0; } -struct free_info { - unsigned int cpu; - struct rcu_head rcu; -}; -static DEFINE_PER_CPU(struct free_info, free_info); - -static void cf_check _free_percpu_area(struct rcu_head *head) -{ - struct free_info *info = container_of(head, struct free_info, rcu); - unsigned int cpu = info->cpu; - char *p = __per_cpu_start + __per_cpu_offset[cpu]; - - free_xenheap_pages(p, PERCPU_ORDER); - __per_cpu_offset[cpu] = INVALID_PERCPU_AREA; -} - -static void free_percpu_area(unsigned int cpu) -{ - struct free_info *info = &per_cpu(free_info, cpu); - - info->cpu = cpu; - call_rcu(&info->rcu, _free_percpu_area); -} - -static int cf_check cpu_percpu_callback( - struct notifier_block *nfb, unsigned long action, void *hcpu) +int arch_cpu_percpu_callback(struct notifier_block *nfb, + unsigned long action, void *hcpu) { unsigned int cpu = (unsigned long)hcpu; - int rc = 0; switch ( action ) { - case CPU_UP_PREPARE: - rc = init_percpu_area(cpu); - break; case CPU_UP_CANCELED: case CPU_DEAD: case CPU_RESUME_FAILED: @@ -86,27 +26,5 @@ static int cf_check cpu_percpu_callback( break; } - return notifier_from_errno(rc); -} - -static struct notifier_block cpu_percpu_nfb = { - .notifier_call = cpu_percpu_callback, - .priority = 100 /* highest priority */ -}; - -static int __init cf_check percpu_presmp_init(void) -{ - register_cpu_notifier(&cpu_percpu_nfb); - return 0; } 
-presmp_initcall(percpu_presmp_init); - -/* - * Local variables: - * mode: C - * c-file-style: "BSD" - * c-basic-offset: 4 - * indent-tabs-mode: nil - * End: - */ diff --git a/xen/common/Makefile b/xen/common/Makefile index fc52e0857d..f90bb00d23 100644 --- a/xen/common/Makefile +++ b/xen/common/Makefile @@ -31,6 +31,7 @@ obj-y += notifier.o obj-$(CONFIG_NUMA) += numa.o obj-y += page_alloc.o obj-y += pdx.o +obj-y += percpu.o obj-$(CONFIG_PERF_COUNTERS) += perfc.o obj-bin-$(CONFIG_HAS_PMAP) += pmap.init.o obj-y += preempt.o diff --git a/xen/common/percpu.c b/xen/common/percpu.c new file mode 100644 index 0000000000..3837ef5714 --- /dev/null +++ b/xen/common/percpu.c @@ -0,0 +1,127 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <xen/percpu.h> +#include <xen/cpu.h> +#include <xen/init.h> +#include <xen/mm.h> +#include <xen/rcupdate.h> + +unsigned long __per_cpu_offset[NR_CPUS]; + +#ifndef INVALID_PERCPU_AREA +#define INVALID_PERCPU_AREA (-(long)__per_cpu_start) +#endif + +#ifndef ARCH_PERCPU_AREA_CHECK +inline int arch_percpu_area_init_status(void) +{ + return -EBUSY; +} +#endif + +#ifndef ARCH_CPU_PERCPU_CALLBACK +inline int arch_cpu_percpu_callback(struct notifier_block *nfb, + unsigned long action, void *hcpu) +{ + unsigned int cpu = (unsigned long)hcpu; + + switch ( action ) + { + case CPU_UP_CANCELED: + case CPU_DEAD: + free_percpu_area(cpu); + break; + } + + return 0; +} +#endif + +#define PERCPU_ORDER get_order_from_bytes(__per_cpu_data_end - __per_cpu_start) + +void __init percpu_init_areas(void) +{ + unsigned int cpu; + + for ( cpu = 1; cpu < NR_CPUS; cpu++ ) + __per_cpu_offset[cpu] = INVALID_PERCPU_AREA; +} + +static int init_percpu_area(unsigned int cpu) +{ + char *p; + + if ( __per_cpu_offset[cpu] != INVALID_PERCPU_AREA ) + return arch_percpu_area_init_status(); + + if ( (p = alloc_xenheap_pages(PERCPU_ORDER, 0)) == NULL ) + return -ENOMEM; + + memset(p, 0, __per_cpu_data_end - __per_cpu_start); + __per_cpu_offset[cpu] = p - __per_cpu_start; + + return 0; +} + +struct free_info { + unsigned int cpu; + struct rcu_head rcu; +}; +static DEFINE_PER_CPU(struct free_info, free_info); + +static void cf_check _free_percpu_area(struct rcu_head *head) +{ + struct free_info *info = container_of(head, struct free_info, rcu); + unsigned int cpu = info->cpu; + char *p = __per_cpu_start + __per_cpu_offset[cpu]; + + free_xenheap_pages(p, PERCPU_ORDER); + __per_cpu_offset[cpu] = INVALID_PERCPU_AREA; +} + +void free_percpu_area(unsigned int cpu) +{ + struct free_info *info = &per_cpu(free_info, cpu); + + info->cpu = cpu; + call_rcu(&info->rcu, _free_percpu_area); +} + +static int cf_check cpu_percpu_callback( + struct notifier_block *nfb, unsigned long action, void *hcpu) +{ + unsigned int cpu = (unsigned long)hcpu; + int rc = 0; + + switch ( action ) + { + case CPU_UP_PREPARE: + rc = init_percpu_area(cpu); + break; + default: + arch_cpu_percpu_callback(nfb, action, hcpu); + } + + return notifier_from_errno(rc); +} + +static struct notifier_block cpu_percpu_nfb = { + .notifier_call = cpu_percpu_callback, + .priority = 100 /* highest priority */ +}; + +static int __init cf_check percpu_presmp_init(void) +{ + register_cpu_notifier(&cpu_percpu_nfb); + + return 0; +} +presmp_initcall(percpu_presmp_init); + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/xen/include/asm-generic/percpu.h b/xen/include/asm-generic/percpu.h index 60af4f9ff9..c0373b1ad9 100644 --- a/xen/include/asm-generic/percpu.h +++ 
b/xen/include/asm-generic/percpu.h @@ -12,6 +12,14 @@ extern const char __per_cpu_data_end[]; extern unsigned long __per_cpu_offset[NR_CPUS]; void percpu_init_areas(void); +void free_percpu_area(unsigned int cpu); + +int arch_percpu_area_init_status(void); + +struct notifier_block; +int arch_cpu_percpu_callback(struct notifier_block *nfb, + unsigned long action, void *hcpu); + #define per_cpu(var, cpu) \ (*RELOC_HIDE(&per_cpu__##var, __per_cpu_offset[cpu]))
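For context on the hunk just above: per_cpu(var, cpu) resolves a variable by adding the CPU's entry in __per_cpu_offset[] to the variable's link-time address inside the per-cpu data section, which is why an offset of INVALID_PERCPU_AREA turns stray accesses into a fault (near NULL with the generic value, in non-canonical space with the x86 value). A minimal sketch of that arithmetic, illustrative only:

```c
extern const char __per_cpu_start[], __per_cpu_data_end[];
extern unsigned long __per_cpu_offset[];

/* Roughly what per_cpu()/RELOC_HIDE() boil down to: shift the link-time
 * address of a per-cpu variable by the owning CPU's section offset. */
static inline void *percpu_ptr(void *linktime_addr, unsigned int cpu)
{
    return (char *)linktime_addr + __per_cpu_offset[cpu];
}
```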
Centralize per-cpu area management to reduce code duplication and
enhance maintainability across architectures.

The per-cpu area management code, which is largely common among
architectures, is moved to a shared implementation in
xen/common/percpu.c. This change includes:
 * Add arch_percpu_area_init_status() and arch_cpu_percpu_callback()
   functions to address architecture-specific variations.
 * Remove percpu.c from the Arm architecture.
 * For x86, percpu.c now only defines arch_percpu_area_init_status()
   and arch_cpu_percpu_callback(), and INVALID_PERCPU_AREA.
 * Drop the declaration of __per_cpu_offset[] from stubs.c in
   PPC and RISC-V to facilitate the build of the common per-cpu code.

No functional changes.

Signed-off-by: Oleksii Kurochko <oleksii.kurochko@gmail.com>
---
 xen/arch/arm/Makefile             |   1 -
 xen/arch/arm/percpu.c             |  85 --------------------
 xen/arch/ppc/stubs.c              |   1 -
 xen/arch/riscv/stubs.c            |   1 -
 xen/arch/x86/include/asm/Makefile |   1 -
 xen/arch/x86/include/asm/percpu.h |  18 +++++
 xen/arch/x86/percpu.c             |  92 ++--------------------
 xen/common/Makefile               |   1 +
 xen/common/percpu.c               | 127 ++++++++++++++++++++++++++++++
 xen/include/asm-generic/percpu.h  |   8 ++
 10 files changed, 159 insertions(+), 176 deletions(-)
 delete mode 100644 xen/arch/arm/percpu.c
 create mode 100644 xen/arch/x86/include/asm/percpu.h
 create mode 100644 xen/common/percpu.c