Message ID | 20220614094157.95631-1-julien@xen.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | xen/arm: irq: Initialize the per-CPU IRQs while preparing the CPU | expand |
Hi Julien, On 14.06.2022 11:41, Julien Grall wrote: > From: Julien Grall <jgrall@amazon.com> > > Commit 5047cd1d5dea "xen/common: Use enhanced ASSERT_ALLOC_CONTEXT in > xmalloc()" extended the checks in _xmalloc() to catch any use of the > helpers from context with interrupts disabled. > > Unfortunately, the rule is not followed when initializing the per-CPU > IRQs: > > (XEN) Xen call trace: > (XEN) [<002389f4>] _xmalloc+0xfc/0x314 (PC) > (XEN) [<00000000>] 00000000 (LR) > (XEN) [<0021a7c4>] init_one_irq_desc+0x48/0xd0 > (XEN) [<002807a8>] irq.c#init_local_irq_data+0x48/0xa4 > (XEN) [<00280834>] init_secondary_IRQ+0x10/0x2c > (XEN) [<00288fa4>] start_secondary+0x194/0x274 > (XEN) [<40010170>] 40010170 > (XEN) > (XEN) > (XEN) **************************************** > (XEN) Panic on CPU 2: > (XEN) Assertion '!in_irq() && (local_irq_is_enabled() || num_online_cpus() <= 1)' failed at common/xmalloc_tlsf.c:601 > (XEN) **************************************** > > This is happening because zalloc_cpumask_var() may allocate memory > if NR_CPUS is > 2 * sizeof(unsigned long). > > Avoid the problem by allocate the per-CPU IRQs while preparing the > CPU. Shouldn't this be" by initializing the per-CPU IRQs while ..." ? Either way this text is the same like in the previous patch so I think this is not correct. Other than that: Reviewed-by: Michal Orzel <michal.orzel@arm.com>
On 14/06/2022 12:05, Michal Orzel wrote: > Hi Julien, > > On 14.06.2022 11:41, Julien Grall wrote: >> From: Julien Grall <jgrall@amazon.com> >> >> Commit 5047cd1d5dea "xen/common: Use enhanced ASSERT_ALLOC_CONTEXT in >> xmalloc()" extended the checks in _xmalloc() to catch any use of the >> helpers from context with interrupts disabled. >> >> Unfortunately, the rule is not followed when initializing the per-CPU >> IRQs: >> >> (XEN) Xen call trace: >> (XEN) [<002389f4>] _xmalloc+0xfc/0x314 (PC) >> (XEN) [<00000000>] 00000000 (LR) >> (XEN) [<0021a7c4>] init_one_irq_desc+0x48/0xd0 >> (XEN) [<002807a8>] irq.c#init_local_irq_data+0x48/0xa4 >> (XEN) [<00280834>] init_secondary_IRQ+0x10/0x2c >> (XEN) [<00288fa4>] start_secondary+0x194/0x274 >> (XEN) [<40010170>] 40010170 >> (XEN) >> (XEN) >> (XEN) **************************************** >> (XEN) Panic on CPU 2: >> (XEN) Assertion '!in_irq() && (local_irq_is_enabled() || num_online_cpus() <= 1)' failed at common/xmalloc_tlsf.c:601 >> (XEN) **************************************** >> >> This is happening because zalloc_cpumask_var() may allocate memory >> if NR_CPUS is > 2 * sizeof(unsigned long). >> >> Avoid the problem by allocate the per-CPU IRQs while preparing the >> CPU. > Shouldn't this be" by initializing the per-CPU IRQs while ..." ? I am fine with using "initializing" rather than "allocating". > Either way this text is the same like in the previous patch so I think this is not correct. I can't quite parse this. > Other than that: > Reviewed-by: Michal Orzel <michal.orzel@arm.com> Thanks! Cheers,
On Tue, 14 Jun 2022, Julien Grall wrote: > From: Julien Grall <jgrall@amazon.com> > > Commit 5047cd1d5dea "xen/common: Use enhanced ASSERT_ALLOC_CONTEXT in > xmalloc()" extended the checks in _xmalloc() to catch any use of the > helpers from context with interrupts disabled. > > Unfortunately, the rule is not followed when initializing the per-CPU > IRQs: > > (XEN) Xen call trace: > (XEN) [<002389f4>] _xmalloc+0xfc/0x314 (PC) > (XEN) [<00000000>] 00000000 (LR) > (XEN) [<0021a7c4>] init_one_irq_desc+0x48/0xd0 > (XEN) [<002807a8>] irq.c#init_local_irq_data+0x48/0xa4 > (XEN) [<00280834>] init_secondary_IRQ+0x10/0x2c > (XEN) [<00288fa4>] start_secondary+0x194/0x274 > (XEN) [<40010170>] 40010170 > (XEN) > (XEN) > (XEN) **************************************** > (XEN) Panic on CPU 2: > (XEN) Assertion '!in_irq() && (local_irq_is_enabled() || num_online_cpus() <= 1)' failed at common/xmalloc_tlsf.c:601 > (XEN) **************************************** > > This is happening because zalloc_cpumask_var() may allocate memory > if NR_CPUS is > 2 * sizeof(unsigned long). > > Avoid the problem by allocate the per-CPU IRQs while preparing the > CPU. > > This also has the benefit to remove a BUG_ON() in the secondary CPU > code. > > Signed-off-by: Julien Grall <jgrall@amazon.com> > --- > xen/arch/arm/include/asm/irq.h | 1 - > xen/arch/arm/irq.c | 35 +++++++++++++++++++++++++++------- > xen/arch/arm/smpboot.c | 2 -- > 3 files changed, 28 insertions(+), 10 deletions(-) > > diff --git a/xen/arch/arm/include/asm/irq.h b/xen/arch/arm/include/asm/irq.h > index e45d57459899..245f49dcbac5 100644 > --- a/xen/arch/arm/include/asm/irq.h > +++ b/xen/arch/arm/include/asm/irq.h > @@ -73,7 +73,6 @@ static inline bool is_lpi(unsigned int irq) > bool is_assignable_irq(unsigned int irq); > > void init_IRQ(void); > -void init_secondary_IRQ(void); > > int route_irq_to_guest(struct domain *d, unsigned int virq, > unsigned int irq, const char *devname); > diff --git a/xen/arch/arm/irq.c b/xen/arch/arm/irq.c > index b761d90c4063..56bdcb95335d 100644 > --- a/xen/arch/arm/irq.c > +++ b/xen/arch/arm/irq.c > @@ -17,6 +17,7 @@ > * GNU General Public License for more details. > */ > > +#include <xen/cpu.h> > #include <xen/lib.h> > #include <xen/spinlock.h> > #include <xen/irq.h> > @@ -100,7 +101,7 @@ static int __init init_irq_data(void) > return 0; > } > > -static int init_local_irq_data(void) > +static int init_local_irq_data(unsigned int cpu) > { > int irq; > > @@ -108,7 +109,7 @@ static int init_local_irq_data(void) > > for ( irq = 0; irq < NR_LOCAL_IRQS; irq++ ) > { > - struct irq_desc *desc = irq_to_desc(irq); > + struct irq_desc *desc = &per_cpu(local_irq_desc, cpu)[irq]; > int rc = init_one_irq_desc(desc); > > if ( rc ) > @@ -131,6 +132,29 @@ static int init_local_irq_data(void) > return 0; > } > > +static int cpu_callback(struct notifier_block *nfb, unsigned long action, > + void *hcpu) > +{ > + unsigned long cpu = (unsigned long)hcpu; unsigned int cpu ? The rest looks good > + int rc = 0; > + > + switch ( action ) > + { > + case CPU_UP_PREPARE: > + rc = init_local_irq_data(cpu); > + if ( rc ) > + printk(XENLOG_ERR "Unable to allocate local IRQ for CPU%lu\n", > + cpu); > + break; > + } > + > + return !rc ? NOTIFY_DONE : notifier_from_errno(rc); > +} > + > +static struct notifier_block cpu_nfb = { > + .notifier_call = cpu_callback, > +}; > + > void __init init_IRQ(void) > { > int irq; > @@ -140,13 +164,10 @@ void __init init_IRQ(void) > local_irqs_type[irq] = IRQ_TYPE_INVALID; > spin_unlock(&local_irqs_type_lock); > > - BUG_ON(init_local_irq_data() < 0); > + BUG_ON(init_local_irq_data(smp_processor_id()) < 0); > BUG_ON(init_irq_data() < 0); > -} > > -void init_secondary_IRQ(void) > -{ > - BUG_ON(init_local_irq_data() < 0); > + register_cpu_notifier(&cpu_nfb); > } > > static inline struct irq_guest *irq_get_guest_info(struct irq_desc *desc) > diff --git a/xen/arch/arm/smpboot.c b/xen/arch/arm/smpboot.c > index 9bb32a301a70..4888bcd78a5a 100644 > --- a/xen/arch/arm/smpboot.c > +++ b/xen/arch/arm/smpboot.c > @@ -359,8 +359,6 @@ void start_secondary(void) > > gic_init_secondary_cpu(); > > - init_secondary_IRQ(); > - > set_current(idle_vcpu[cpuid]); > > setup_cpu_sibling_map(cpuid); > -- > 2.32.0 >
Hi Stefano, On 15/06/2022 01:32, Stefano Stabellini wrote: > On Tue, 14 Jun 2022, Julien Grall wrote: >> From: Julien Grall <jgrall@amazon.com> >> >> Commit 5047cd1d5dea "xen/common: Use enhanced ASSERT_ALLOC_CONTEXT in >> xmalloc()" extended the checks in _xmalloc() to catch any use of the >> helpers from context with interrupts disabled. >> >> Unfortunately, the rule is not followed when initializing the per-CPU >> IRQs: >> >> (XEN) Xen call trace: >> (XEN) [<002389f4>] _xmalloc+0xfc/0x314 (PC) >> (XEN) [<00000000>] 00000000 (LR) >> (XEN) [<0021a7c4>] init_one_irq_desc+0x48/0xd0 >> (XEN) [<002807a8>] irq.c#init_local_irq_data+0x48/0xa4 >> (XEN) [<00280834>] init_secondary_IRQ+0x10/0x2c >> (XEN) [<00288fa4>] start_secondary+0x194/0x274 >> (XEN) [<40010170>] 40010170 >> (XEN) >> (XEN) >> (XEN) **************************************** >> (XEN) Panic on CPU 2: >> (XEN) Assertion '!in_irq() && (local_irq_is_enabled() || num_online_cpus() <= 1)' failed at common/xmalloc_tlsf.c:601 >> (XEN) **************************************** >> >> This is happening because zalloc_cpumask_var() may allocate memory >> if NR_CPUS is > 2 * sizeof(unsigned long). >> >> Avoid the problem by allocate the per-CPU IRQs while preparing the >> CPU. >> >> This also has the benefit to remove a BUG_ON() in the secondary CPU >> code. >> >> Signed-off-by: Julien Grall <jgrall@amazon.com> >> --- >> xen/arch/arm/include/asm/irq.h | 1 - >> xen/arch/arm/irq.c | 35 +++++++++++++++++++++++++++------- >> xen/arch/arm/smpboot.c | 2 -- >> 3 files changed, 28 insertions(+), 10 deletions(-) >> >> diff --git a/xen/arch/arm/include/asm/irq.h b/xen/arch/arm/include/asm/irq.h >> index e45d57459899..245f49dcbac5 100644 >> --- a/xen/arch/arm/include/asm/irq.h >> +++ b/xen/arch/arm/include/asm/irq.h >> @@ -73,7 +73,6 @@ static inline bool is_lpi(unsigned int irq) >> bool is_assignable_irq(unsigned int irq); >> >> void init_IRQ(void); >> -void init_secondary_IRQ(void); >> >> int route_irq_to_guest(struct domain *d, unsigned int virq, >> unsigned int irq, const char *devname); >> diff --git a/xen/arch/arm/irq.c b/xen/arch/arm/irq.c >> index b761d90c4063..56bdcb95335d 100644 >> --- a/xen/arch/arm/irq.c >> +++ b/xen/arch/arm/irq.c >> @@ -17,6 +17,7 @@ >> * GNU General Public License for more details. >> */ >> >> +#include <xen/cpu.h> >> #include <xen/lib.h> >> #include <xen/spinlock.h> >> #include <xen/irq.h> >> @@ -100,7 +101,7 @@ static int __init init_irq_data(void) >> return 0; >> } >> >> -static int init_local_irq_data(void) >> +static int init_local_irq_data(unsigned int cpu) >> { >> int irq; >> >> @@ -108,7 +109,7 @@ static int init_local_irq_data(void) >> >> for ( irq = 0; irq < NR_LOCAL_IRQS; irq++ ) >> { >> - struct irq_desc *desc = irq_to_desc(irq); >> + struct irq_desc *desc = &per_cpu(local_irq_desc, cpu)[irq]; >> int rc = init_one_irq_desc(desc); >> >> if ( rc ) >> @@ -131,6 +132,29 @@ static int init_local_irq_data(void) >> return 0; >> } >> >> +static int cpu_callback(struct notifier_block *nfb, unsigned long action, >> + void *hcpu) >> +{ >> + unsigned long cpu = (unsigned long)hcpu; > > unsigned int cpu ? Hmmm... We seem to have a mix in the code base. I am OK to switch to unsigned int. > > The rest looks good Can this be converted to an ack or review tag? Cheers,
On Wed, 22 Jun 2022, Julien Grall wrote: > On 15/06/2022 01:32, Stefano Stabellini wrote: > > On Tue, 14 Jun 2022, Julien Grall wrote: > > > From: Julien Grall <jgrall@amazon.com> > > > > > > Commit 5047cd1d5dea "xen/common: Use enhanced ASSERT_ALLOC_CONTEXT in > > > xmalloc()" extended the checks in _xmalloc() to catch any use of the > > > helpers from context with interrupts disabled. > > > > > > Unfortunately, the rule is not followed when initializing the per-CPU > > > IRQs: > > > > > > (XEN) Xen call trace: > > > (XEN) [<002389f4>] _xmalloc+0xfc/0x314 (PC) > > > (XEN) [<00000000>] 00000000 (LR) > > > (XEN) [<0021a7c4>] init_one_irq_desc+0x48/0xd0 > > > (XEN) [<002807a8>] irq.c#init_local_irq_data+0x48/0xa4 > > > (XEN) [<00280834>] init_secondary_IRQ+0x10/0x2c > > > (XEN) [<00288fa4>] start_secondary+0x194/0x274 > > > (XEN) [<40010170>] 40010170 > > > (XEN) > > > (XEN) > > > (XEN) **************************************** > > > (XEN) Panic on CPU 2: > > > (XEN) Assertion '!in_irq() && (local_irq_is_enabled() || num_online_cpus() > > > <= 1)' failed at common/xmalloc_tlsf.c:601 > > > (XEN) **************************************** > > > > > > This is happening because zalloc_cpumask_var() may allocate memory > > > if NR_CPUS is > 2 * sizeof(unsigned long). > > > > > > Avoid the problem by allocate the per-CPU IRQs while preparing the > > > CPU. > > > > > > This also has the benefit to remove a BUG_ON() in the secondary CPU > > > code. > > > > > > Signed-off-by: Julien Grall <jgrall@amazon.com> > > > --- > > > xen/arch/arm/include/asm/irq.h | 1 - > > > xen/arch/arm/irq.c | 35 +++++++++++++++++++++++++++------- > > > xen/arch/arm/smpboot.c | 2 -- > > > 3 files changed, 28 insertions(+), 10 deletions(-) > > > > > > diff --git a/xen/arch/arm/include/asm/irq.h > > > b/xen/arch/arm/include/asm/irq.h > > > index e45d57459899..245f49dcbac5 100644 > > > --- a/xen/arch/arm/include/asm/irq.h > > > +++ b/xen/arch/arm/include/asm/irq.h > > > @@ -73,7 +73,6 @@ static inline bool is_lpi(unsigned int irq) > > > bool is_assignable_irq(unsigned int irq); > > > void init_IRQ(void); > > > -void init_secondary_IRQ(void); > > > int route_irq_to_guest(struct domain *d, unsigned int virq, > > > unsigned int irq, const char *devname); > > > diff --git a/xen/arch/arm/irq.c b/xen/arch/arm/irq.c > > > index b761d90c4063..56bdcb95335d 100644 > > > --- a/xen/arch/arm/irq.c > > > +++ b/xen/arch/arm/irq.c > > > @@ -17,6 +17,7 @@ > > > * GNU General Public License for more details. > > > */ > > > +#include <xen/cpu.h> > > > #include <xen/lib.h> > > > #include <xen/spinlock.h> > > > #include <xen/irq.h> > > > @@ -100,7 +101,7 @@ static int __init init_irq_data(void) > > > return 0; > > > } > > > -static int init_local_irq_data(void) > > > +static int init_local_irq_data(unsigned int cpu) > > > { > > > int irq; > > > @@ -108,7 +109,7 @@ static int init_local_irq_data(void) > > > for ( irq = 0; irq < NR_LOCAL_IRQS; irq++ ) > > > { > > > - struct irq_desc *desc = irq_to_desc(irq); > > > + struct irq_desc *desc = &per_cpu(local_irq_desc, cpu)[irq]; > > > int rc = init_one_irq_desc(desc); > > > if ( rc ) > > > @@ -131,6 +132,29 @@ static int init_local_irq_data(void) > > > return 0; > > > } > > > +static int cpu_callback(struct notifier_block *nfb, unsigned long > > > action, > > > + void *hcpu) > > > +{ > > > + unsigned long cpu = (unsigned long)hcpu; > > > > unsigned int cpu ? > > Hmmm... We seem to have a mix in the code base. I am OK to switch to unsigned > int. > > > > > The rest looks good > Can this be converted to an ack or review tag? Yes, add my reviewed-by
Hi Julien, [OFFLIST] > On 14 Jun 2022, at 10:41, Julien Grall <julien@xen.org> wrote: > > From: Julien Grall <jgrall@amazon.com> > > Commit 5047cd1d5dea "xen/common: Use enhanced ASSERT_ALLOC_CONTEXT in > xmalloc()" extended the checks in _xmalloc() to catch any use of the > helpers from context with interrupts disabled. > > Unfortunately, the rule is not followed when initializing the per-CPU > IRQs: > > (XEN) Xen call trace: > (XEN) [<002389f4>] _xmalloc+0xfc/0x314 (PC) > (XEN) [<00000000>] 00000000 (LR) > (XEN) [<0021a7c4>] init_one_irq_desc+0x48/0xd0 > (XEN) [<002807a8>] irq.c#init_local_irq_data+0x48/0xa4 > (XEN) [<00280834>] init_secondary_IRQ+0x10/0x2c > (XEN) [<00288fa4>] start_secondary+0x194/0x274 > (XEN) [<40010170>] 40010170 > (XEN) > (XEN) > (XEN) **************************************** > (XEN) Panic on CPU 2: > (XEN) Assertion '!in_irq() && (local_irq_is_enabled() || num_online_cpus() <= 1)' failed at common/xmalloc_tlsf.c:601 > (XEN) **************************************** > > This is happening because zalloc_cpumask_var() may allocate memory > if NR_CPUS is > 2 * sizeof(unsigned long). > > Avoid the problem by allocate the per-CPU IRQs while preparing the > CPU. > > This also has the benefit to remove a BUG_ON() in the secondary CPU > code. > > Signed-off-by: Julien Grall <jgrall@amazon.com> I still have issues after applying this patch on qemu-arm32: (XEN) CPU0: Guest atomics will try 1 times before pausing the domain^M^M (XEN) Bringing up CPU1^M^M (XEN) CPU1: Guest atomics will try 1 times before pausing the domain^M^M (XEN) Assertion 'test_bit(_IRQ_DISABLED, &desc->status)' failed at arch/arm/gic.c:124^M^M (XEN) ----[ Xen-4.17-unstable arm32 debug=y Not tainted ]----^M^M (XEN) CPU: 1^M^M (XEN) PC: 0026f134 gic_route_irq_to_xen+0xa4/0xb0^M^M (XEN) CPSR: 400001da MODE:Hypervisor^M^M (XEN) R0: 00000120 R1: 000000a0 R2: 40002538 R3: 00000000^M^M (XEN) R4: 40004f00 R5: 000000a0 R6: 40002538 R7: 8000015a^M^M (XEN) R8: 00000000 R9: 40004f14 R10:3fe10000 R11:43fefeec R12:40002ff8^M^M (XEN) HYP: SP: 43fefed4 LR: 0026f0b8^M^M (XEN) ^M^M (XEN) VTCR_EL2: 00000000^M^M (XEN) VTTBR_EL2: 0000000000000000^M^M (XEN) ^M^M (XEN) SCTLR_EL2: 30cd187f^M^M (XEN) HCR_EL2: 00000038^M^M (XEN) TTBR0_EL2: 00000000bfffa000^M^M (XEN) ^M^M (XEN) ESR_EL2: 00000000^M^M (XEN) HPFAR_EL2: 00000000^M^M (XEN) HDFAR: 00000000^M^M (XEN) HIFAR: 00000000^M^M (XEN) ^M^M (XEN) Xen stack trace from sp=43fefed4:^M^M (XEN) 00000000 40004f00 00000000 40002538 8000015a 43feff0c 00272a4c 40002538^M^M (XEN) 002a47c4 00000019 00000000 0026ee28 40010000 43feff2c 00272b30 00309298^M^M (XEN) 00000001 0033b248 00000001 00000000 40010000 43feff3c 0026f7ac 00000000^M^M (XEN) 00201828 43feff54 0027ac3c bfffa000 00000000 00000000 00000001 00000000^M^M (XEN) 400100c0 00000000 00000000 00000000 00000000 00000000 00000000 00000000^M^M (XEN) 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000^M^M (XEN) 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000^M^M (XEN) 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000^M^M (XEN) 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000^M^M (XEN) 00000000 00000000 00000000^M^M (XEN) Xen call trace:^M^M (XEN) [<0026f134>] gic_route_irq_to_xen+0xa4/0xb0 (PC)^M^M (XEN) [<0026f0b8>] gic_route_irq_to_xen+0x28/0xb0 (LR)^M^M (XEN) [<00272a4c>] setup_irq+0x104/0x178^M^M (XEN) [<00272b30>] request_irq+0x70/0xb4^M^M (XEN) [<0026f7ac>] init_maintenance_interrupt+0x40/0x5c^M^M (XEN) [<0027ac3c>] start_secondary+0x1e8/0x270^M^M (XEN) [<400100c0>] 400100c0^M^M Just wanted to signal before you push this out. I will investigate deeper and come back to you. Cheers Bertrand
Sorry, this was suppose to be off list but nothing wrong in making everybody aware I guess :-) Bertrand > On 24 Jun 2022, at 10:31, Bertrand Marquis <Bertrand.Marquis@arm.com> wrote: > > Hi Julien, > > [OFFLIST] > >> On 14 Jun 2022, at 10:41, Julien Grall <julien@xen.org> wrote: >> >> From: Julien Grall <jgrall@amazon.com> >> >> Commit 5047cd1d5dea "xen/common: Use enhanced ASSERT_ALLOC_CONTEXT in >> xmalloc()" extended the checks in _xmalloc() to catch any use of the >> helpers from context with interrupts disabled. >> >> Unfortunately, the rule is not followed when initializing the per-CPU >> IRQs: >> >> (XEN) Xen call trace: >> (XEN) [<002389f4>] _xmalloc+0xfc/0x314 (PC) >> (XEN) [<00000000>] 00000000 (LR) >> (XEN) [<0021a7c4>] init_one_irq_desc+0x48/0xd0 >> (XEN) [<002807a8>] irq.c#init_local_irq_data+0x48/0xa4 >> (XEN) [<00280834>] init_secondary_IRQ+0x10/0x2c >> (XEN) [<00288fa4>] start_secondary+0x194/0x274 >> (XEN) [<40010170>] 40010170 >> (XEN) >> (XEN) >> (XEN) **************************************** >> (XEN) Panic on CPU 2: >> (XEN) Assertion '!in_irq() && (local_irq_is_enabled() || num_online_cpus() <= 1)' failed at common/xmalloc_tlsf.c:601 >> (XEN) **************************************** >> >> This is happening because zalloc_cpumask_var() may allocate memory >> if NR_CPUS is > 2 * sizeof(unsigned long). >> >> Avoid the problem by allocate the per-CPU IRQs while preparing the >> CPU. >> >> This also has the benefit to remove a BUG_ON() in the secondary CPU >> code. >> >> Signed-off-by: Julien Grall <jgrall@amazon.com> > > I still have issues after applying this patch on qemu-arm32: > > (XEN) CPU0: Guest atomics will try 1 times before pausing the domain^M^M > (XEN) Bringing up CPU1^M^M > (XEN) CPU1: Guest atomics will try 1 times before pausing the domain^M^M > (XEN) Assertion 'test_bit(_IRQ_DISABLED, &desc->status)' failed at arch/arm/gic.c:124^M^M > (XEN) ----[ Xen-4.17-unstable arm32 debug=y Not tainted ]----^M^M > (XEN) CPU: 1^M^M > (XEN) PC: 0026f134 gic_route_irq_to_xen+0xa4/0xb0^M^M > (XEN) CPSR: 400001da MODE:Hypervisor^M^M > (XEN) R0: 00000120 R1: 000000a0 R2: 40002538 R3: 00000000^M^M > (XEN) R4: 40004f00 R5: 000000a0 R6: 40002538 R7: 8000015a^M^M > (XEN) R8: 00000000 R9: 40004f14 R10:3fe10000 R11:43fefeec R12:40002ff8^M^M > (XEN) HYP: SP: 43fefed4 LR: 0026f0b8^M^M > (XEN) ^M^M > (XEN) VTCR_EL2: 00000000^M^M > (XEN) VTTBR_EL2: 0000000000000000^M^M > (XEN) ^M^M > (XEN) SCTLR_EL2: 30cd187f^M^M > (XEN) HCR_EL2: 00000038^M^M > (XEN) TTBR0_EL2: 00000000bfffa000^M^M > (XEN) ^M^M > (XEN) ESR_EL2: 00000000^M^M > (XEN) HPFAR_EL2: 00000000^M^M > (XEN) HDFAR: 00000000^M^M > (XEN) HIFAR: 00000000^M^M > (XEN) ^M^M > (XEN) Xen stack trace from sp=43fefed4:^M^M > (XEN) 00000000 40004f00 00000000 40002538 8000015a 43feff0c 00272a4c 40002538^M^M > (XEN) 002a47c4 00000019 00000000 0026ee28 40010000 43feff2c 00272b30 00309298^M^M > (XEN) 00000001 0033b248 00000001 00000000 40010000 43feff3c 0026f7ac 00000000^M^M > (XEN) 00201828 43feff54 0027ac3c bfffa000 00000000 00000000 00000001 00000000^M^M > (XEN) 400100c0 00000000 00000000 00000000 00000000 00000000 00000000 00000000^M^M > (XEN) 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000^M^M > (XEN) 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000^M^M > (XEN) 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000^M^M > (XEN) 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000^M^M > (XEN) 00000000 00000000 00000000^M^M > (XEN) Xen call trace:^M^M > (XEN) [<0026f134>] gic_route_irq_to_xen+0xa4/0xb0 (PC)^M^M > (XEN) [<0026f0b8>] gic_route_irq_to_xen+0x28/0xb0 (LR)^M^M > (XEN) [<00272a4c>] setup_irq+0x104/0x178^M^M > (XEN) [<00272b30>] request_irq+0x70/0xb4^M^M > (XEN) [<0026f7ac>] init_maintenance_interrupt+0x40/0x5c^M^M > (XEN) [<0027ac3c>] start_secondary+0x1e8/0x270^M^M > (XEN) [<400100c0>] 400100c0^M^M > > Just wanted to signal before you push this out. > > I will investigate deeper and come back to you. > > Cheers > Bertrand
Hi, > On 24 Jun 2022, at 10:31, Bertrand Marquis <Bertrand.Marquis@arm.com> wrote: > > Hi Julien, > > [OFFLIST] > >> On 14 Jun 2022, at 10:41, Julien Grall <julien@xen.org> wrote: >> >> From: Julien Grall <jgrall@amazon.com> >> >> Commit 5047cd1d5dea "xen/common: Use enhanced ASSERT_ALLOC_CONTEXT in >> xmalloc()" extended the checks in _xmalloc() to catch any use of the >> helpers from context with interrupts disabled. >> >> Unfortunately, the rule is not followed when initializing the per-CPU >> IRQs: >> >> (XEN) Xen call trace: >> (XEN) [<002389f4>] _xmalloc+0xfc/0x314 (PC) >> (XEN) [<00000000>] 00000000 (LR) >> (XEN) [<0021a7c4>] init_one_irq_desc+0x48/0xd0 >> (XEN) [<002807a8>] irq.c#init_local_irq_data+0x48/0xa4 >> (XEN) [<00280834>] init_secondary_IRQ+0x10/0x2c >> (XEN) [<00288fa4>] start_secondary+0x194/0x274 >> (XEN) [<40010170>] 40010170 >> (XEN) >> (XEN) >> (XEN) **************************************** >> (XEN) Panic on CPU 2: >> (XEN) Assertion '!in_irq() && (local_irq_is_enabled() || num_online_cpus() <= 1)' failed at common/xmalloc_tlsf.c:601 >> (XEN) **************************************** >> >> This is happening because zalloc_cpumask_var() may allocate memory >> if NR_CPUS is > 2 * sizeof(unsigned long). >> >> Avoid the problem by allocate the per-CPU IRQs while preparing the >> CPU. >> >> This also has the benefit to remove a BUG_ON() in the secondary CPU >> code. >> >> Signed-off-by: Julien Grall <jgrall@amazon.com> > > I still have issues after applying this patch on qemu-arm32: > > (XEN) CPU0: Guest atomics will try 1 times before pausing the domain^M^M > (XEN) Bringing up CPU1^M^M > (XEN) CPU1: Guest atomics will try 1 times before pausing the domain^M^M > (XEN) Assertion 'test_bit(_IRQ_DISABLED, &desc->status)' failed at arch/arm/gic.c:124^M^M > (XEN) ----[ Xen-4.17-unstable arm32 debug=y Not tainted ]----^M^M > (XEN) CPU: 1^M^M > (XEN) PC: 0026f134 gic_route_irq_to_xen+0xa4/0xb0^M^M > (XEN) CPSR: 400001da MODE:Hypervisor^M^M > (XEN) R0: 00000120 R1: 000000a0 R2: 40002538 R3: 00000000^M^M > (XEN) R4: 40004f00 R5: 000000a0 R6: 40002538 R7: 8000015a^M^M > (XEN) R8: 00000000 R9: 40004f14 R10:3fe10000 R11:43fefeec R12:40002ff8^M^M > (XEN) HYP: SP: 43fefed4 LR: 0026f0b8^M^M > (XEN) ^M^M > (XEN) VTCR_EL2: 00000000^M^M > (XEN) VTTBR_EL2: 0000000000000000^M^M > (XEN) ^M^M > (XEN) SCTLR_EL2: 30cd187f^M^M > (XEN) HCR_EL2: 00000038^M^M > (XEN) TTBR0_EL2: 00000000bfffa000^M^M > (XEN) ^M^M > (XEN) ESR_EL2: 00000000^M^M > (XEN) HPFAR_EL2: 00000000^M^M > (XEN) HDFAR: 00000000^M^M > (XEN) HIFAR: 00000000^M^M > (XEN) ^M^M > (XEN) Xen stack trace from sp=43fefed4:^M^M > (XEN) 00000000 40004f00 00000000 40002538 8000015a 43feff0c 00272a4c 40002538^M^M > (XEN) 002a47c4 00000019 00000000 0026ee28 40010000 43feff2c 00272b30 00309298^M^M > (XEN) 00000001 0033b248 00000001 00000000 40010000 43feff3c 0026f7ac 00000000^M^M > (XEN) 00201828 43feff54 0027ac3c bfffa000 00000000 00000000 00000001 00000000^M^M > (XEN) 400100c0 00000000 00000000 00000000 00000000 00000000 00000000 00000000^M^M > (XEN) 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000^M^M > (XEN) 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000^M^M > (XEN) 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000^M^M > (XEN) 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000^M^M > (XEN) 00000000 00000000 00000000^M^M > (XEN) Xen call trace:^M^M > (XEN) [<0026f134>] gic_route_irq_to_xen+0xa4/0xb0 (PC)^M^M > (XEN) [<0026f0b8>] gic_route_irq_to_xen+0x28/0xb0 (LR)^M^M > (XEN) [<00272a4c>] setup_irq+0x104/0x178^M^M > (XEN) [<00272b30>] request_irq+0x70/0xb4^M^M > (XEN) [<0026f7ac>] init_maintenance_interrupt+0x40/0x5c^M^M > (XEN) [<0027ac3c>] start_secondary+0x1e8/0x270^M^M > (XEN) [<400100c0>] 400100c0^M^M > > Just wanted to signal before you push this out. > > I will investigate deeper and come back to you. Pwclient did not apply the whole patch, only the smpboot part on my try run. Re-running it applied it correctly and now my tests are passing so: Reviewed-by: Bertrand Marquis <bertrand.marquis@arm.com> Tested-by: Bertrand Marquis <bertrand.marquis@arm.com> Cheers Bertrand > > Cheers > Bertrand
Hi Bertrand, On 24/06/2022 11:01, Bertrand Marquis wrote: > Reviewed-by: Bertrand Marquis <bertrand.marquis@arm.com> > Tested-by: Bertrand Marquis <bertrand.marquis@arm.com> Thanks! I have committed the patch. Cheers,
diff --git a/xen/arch/arm/include/asm/irq.h b/xen/arch/arm/include/asm/irq.h index e45d57459899..245f49dcbac5 100644 --- a/xen/arch/arm/include/asm/irq.h +++ b/xen/arch/arm/include/asm/irq.h @@ -73,7 +73,6 @@ static inline bool is_lpi(unsigned int irq) bool is_assignable_irq(unsigned int irq); void init_IRQ(void); -void init_secondary_IRQ(void); int route_irq_to_guest(struct domain *d, unsigned int virq, unsigned int irq, const char *devname); diff --git a/xen/arch/arm/irq.c b/xen/arch/arm/irq.c index b761d90c4063..56bdcb95335d 100644 --- a/xen/arch/arm/irq.c +++ b/xen/arch/arm/irq.c @@ -17,6 +17,7 @@ * GNU General Public License for more details. */ +#include <xen/cpu.h> #include <xen/lib.h> #include <xen/spinlock.h> #include <xen/irq.h> @@ -100,7 +101,7 @@ static int __init init_irq_data(void) return 0; } -static int init_local_irq_data(void) +static int init_local_irq_data(unsigned int cpu) { int irq; @@ -108,7 +109,7 @@ static int init_local_irq_data(void) for ( irq = 0; irq < NR_LOCAL_IRQS; irq++ ) { - struct irq_desc *desc = irq_to_desc(irq); + struct irq_desc *desc = &per_cpu(local_irq_desc, cpu)[irq]; int rc = init_one_irq_desc(desc); if ( rc ) @@ -131,6 +132,29 @@ static int init_local_irq_data(void) return 0; } +static int cpu_callback(struct notifier_block *nfb, unsigned long action, + void *hcpu) +{ + unsigned long cpu = (unsigned long)hcpu; + int rc = 0; + + switch ( action ) + { + case CPU_UP_PREPARE: + rc = init_local_irq_data(cpu); + if ( rc ) + printk(XENLOG_ERR "Unable to allocate local IRQ for CPU%lu\n", + cpu); + break; + } + + return !rc ? NOTIFY_DONE : notifier_from_errno(rc); +} + +static struct notifier_block cpu_nfb = { + .notifier_call = cpu_callback, +}; + void __init init_IRQ(void) { int irq; @@ -140,13 +164,10 @@ void __init init_IRQ(void) local_irqs_type[irq] = IRQ_TYPE_INVALID; spin_unlock(&local_irqs_type_lock); - BUG_ON(init_local_irq_data() < 0); + BUG_ON(init_local_irq_data(smp_processor_id()) < 0); BUG_ON(init_irq_data() < 0); -} -void init_secondary_IRQ(void) -{ - BUG_ON(init_local_irq_data() < 0); + register_cpu_notifier(&cpu_nfb); } static inline struct irq_guest *irq_get_guest_info(struct irq_desc *desc) diff --git a/xen/arch/arm/smpboot.c b/xen/arch/arm/smpboot.c index 9bb32a301a70..4888bcd78a5a 100644 --- a/xen/arch/arm/smpboot.c +++ b/xen/arch/arm/smpboot.c @@ -359,8 +359,6 @@ void start_secondary(void) gic_init_secondary_cpu(); - init_secondary_IRQ(); - set_current(idle_vcpu[cpuid]); setup_cpu_sibling_map(cpuid);